/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 211 by ph10, Thu Aug 9 09:52:43 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
48    #include <config.h>
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 58  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 83  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 107  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 127  static uschar toptable1[] = { Line 147  static uschar toptable1[] = {
147  };  };
148    
149  static uschar toptable2[] = {  static uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
# Line 278  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 291  const uschar *start_code = md->start_cod Line 311  const uschar *start_code = md->start_cod
311    
312  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316  #endif  #endif
317    
318  rlevel++;  rlevel++;
# Line 314  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 323  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 405  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 461  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 469  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 484  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
521      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
522        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
523          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
524  #endif  #endif
# Line 532  for (;;) Line 562  for (;;)
562    
563      code = start_code + state_offset;      code = start_code + state_offset;
564      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
565    
566      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
567      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 540  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 554  for (;;) Line 583  for (;;)
583        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
584        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
585          {          {
586          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
587          if (d >= OP_NOTPROP)            {
588            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
589              case OP_NOTPROP:
590              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597              default: break;
598              }
599          }          }
600        }        }
601      else      else
602        {        {
603        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
604        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
605        }        }
606    
607    
# Line 624  for (;;) Line 663  for (;;)
663    
664        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
665        case OP_BRA:        case OP_BRA:
666          case OP_SBRA:
667        do        do
668          {          {
669          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 633  for (;;) Line 673  for (;;)
673        break;        break;
674    
675        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
676          case OP_CBRA:
677          case OP_SCBRA:
678          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
679          code += GET(code, 1);
680          while (*code == OP_ALT)
681            {
682            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
683            code += GET(code, 1);
684            }
685          break;
686    
687          /*-----------------------------------------------------------------*/
688        case OP_BRAZERO:        case OP_BRAZERO:
689        case OP_BRAMINZERO:        case OP_BRAMINZERO:
690        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 642  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
697        case OP_CIRC:        case OP_CIRC:
698        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
699            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
             ptr >= start_subject + md->nllen &&  
700              ptr != end_subject &&              ptr != end_subject &&
701              IS_NEWLINE(ptr - md->nllen)))              WAS_NEWLINE(ptr)))
702          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
703        break;        break;
704    
# Line 686  for (;;) Line 732  for (;;)
732    
733        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
734        case OP_ANY:        case OP_ANY:
735        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
                        ptr > end_subject - md->nllen ||  
                        !IS_NEWLINE(ptr)))  
736          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
737        break;        break;
738    
739        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
740        case OP_EODN:        case OP_EODN:
741        if (clen == 0 ||        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
            (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))  
742          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
743        break;        break;
744    
# Line 704  for (;;) Line 747  for (;;)
747        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
748          {          {
749          if (clen == 0 ||          if (clen == 0 ||
750              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&              (IS_NEWLINE(ptr) &&
751                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
752              ))              ))
753            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
754          }          }
755        else if ((ims & PCRE_MULTILINE) != 0 &&        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
                ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))  
756          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
757        break;        break;
758    
# Line 759  for (;;) Line 801  for (;;)
801        break;        break;
802    
803    
 #ifdef SUPPORT_UCP  
   
804        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
805        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
806        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
807        */        */
808    
809    #ifdef SUPPORT_UCP
810        case OP_PROP:        case OP_PROP:
811        case OP_NOTPROP:        case OP_NOTPROP:
812        if (clen > 0)        if (clen > 0)
# Line 816  for (;;) Line 857  for (;;)
857    
858        case OP_TYPEPLUS:        case OP_TYPEPLUS:
859        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
860          case OP_TYPEPOSPLUS:
861        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
862        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
863        if (clen > 0)        if (clen > 0)
# Line 824  for (;;) Line 866  for (;;)
866              (c < 256 &&              (c < 256 &&
867                (d != OP_ANY ||                (d != OP_ANY ||
868                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
869                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
870                ) &&                ) &&
871                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
872            {            {
873              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
874                {
875                active_count--;            /* Remove non-match possibility */
876                next_active_state--;
877                }
878            count++;            count++;
879            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
880            }            }
# Line 838  for (;;) Line 884  for (;;)
884        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
885        case OP_TYPEQUERY:        case OP_TYPEQUERY:
886        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
887          case OP_TYPEPOSQUERY:
888        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
889        if (clen > 0)        if (clen > 0)
890          {          {
# Line 845  for (;;) Line 892  for (;;)
892              (c < 256 &&              (c < 256 &&
893                (d != OP_ANY ||                (d != OP_ANY ||
894                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
895                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
896                ) &&                ) &&
897                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
898            {            {
899              if (codevalue == OP_TYPEPOSQUERY)
900                {
901                active_count--;            /* Remove non-match possibility */
902                next_active_state--;
903                }
904            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
905            }            }
906          }          }
# Line 858  for (;;) Line 909  for (;;)
909        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
910        case OP_TYPESTAR:        case OP_TYPESTAR:
911        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
912          case OP_TYPEPOSSTAR:
913        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
914        if (clen > 0)        if (clen > 0)
915          {          {
# Line 865  for (;;) Line 917  for (;;)
917              (c < 256 &&              (c < 256 &&
918                (d != OP_ANY ||                (d != OP_ANY ||
919                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
920                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
921                ) &&                ) &&
922                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
923            {            {
924              if (codevalue == OP_TYPEPOSSTAR)
925                {
926                active_count--;            /* Remove non-match possibility */
927                next_active_state--;
928                }
929            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
930            }            }
931          }          }
# Line 877  for (;;) Line 933  for (;;)
933    
934        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
935        case OP_TYPEEXACT:        case OP_TYPEEXACT:
936          count = current_state->count;  /* Number already matched */
937          if (clen > 0)
938            {
939            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
940                (c < 256 &&
941                  (d != OP_ANY ||
942                   (ims & PCRE_DOTALL) != 0 ||
943                   !IS_NEWLINE(ptr)
944                  ) &&
945                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
946              {
947              if (++count >= GET2(code, 1))
948                { ADD_NEW(state_offset + 4, 0); }
949              else
950                { ADD_NEW(state_offset, count); }
951              }
952            }
953          break;
954    
955          /*-----------------------------------------------------------------*/
956        case OP_TYPEUPTO:        case OP_TYPEUPTO:
957        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
958        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
959          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
960        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
961        if (clen > 0)        if (clen > 0)
962          {          {
# Line 888  for (;;) Line 964  for (;;)
964              (c < 256 &&              (c < 256 &&
965                (d != OP_ANY ||                (d != OP_ANY ||
966                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
967                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
968                ) &&                ) &&
969                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
970            {            {
971              if (codevalue == OP_TYPEPOSUPTO)
972                {
973                active_count--;           /* Remove non-match possibility */
974                next_active_state--;
975                }
976            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
977              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
978            else            else
# Line 903  for (;;) Line 983  for (;;)
983    
984  /* ========================================================================== */  /* ========================================================================== */
985        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
986        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
987        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
988        d variable. */        is in the d variable. */
989    
990    #ifdef SUPPORT_UCP
991        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
992        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
993          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
994        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
995        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
996        if (clen > 0)        if (clen > 0)
# Line 944  for (;;) Line 1026  for (;;)
1026            break;            break;
1027            }            }
1028    
1029          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1030              {
1031              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1032                {
1033                active_count--;           /* Remove non-match possibility */
1034                next_active_state--;
1035                }
1036              count++;
1037              ADD_NEW(state_offset, count);
1038              }
1039          }          }
1040        break;        break;
1041    
1042        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1043        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1044        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1045          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1046        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1047        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1048        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1049          {          {
1050          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1051          int ncount = 0;          int ncount = 0;
1052            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1053              {
1054              active_count--;           /* Remove non-match possibility */
1055              next_active_state--;
1056              }
1057          while (nptr < end_subject)          while (nptr < end_subject)
1058            {            {
1059            int nd;            int nd;
# Line 970  for (;;) Line 1067  for (;;)
1067          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1068          }          }
1069        break;        break;
1070    #endif
1071    
1072        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1073          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1074          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1075          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1076          count = current_state->count;  /* Already matched */
1077          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1078          if (clen > 0)
1079            {
1080            int ncount = 0;
1081            switch (c)
1082              {
1083              case 0x000d:
1084              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1085              /* Fall through */
1086              case 0x000a:
1087              case 0x000b:
1088              case 0x000c:
1089              case 0x0085:
1090              case 0x2028:
1091              case 0x2029:
1092              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1093                {
1094                active_count--;           /* Remove non-match possibility */
1095                next_active_state--;
1096                }
1097              count++;
1098              ADD_NEW_DATA(-state_offset, count, ncount);
1099              break;
1100              default:
1101              break;
1102              }
1103            }
1104          break;
1105    
1106          /*-----------------------------------------------------------------*/
1107          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1108          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1109          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1110          count = current_state->count;  /* Already matched */
1111          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1112          if (clen > 0)
1113            {
1114            BOOL OK;
1115            switch (c)
1116              {
1117              case 0x000a:
1118              case 0x000b:
1119              case 0x000c:
1120              case 0x000d:
1121              case 0x0085:
1122              case 0x2028:
1123              case 0x2029:
1124              OK = TRUE;
1125              break;
1126    
1127              default:
1128              OK = FALSE;
1129              break;
1130              }
1131    
1132            if (OK == (d == OP_VSPACE))
1133              {
1134              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1135                {
1136                active_count--;           /* Remove non-match possibility */
1137                next_active_state--;
1138                }
1139              count++;
1140              ADD_NEW_DATA(-state_offset, count, 0);
1141              }
1142            }
1143          break;
1144    
1145          /*-----------------------------------------------------------------*/
1146          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1147          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1148          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1149          count = current_state->count;  /* Already matched */
1150          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1151          if (clen > 0)
1152            {
1153            BOOL OK;
1154            switch (c)
1155              {
1156              case 0x09:      /* HT */
1157              case 0x20:      /* SPACE */
1158              case 0xa0:      /* NBSP */
1159              case 0x1680:    /* OGHAM SPACE MARK */
1160              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1161              case 0x2000:    /* EN QUAD */
1162              case 0x2001:    /* EM QUAD */
1163              case 0x2002:    /* EN SPACE */
1164              case 0x2003:    /* EM SPACE */
1165              case 0x2004:    /* THREE-PER-EM SPACE */
1166              case 0x2005:    /* FOUR-PER-EM SPACE */
1167              case 0x2006:    /* SIX-PER-EM SPACE */
1168              case 0x2007:    /* FIGURE SPACE */
1169              case 0x2008:    /* PUNCTUATION SPACE */
1170              case 0x2009:    /* THIN SPACE */
1171              case 0x200A:    /* HAIR SPACE */
1172              case 0x202f:    /* NARROW NO-BREAK SPACE */
1173              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1174              case 0x3000:    /* IDEOGRAPHIC SPACE */
1175              OK = TRUE;
1176              break;
1177    
1178              default:
1179              OK = FALSE;
1180              break;
1181              }
1182    
1183            if (OK == (d == OP_HSPACE))
1184              {
1185              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1186                {
1187                active_count--;           /* Remove non-match possibility */
1188                next_active_state--;
1189                }
1190              count++;
1191              ADD_NEW_DATA(-state_offset, count, 0);
1192              }
1193            }
1194          break;
1195    
1196          /*-----------------------------------------------------------------*/
1197    #ifdef SUPPORT_UCP
1198        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1199        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1200          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1201        count = 4;        count = 4;
1202        goto QS1;        goto QS1;
1203    
1204        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1205        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1206          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1207        count = 0;        count = 0;
1208    
1209        QS1:        QS1:
# Line 1017  for (;;) Line 1242  for (;;)
1242            break;            break;
1243            }            }
1244    
1245          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1246              {
1247              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1248                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1249                {
1250                active_count--;           /* Remove non-match possibility */
1251                next_active_state--;
1252                }
1253              ADD_NEW(state_offset + count, 0);
1254              }
1255          }          }
1256        break;        break;
1257    
1258        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1259        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1260        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1261          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1262        count = 2;        count = 2;
1263        goto QS2;        goto QS2;
1264    
1265        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1266        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1267          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1268        count = 0;        count = 0;
1269    
1270        QS2:        QS2:
# Line 1038  for (;;) Line 1274  for (;;)
1274          {          {
1275          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1276          int ncount = 0;          int ncount = 0;
1277            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1278                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1279              {
1280              active_count--;           /* Remove non-match possibility */
1281              next_active_state--;
1282              }
1283          while (nptr < end_subject)          while (nptr < end_subject)
1284            {            {
1285            int nd;            int nd;
# Line 1050  for (;;) Line 1292  for (;;)
1292          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1293          }          }
1294        break;        break;
1295    #endif
1296    
1297          /*-----------------------------------------------------------------*/
1298          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1299          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1300          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1301          count = 2;
1302          goto QS3;
1303    
1304          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1305          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1306          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1307          count = 0;
1308    
1309          QS3:
1310          ADD_ACTIVE(state_offset + 2, 0);
1311          if (clen > 0)
1312            {
1313            int ncount = 0;
1314            switch (c)
1315              {
1316              case 0x000d:
1317              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1318              /* Fall through */
1319              case 0x000a:
1320              case 0x000b:
1321              case 0x000c:
1322              case 0x0085:
1323              case 0x2028:
1324              case 0x2029:
1325              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1326                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1327                {
1328                active_count--;           /* Remove non-match possibility */
1329                next_active_state--;
1330                }
1331              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1332              break;
1333              default:
1334              break;
1335              }
1336            }
1337          break;
1338    
1339        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1340          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1341          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1342          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1343          count = 2;
1344          goto QS4;
1345    
1346          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1347          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1348          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1349          count = 0;
1350    
1351          QS4:
1352          ADD_ACTIVE(state_offset + 2, 0);
1353          if (clen > 0)
1354            {
1355            BOOL OK;
1356            switch (c)
1357              {
1358              case 0x000a:
1359              case 0x000b:
1360              case 0x000c:
1361              case 0x000d:
1362              case 0x0085:
1363              case 0x2028:
1364              case 0x2029:
1365              OK = TRUE;
1366              break;
1367    
1368              default:
1369              OK = FALSE;
1370              break;
1371              }
1372            if (OK == (d == OP_VSPACE))
1373              {
1374              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1375                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1376                {
1377                active_count--;           /* Remove non-match possibility */
1378                next_active_state--;
1379                }
1380              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1381              }
1382            }
1383          break;
1384    
1385          /*-----------------------------------------------------------------*/
1386          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1387          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1388          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1389          count = 2;
1390          goto QS5;
1391    
1392          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1393          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1394          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1395          count = 0;
1396    
1397          QS5:
1398          ADD_ACTIVE(state_offset + 2, 0);
1399          if (clen > 0)
1400            {
1401            BOOL OK;
1402            switch (c)
1403              {
1404              case 0x09:      /* HT */
1405              case 0x20:      /* SPACE */
1406              case 0xa0:      /* NBSP */
1407              case 0x1680:    /* OGHAM SPACE MARK */
1408              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1409              case 0x2000:    /* EN QUAD */
1410              case 0x2001:    /* EM QUAD */
1411              case 0x2002:    /* EN SPACE */
1412              case 0x2003:    /* EM SPACE */
1413              case 0x2004:    /* THREE-PER-EM SPACE */
1414              case 0x2005:    /* FOUR-PER-EM SPACE */
1415              case 0x2006:    /* SIX-PER-EM SPACE */
1416              case 0x2007:    /* FIGURE SPACE */
1417              case 0x2008:    /* PUNCTUATION SPACE */
1418              case 0x2009:    /* THIN SPACE */
1419              case 0x200A:    /* HAIR SPACE */
1420              case 0x202f:    /* NARROW NO-BREAK SPACE */
1421              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1422              case 0x3000:    /* IDEOGRAPHIC SPACE */
1423              OK = TRUE;
1424              break;
1425    
1426              default:
1427              OK = FALSE;
1428              break;
1429              }
1430    
1431            if (OK == (d == OP_HSPACE))
1432              {
1433              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1434                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1435                {
1436                active_count--;           /* Remove non-match possibility */
1437                next_active_state--;
1438                }
1439              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1440              }
1441            }
1442          break;
1443    
1444          /*-----------------------------------------------------------------*/
1445    #ifdef SUPPORT_UCP
1446        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1447        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1448        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1449          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1450        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1451          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1452        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1093  for (;;) Line 1485  for (;;)
1485    
1486          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1487            {            {
1488              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1489                {
1490                active_count--;           /* Remove non-match possibility */
1491                next_active_state--;
1492                }
1493            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1494              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1495            else            else
# Line 1105  for (;;) Line 1502  for (;;)
1502        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1503        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1504        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1505          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1506        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1507          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1508        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1112  for (;;) Line 1510  for (;;)
1510          {          {
1511          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1512          int ncount = 0;          int ncount = 0;
1513            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1514              {
1515              active_count--;           /* Remove non-match possibility */
1516              next_active_state--;
1517              }
1518          while (nptr < end_subject)          while (nptr < end_subject)
1519            {            {
1520            int nd;            int nd;
# Line 1127  for (;;) Line 1530  for (;;)
1530            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1531          }          }
1532        break;        break;
1533    #endif
1534    
1535          /*-----------------------------------------------------------------*/
1536          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1537          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1538          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1539          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1540          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1541            { ADD_ACTIVE(state_offset + 4, 0); }
1542          count = current_state->count;  /* Number already matched */
1543          if (clen > 0)
1544            {
1545            int ncount = 0;
1546            switch (c)
1547              {
1548              case 0x000d:
1549              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1550              /* Fall through */
1551              case 0x000a:
1552              case 0x000b:
1553              case 0x000c:
1554              case 0x0085:
1555              case 0x2028:
1556              case 0x2029:
1557              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1558                {
1559                active_count--;           /* Remove non-match possibility */
1560                next_active_state--;
1561                }
1562              if (++count >= GET2(code, 1))
1563                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1564              else
1565                { ADD_NEW_DATA(-state_offset, count, ncount); }
1566              break;
1567              default:
1568              break;
1569              }
1570            }
1571          break;
1572    
1573          /*-----------------------------------------------------------------*/
1574          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1575          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1576          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1577          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1578          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1579            { ADD_ACTIVE(state_offset + 4, 0); }
1580          count = current_state->count;  /* Number already matched */
1581          if (clen > 0)
1582            {
1583            BOOL OK;
1584            switch (c)
1585              {
1586              case 0x000a:
1587              case 0x000b:
1588              case 0x000c:
1589              case 0x000d:
1590              case 0x0085:
1591              case 0x2028:
1592              case 0x2029:
1593              OK = TRUE;
1594              break;
1595    
1596              default:
1597              OK = FALSE;
1598              }
1599    
1600            if (OK == (d == OP_VSPACE))
1601              {
1602              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1603                {
1604                active_count--;           /* Remove non-match possibility */
1605                next_active_state--;
1606                }
1607              if (++count >= GET2(code, 1))
1608                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1609              else
1610                { ADD_NEW_DATA(-state_offset, count, 0); }
1611              }
1612            }
1613          break;
1614    
1615          /*-----------------------------------------------------------------*/
1616          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1617          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1618          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1619          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1620          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1621            { ADD_ACTIVE(state_offset + 4, 0); }
1622          count = current_state->count;  /* Number already matched */
1623          if (clen > 0)
1624            {
1625            BOOL OK;
1626            switch (c)
1627              {
1628              case 0x09:      /* HT */
1629              case 0x20:      /* SPACE */
1630              case 0xa0:      /* NBSP */
1631              case 0x1680:    /* OGHAM SPACE MARK */
1632              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1633              case 0x2000:    /* EN QUAD */
1634              case 0x2001:    /* EM QUAD */
1635              case 0x2002:    /* EN SPACE */
1636              case 0x2003:    /* EM SPACE */
1637              case 0x2004:    /* THREE-PER-EM SPACE */
1638              case 0x2005:    /* FOUR-PER-EM SPACE */
1639              case 0x2006:    /* SIX-PER-EM SPACE */
1640              case 0x2007:    /* FIGURE SPACE */
1641              case 0x2008:    /* PUNCTUATION SPACE */
1642              case 0x2009:    /* THIN SPACE */
1643              case 0x200A:    /* HAIR SPACE */
1644              case 0x202f:    /* NARROW NO-BREAK SPACE */
1645              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1646              case 0x3000:    /* IDEOGRAPHIC SPACE */
1647              OK = TRUE;
1648              break;
1649    
1650              default:
1651              OK = FALSE;
1652              break;
1653              }
1654    
1655            if (OK == (d == OP_HSPACE))
1656              {
1657              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1658                {
1659                active_count--;           /* Remove non-match possibility */
1660                next_active_state--;
1661                }
1662              if (++count >= GET2(code, 1))
1663                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1664              else
1665                { ADD_NEW_DATA(-state_offset, count, 0); }
1666              }
1667            }
1668          break;
1669    
1670  /* ========================================================================== */  /* ========================================================================== */
1671        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1148  for (;;) Line 1687  for (;;)
1687          {          {
1688          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1689            {            {
1690            int othercase;            unsigned int othercase;
1691            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1692    
1693            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1157  for (;;) Line 1696  for (;;)
1696  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1697            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1698  #else  #else
1699            othercase = -1;            othercase = NOTACHAR;
1700  #endif  #endif
1701    
1702            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1198  for (;;) Line 1737  for (;;)
1737  #endif  #endif
1738    
1739        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1740          /* This is tricky like EXTUNI because it too can match more than one
1741          character (when CR is followed by LF). In this case, set up a negative
1742          state to wait for one character to pass before continuing. */
1743    
1744          case OP_ANYNL:
1745          if (clen > 0) switch(c)
1746            {
1747            case 0x000a:
1748            case 0x000b:
1749            case 0x000c:
1750            case 0x0085:
1751            case 0x2028:
1752            case 0x2029:
1753            ADD_NEW(state_offset + 1, 0);
1754            break;
1755            case 0x000d:
1756            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1757              {
1758              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1759              }
1760            else
1761              {
1762              ADD_NEW(state_offset + 1, 0);
1763              }
1764            break;
1765            }
1766          break;
1767    
1768          /*-----------------------------------------------------------------*/
1769          case OP_NOT_VSPACE:
1770          if (clen > 0) switch(c)
1771            {
1772            case 0x000a:
1773            case 0x000b:
1774            case 0x000c:
1775            case 0x000d:
1776            case 0x0085:
1777            case 0x2028:
1778            case 0x2029:
1779            break;
1780    
1781            default:
1782            ADD_NEW(state_offset + 1, 0);
1783            break;
1784            }
1785          break;
1786    
1787          /*-----------------------------------------------------------------*/
1788          case OP_VSPACE:
1789          if (clen > 0) switch(c)
1790            {
1791            case 0x000a:
1792            case 0x000b:
1793            case 0x000c:
1794            case 0x000d:
1795            case 0x0085:
1796            case 0x2028:
1797            case 0x2029:
1798            ADD_NEW(state_offset + 1, 0);
1799            break;
1800    
1801            default: break;
1802            }
1803          break;
1804    
1805          /*-----------------------------------------------------------------*/
1806          case OP_NOT_HSPACE:
1807          if (clen > 0) switch(c)
1808            {
1809            case 0x09:      /* HT */
1810            case 0x20:      /* SPACE */
1811            case 0xa0:      /* NBSP */
1812            case 0x1680:    /* OGHAM SPACE MARK */
1813            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1814            case 0x2000:    /* EN QUAD */
1815            case 0x2001:    /* EM QUAD */
1816            case 0x2002:    /* EN SPACE */
1817            case 0x2003:    /* EM SPACE */
1818            case 0x2004:    /* THREE-PER-EM SPACE */
1819            case 0x2005:    /* FOUR-PER-EM SPACE */
1820            case 0x2006:    /* SIX-PER-EM SPACE */
1821            case 0x2007:    /* FIGURE SPACE */
1822            case 0x2008:    /* PUNCTUATION SPACE */
1823            case 0x2009:    /* THIN SPACE */
1824            case 0x200A:    /* HAIR SPACE */
1825            case 0x202f:    /* NARROW NO-BREAK SPACE */
1826            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1827            case 0x3000:    /* IDEOGRAPHIC SPACE */
1828            break;
1829    
1830            default:
1831            ADD_NEW(state_offset + 1, 0);
1832            break;
1833            }
1834          break;
1835    
1836          /*-----------------------------------------------------------------*/
1837          case OP_HSPACE:
1838          if (clen > 0) switch(c)
1839            {
1840            case 0x09:      /* HT */
1841            case 0x20:      /* SPACE */
1842            case 0xa0:      /* NBSP */
1843            case 0x1680:    /* OGHAM SPACE MARK */
1844            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1845            case 0x2000:    /* EN QUAD */
1846            case 0x2001:    /* EM QUAD */
1847            case 0x2002:    /* EN SPACE */
1848            case 0x2003:    /* EM SPACE */
1849            case 0x2004:    /* THREE-PER-EM SPACE */
1850            case 0x2005:    /* FOUR-PER-EM SPACE */
1851            case 0x2006:    /* SIX-PER-EM SPACE */
1852            case 0x2007:    /* FIGURE SPACE */
1853            case 0x2008:    /* PUNCTUATION SPACE */
1854            case 0x2009:    /* THIN SPACE */
1855            case 0x200A:    /* HAIR SPACE */
1856            case 0x202f:    /* NARROW NO-BREAK SPACE */
1857            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1858            case 0x3000:    /* IDEOGRAPHIC SPACE */
1859            ADD_NEW(state_offset + 1, 0);
1860            break;
1861            }
1862          break;
1863    
1864          /*-----------------------------------------------------------------*/
1865        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1866        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1867        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1205  for (;;) Line 1869  for (;;)
1869        case OP_NOT:        case OP_NOT:
1870        if (clen > 0)        if (clen > 0)
1871          {          {
1872          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1873          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1874          }          }
1875        break;        break;
# Line 1213  for (;;) Line 1877  for (;;)
1877        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1878        case OP_PLUS:        case OP_PLUS:
1879        case OP_MINPLUS:        case OP_MINPLUS:
1880          case OP_POSPLUS:
1881        case OP_NOTPLUS:        case OP_NOTPLUS:
1882        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1883          case OP_NOTPOSPLUS:
1884        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1885        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1886        if (clen > 0)        if (clen > 0)
1887          {          {
1888          int otherd = -1;          unsigned int otherd = NOTACHAR;
1889          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1890            {            {
1891  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1234  for (;;) Line 1900  for (;;)
1900            otherd = fcc[d];            otherd = fcc[d];
1901            }            }
1902          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1903            { count++; ADD_NEW(state_offset, count); }            {
1904              if (count > 0 &&
1905                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1906                {
1907                active_count--;             /* Remove non-match possibility */
1908                next_active_state--;
1909                }
1910              count++;
1911              ADD_NEW(state_offset, count);
1912              }
1913          }          }
1914        break;        break;
1915    
1916        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1917        case OP_QUERY:        case OP_QUERY:
1918        case OP_MINQUERY:        case OP_MINQUERY:
1919          case OP_POSQUERY:
1920        case OP_NOTQUERY:        case OP_NOTQUERY:
1921        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1922          case OP_NOTPOSQUERY:
1923        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1924        if (clen > 0)        if (clen > 0)
1925          {          {
1926          int otherd = -1;          unsigned int otherd = NOTACHAR;
1927          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1928            {            {
1929  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1261  for (;;) Line 1938  for (;;)
1938            otherd = fcc[d];            otherd = fcc[d];
1939            }            }
1940          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1941            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1942              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1943                {
1944                active_count--;            /* Remove non-match possibility */
1945                next_active_state--;
1946                }
1947              ADD_NEW(state_offset + dlen + 1, 0);
1948              }
1949          }          }
1950        break;        break;
1951    
1952        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1953        case OP_STAR:        case OP_STAR:
1954        case OP_MINSTAR:        case OP_MINSTAR:
1955          case OP_POSSTAR:
1956        case OP_NOTSTAR:        case OP_NOTSTAR:
1957        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1958          case OP_NOTPOSSTAR:
1959        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1960        if (clen > 0)        if (clen > 0)
1961          {          {
1962          int otherd = -1;          unsigned int otherd = NOTACHAR;
1963          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1964            {            {
1965  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1288  for (;;) Line 1974  for (;;)
1974            otherd = fcc[d];            otherd = fcc[d];
1975            }            }
1976          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1977            { ADD_NEW(state_offset, 0); }            {
1978              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1979                {
1980                active_count--;            /* Remove non-match possibility */
1981                next_active_state--;
1982                }
1983              ADD_NEW(state_offset, 0);
1984              }
1985          }          }
1986        break;        break;
1987    
1988        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1989        case OP_EXACT:        case OP_EXACT:
1990          case OP_NOTEXACT:
1991          count = current_state->count;  /* Number already matched */
1992          if (clen > 0)
1993            {
1994            unsigned int otherd = NOTACHAR;
1995            if ((ims & PCRE_CASELESS) != 0)
1996              {
1997    #ifdef SUPPORT_UTF8
1998              if (utf8 && d >= 128)
1999                {
2000    #ifdef SUPPORT_UCP
2001                otherd = _pcre_ucp_othercase(d);
2002    #endif  /* SUPPORT_UCP */
2003                }
2004              else
2005    #endif  /* SUPPORT_UTF8 */
2006              otherd = fcc[d];
2007              }
2008            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2009              {
2010              if (++count >= GET2(code, 1))
2011                { ADD_NEW(state_offset + dlen + 3, 0); }
2012              else
2013                { ADD_NEW(state_offset, count); }
2014              }
2015            }
2016          break;
2017    
2018          /*-----------------------------------------------------------------*/
2019        case OP_UPTO:        case OP_UPTO:
2020        case OP_MINUPTO:        case OP_MINUPTO:
2021        case OP_NOTEXACT:        case OP_POSUPTO:
2022        case OP_NOTUPTO:        case OP_NOTUPTO:
2023        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2024        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2025          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2026        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2027        if (clen > 0)        if (clen > 0)
2028          {          {
2029          int otherd = -1;          unsigned int otherd = NOTACHAR;
2030          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2031            {            {
2032  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 2042  for (;;)
2042            }            }
2043          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2044            {            {
2045              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2046                {
2047                active_count--;             /* Remove non-match possibility */
2048                next_active_state--;
2049                }
2050            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2051              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2052            else            else
# Line 1448  for (;;) Line 2175  for (;;)
2175    
2176        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2177        case OP_COND:        case OP_COND:
2178          case OP_SCOND:
2179          {          {
2180          int local_offsets[1000];          int local_offsets[1000];
2181          int local_workspace[1000];          int local_workspace[1000];
2182          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2183    
2184          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
         means "test if in a recursion". */  
2185    
2186          if (condcode == OP_CREF)          if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2187    
2188            /* The DEFINE condition is always false */
2189    
2190            if (condcode == OP_DEF)
2191              {
2192              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2193              }
2194    
2195            /* The only supported version of OP_RREF is for the value RREF_ANY,
2196            which means "test if in any recursion". We can't test for specifically
2197            recursed groups. */
2198    
2199            else if (condcode == OP_RREF)
2200            {            {
2201            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2202            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2203            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2204              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2205            }            }
# Line 1728  is not anchored. Line 2468  is not anchored.
2468    
2469  Arguments:  Arguments:
2470    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2471    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2472    subject         points to the subject string    subject         points to the subject string
2473    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2474    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1744  Returns: > 0 => number of match Line 2484  Returns: > 0 => number of match
2484                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2485  */  */
2486    
2487  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2488  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2489    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2490    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 1834  md->end_subject = end_subject; Line 2574  md->end_subject = end_subject;
2574  md->moptions = options;  md->moptions = options;
2575  md->poptions = re->options;  md->poptions = re->options;
2576    
2577  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The three bits give eight cases. If
2578  is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2579    
2580  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2581           PCRE_NEWLINE_CRLF)           PCRE_NEWLINE_BITS)
2582    {    {
2583    default:              newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
2584    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
2585    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
2586    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2587         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2588      case PCRE_NEWLINE_ANY: newline = -1; break;
2589      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2590      default: return PCRE_ERROR_BADNEWLINE;
2591    }    }
2592    
2593  if (newline > 255)  if (newline == -2)
2594      {
2595      md->nltype = NLTYPE_ANYCRLF;
2596      }
2597    else if (newline < 0)
2598    {    {
2599    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
2600    }    }
2601  else  else
2602    {    {
2603    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
2604    md->nl[0] = newline;    if (newline > 255)
2605        {
2606        md->nllen = 2;
2607        md->nl[0] = (newline >> 8) & 255;
2608        md->nl[1] = newline & 255;
2609        }
2610      else
2611        {
2612        md->nllen = 1;
2613        md->nl[0] = newline;
2614        }
2615    }    }
2616    
2617  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 1946  for (;;) Line 2701  for (;;)
2701      if (firstline)      if (firstline)
2702        {        {
2703        const uschar *t = current_subject;        const uschar *t = current_subject;
2704        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2705        end_subject = t;        end_subject = t;
2706        }        }
2707    
# Line 1965  for (;;) Line 2720  for (;;)
2720    
2721      else if (startline)      else if (startline)
2722        {        {
2723        if (current_subject > md->start_subject + md->nllen +        if (current_subject > md->start_subject + start_offset)
           start_offset)  
2724          {          {
2725          while (current_subject <= end_subject &&          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2726                 !IS_NEWLINE(current_subject - md->nllen))            current_subject++;
2727    
2728            /* If we have just passed a CR and the newline option is ANY or
2729            ANYCRLF, and we are now at a LF, advance the match position by one more
2730            character. */
2731    
2732            if (current_subject[-1] == '\r' &&
2733                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2734                 current_subject < end_subject &&
2735                 *current_subject == '\n')
2736            current_subject++;            current_subject++;
2737          }          }
2738        }        }
# Line 2070  for (;;) Line 2833  for (;;)
2833    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2834    and firstline is set. */    and firstline is set. */
2835    
2836    if (firstline &&    if (firstline && IS_NEWLINE(current_subject)) break;
       current_subject <= end_subject - md->nllen &&  
       IS_NEWLINE(current_subject)) break;  
2837    current_subject++;    current_subject++;
2838    if (utf8)    if (utf8)
2839      {      {
# Line 2080  for (;;) Line 2841  for (;;)
2841        current_subject++;        current_subject++;
2842      }      }
2843    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2844    }  
2845      /* If we have just passed a CR and the newline option is CRLF or ANY or
2846      ANYCRLF, and we are now at a LF, advance the match position by one more
2847      character. */
2848    
2849      if (current_subject[-1] == '\r' &&
2850           (md->nltype == NLTYPE_ANY ||
2851            md->nltype == NLTYPE_ANYCRLF ||
2852            md->nllen == 2) &&
2853           current_subject < end_subject &&
2854           *current_subject == '\n')
2855        current_subject++;
2856    
2857      }   /* "Bumpalong" loop */
2858    
2859  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2860  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.211

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12