/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47  #define NLBLOCK md           /* The block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
48    #define PSSTART start_subject  /* Field containing processed string start */
49    #define PSEND   end_subject    /* Field containing processed string end */
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
52    
53    
# Line 58  compatible, but it has advantages in cer Line 62  compatible, but it has advantages in cer
62  *************************************************/  *************************************************/
63    
64  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
65  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
66  enough. */  enough. */
67    
68  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  #define OP_PROP_EXTRA 100
69  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_EXTUNI_EXTRA 120
70    #define OP_ANYNL_EXTRA 140
71    
72    
73  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
# Line 75  static uschar coptable[] = { Line 80  static uschar coptable[] = {
80    0,                             /* End                                    */    0,                             /* End                                    */
81    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
82    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
83    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
84    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
85    1,                             /* Char                                   */    1,                             /* Char                                   */
86    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 83  static uschar coptable[] = { Line 88  static uschar coptable[] = {
88    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
89    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
90    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
91      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
92    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
93    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
94    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
95      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
96    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
97    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
98    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
99      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
100    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
101    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
102    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 107  static uschar coptable[] = { Line 115  static uschar coptable[] = {
115    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
116    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
117    0,                             /* Reverse                                */    0,                             /* Reverse                                */
118    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
119    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
120    0,                             /* CREF                                   */    0,                             /* CREF                                   */
121    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0,                             /* RREF                                   */
122    0,                             /* BRANUMBER                              */    0,                             /* DEF                                    */
123    0                              /* BRA                                    */    0, 0                           /* BRAZERO, BRAMINZERO                    */
124  };  };
125    
126  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 278  stateblock *next_active_state, *next_new Line 286  stateblock *next_active_state, *next_new
286    
287  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
288  const uschar *ptr;  const uschar *ptr;
289  const uschar *end_code;  const uschar *end_code, *first_op;
290    
291  int active_count, new_count, match_count;  int active_count, new_count, match_count;
292    
# Line 291  const uschar *start_code = md->start_cod Line 299  const uschar *start_code = md->start_cod
299    
300  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
301  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
302    #else
303    BOOL utf8 = FALSE;
304  #endif  #endif
305    
306  rlevel++;  rlevel++;
# Line 314  active_states = (stateblock *)(workspace Line 324  active_states = (stateblock *)(workspace
324  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
325  new_count = 0;  new_count = 0;
326    
327    first_op = this_start_code + 1 + LINK_SIZE +
328      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
329    
330  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
331  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
332  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 323  If the first opcode in the first alterna Line 336  If the first opcode in the first alterna
336  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
337  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
338    
339  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
340    {    {
341    int max_back = 0;    int max_back = 0;
342    int gone_back;    int gone_back;
# Line 405  else Line 418  else
418    
419    else    else
420      {      {
421        int length = 1 + LINK_SIZE +
422          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
423      do      do
424        {        {
425        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
426        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
427          length = 1 + LINK_SIZE;
428        }        }
429      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
430      }      }
# Line 461  for (;;) Line 477  for (;;)
477    
478    if (ptr < end_subject)    if (ptr < end_subject)
479      {      {
480      clen = 1;      clen = 1;        /* Number of bytes in the character */
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
482      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
483  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 469  for (;;) Line 485  for (;;)
485      }      }
486    else    else
487      {      {
488      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
489      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
490      }      }
491    
492    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 488  for (;;) Line 504  for (;;)
504    
505  #ifdef DEBUG  #ifdef DEBUG
506      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
507      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
508        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
509          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
510  #endif  #endif
# Line 532  for (;;) Line 548  for (;;)
548    
549      code = start_code + state_offset;      code = start_code + state_offset;
550      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
551    
552      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
553      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 554  for (;;) Line 569  for (;;)
569        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
570        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
571          {          {
572          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
573          if (d >= OP_NOTPROP)            {
574            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
575              case OP_NOTPROP:
576              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
577              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
578              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
579              default: break;
580              }
581          }          }
582        }        }
583      else      else
584        {        {
585        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
586        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
587        }        }
588    
589    
# Line 624  for (;;) Line 645  for (;;)
645    
646        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
647        case OP_BRA:        case OP_BRA:
648          case OP_SBRA:
649        do        do
650          {          {
651          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 633  for (;;) Line 655  for (;;)
655        break;        break;
656    
657        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
658          case OP_CBRA:
659          case OP_SCBRA:
660          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
661          code += GET(code, 1);
662          while (*code == OP_ALT)
663            {
664            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
665            code += GET(code, 1);
666            }
667          break;
668    
669          /*-----------------------------------------------------------------*/
670        case OP_BRAZERO:        case OP_BRAZERO:
671        case OP_BRAMINZERO:        case OP_BRAMINZERO:
672        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 642  for (;;) Line 676  for (;;)
676        break;        break;
677    
678        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
679        case OP_CIRC:        case OP_CIRC:
680        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
681            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
             ptr >= start_subject + md->nllen &&  
682              ptr != end_subject &&              ptr != end_subject &&
683              IS_NEWLINE(ptr - md->nllen)))              WAS_NEWLINE(ptr)))
684          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
685        break;        break;
686    
# Line 686  for (;;) Line 714  for (;;)
714    
715        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
716        case OP_ANY:        case OP_ANY:
717        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
                        ptr > end_subject - md->nllen ||  
                        !IS_NEWLINE(ptr)))  
718          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
719        break;        break;
720    
721        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
722        case OP_EODN:        case OP_EODN:
723        if (clen == 0 ||        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
            (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))  
724          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
725        break;        break;
726    
# Line 704  for (;;) Line 729  for (;;)
729        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
730          {          {
731          if (clen == 0 ||          if (clen == 0 ||
732              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&              (IS_NEWLINE(ptr) &&
733                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
734              ))              ))
735            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
736          }          }
737        else if ((ims & PCRE_MULTILINE) != 0 &&        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
                ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))  
738          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
739        break;        break;
740    
# Line 816  for (;;) Line 840  for (;;)
840    
841        case OP_TYPEPLUS:        case OP_TYPEPLUS:
842        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
843          case OP_TYPEPOSPLUS:
844        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
845        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
846        if (clen > 0)        if (clen > 0)
# Line 824  for (;;) Line 849  for (;;)
849              (c < 256 &&              (c < 256 &&
850                (d != OP_ANY ||                (d != OP_ANY ||
851                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
852                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
853                ) &&                ) &&
854                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
855            {            {
856              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
857                {
858                active_count--;            /* Remove non-match possibility */
859                next_active_state--;
860                }
861            count++;            count++;
862            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
863            }            }
# Line 838  for (;;) Line 867  for (;;)
867        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
868        case OP_TYPEQUERY:        case OP_TYPEQUERY:
869        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
870          case OP_TYPEPOSQUERY:
871        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
872        if (clen > 0)        if (clen > 0)
873          {          {
# Line 845  for (;;) Line 875  for (;;)
875              (c < 256 &&              (c < 256 &&
876                (d != OP_ANY ||                (d != OP_ANY ||
877                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
878                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
879                ) &&                ) &&
880                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
881            {            {
882              if (codevalue == OP_TYPEPOSQUERY)
883                {
884                active_count--;            /* Remove non-match possibility */
885                next_active_state--;
886                }
887            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
888            }            }
889          }          }
# Line 858  for (;;) Line 892  for (;;)
892        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
893        case OP_TYPESTAR:        case OP_TYPESTAR:
894        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
895          case OP_TYPEPOSSTAR:
896        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
897        if (clen > 0)        if (clen > 0)
898          {          {
# Line 865  for (;;) Line 900  for (;;)
900              (c < 256 &&              (c < 256 &&
901                (d != OP_ANY ||                (d != OP_ANY ||
902                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
903                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
904                ) &&                ) &&
905                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
906            {            {
907              if (codevalue == OP_TYPEPOSSTAR)
908                {
909                active_count--;            /* Remove non-match possibility */
910                next_active_state--;
911                }
912            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
913            }            }
914          }          }
# Line 877  for (;;) Line 916  for (;;)
916    
917        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
918        case OP_TYPEEXACT:        case OP_TYPEEXACT:
919          count = current_state->count;  /* Number already matched */
920          if (clen > 0)
921            {
922            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
923                (c < 256 &&
924                  (d != OP_ANY ||
925                   (ims & PCRE_DOTALL) != 0 ||
926                   !IS_NEWLINE(ptr)
927                  ) &&
928                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
929              {
930              if (++count >= GET2(code, 1))
931                { ADD_NEW(state_offset + 4, 0); }
932              else
933                { ADD_NEW(state_offset, count); }
934              }
935            }
936          break;
937    
938          /*-----------------------------------------------------------------*/
939        case OP_TYPEUPTO:        case OP_TYPEUPTO:
940        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
941        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
942          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
943        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
944        if (clen > 0)        if (clen > 0)
945          {          {
# Line 888  for (;;) Line 947  for (;;)
947              (c < 256 &&              (c < 256 &&
948                (d != OP_ANY ||                (d != OP_ANY ||
949                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
950                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
951                ) &&                ) &&
952                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
953            {            {
954              if (codevalue == OP_TYPEPOSUPTO)
955                {
956                active_count--;           /* Remove non-match possibility */
957                next_active_state--;
958                }
959            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
960              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
961            else            else
# Line 903  for (;;) Line 966  for (;;)
966    
967  /* ========================================================================== */  /* ========================================================================== */
968        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
969        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
970        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
971        d variable. */        is in the d variable. */
972    
973        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
974        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
975          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
976        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
977        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
978        if (clen > 0)        if (clen > 0)
# Line 944  for (;;) Line 1008  for (;;)
1008            break;            break;
1009            }            }
1010    
1011          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1012              {
1013              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1014                {
1015                active_count--;           /* Remove non-match possibility */
1016                next_active_state--;
1017                }
1018              count++;
1019              ADD_NEW(state_offset, count);
1020              }
1021          }          }
1022        break;        break;
1023    
1024        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1025        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1026        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1027          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1028        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1029        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1030        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1031          {          {
1032          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1033          int ncount = 0;          int ncount = 0;
1034            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1035              {
1036              active_count--;           /* Remove non-match possibility */
1037              next_active_state--;
1038              }
1039          while (nptr < end_subject)          while (nptr < end_subject)
1040            {            {
1041            int nd;            int nd;
# Line 972  for (;;) Line 1051  for (;;)
1051        break;        break;
1052    
1053        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1054          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1055          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1056          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1057          count = current_state->count;  /* Already matched */
1058          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1059          if (clen > 0)
1060            {
1061            int ncount = 0;
1062            switch (c)
1063              {
1064              case 0x000d:
1065              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1066              /* Fall through */
1067              case 0x000a:
1068              case 0x000b:
1069              case 0x000c:
1070              case 0x0085:
1071              case 0x2028:
1072              case 0x2029:
1073              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1074                {
1075                active_count--;           /* Remove non-match possibility */
1076                next_active_state--;
1077                }
1078              count++;
1079              ADD_NEW_DATA(-state_offset, count, ncount);
1080              break;
1081              default:
1082              break;
1083              }
1084            }
1085          break;
1086    
1087          /*-----------------------------------------------------------------*/
1088        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1089        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1090          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1091        count = 4;        count = 4;
1092        goto QS1;        goto QS1;
1093    
1094        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1095        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1096          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1097        count = 0;        count = 0;
1098    
1099        QS1:        QS1:
# Line 1017  for (;;) Line 1132  for (;;)
1132            break;            break;
1133            }            }
1134    
1135          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1136              {
1137              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1138                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1139                {
1140                active_count--;           /* Remove non-match possibility */
1141                next_active_state--;
1142                }
1143              ADD_NEW(state_offset + count, 0);
1144              }
1145          }          }
1146        break;        break;
1147    
1148        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1149        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1150        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1151          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1152        count = 2;        count = 2;
1153        goto QS2;        goto QS2;
1154    
1155        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1156        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1157          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1158        count = 0;        count = 0;
1159    
1160        QS2:        QS2:
# Line 1038  for (;;) Line 1164  for (;;)
1164          {          {
1165          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1166          int ncount = 0;          int ncount = 0;
1167            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1168                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1169              {
1170              active_count--;           /* Remove non-match possibility */
1171              next_active_state--;
1172              }
1173          while (nptr < end_subject)          while (nptr < end_subject)
1174            {            {
1175            int nd;            int nd;
# Line 1052  for (;;) Line 1184  for (;;)
1184        break;        break;
1185    
1186        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1187          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1188          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1189          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1190          count = 2;
1191          goto QS3;
1192    
1193          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1194          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1195          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1196          count = 0;
1197    
1198          QS3:
1199          ADD_ACTIVE(state_offset + 2, 0);
1200          if (clen > 0)
1201            {
1202            int ncount = 0;
1203            switch (c)
1204              {
1205              case 0x000d:
1206              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1207              /* Fall through */
1208              case 0x000a:
1209              case 0x000b:
1210              case 0x000c:
1211              case 0x0085:
1212              case 0x2028:
1213              case 0x2029:
1214              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1215                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1216                {
1217                active_count--;           /* Remove non-match possibility */
1218                next_active_state--;
1219                }
1220              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1221              break;
1222              default:
1223              break;
1224              }
1225            }
1226          break;
1227    
1228          /*-----------------------------------------------------------------*/
1229        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1230        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1231        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1232          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1233        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1234          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1235        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1093  for (;;) Line 1268  for (;;)
1268    
1269          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1270            {            {
1271              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1272                {
1273                active_count--;           /* Remove non-match possibility */
1274                next_active_state--;
1275                }
1276            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1277              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1278            else            else
# Line 1105  for (;;) Line 1285  for (;;)
1285        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1286        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1287        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1288          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1289        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1290          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1291        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1112  for (;;) Line 1293  for (;;)
1293          {          {
1294          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1295          int ncount = 0;          int ncount = 0;
1296            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1297              {
1298              active_count--;           /* Remove non-match possibility */
1299              next_active_state--;
1300              }
1301          while (nptr < end_subject)          while (nptr < end_subject)
1302            {            {
1303            int nd;            int nd;
# Line 1128  for (;;) Line 1314  for (;;)
1314          }          }
1315        break;        break;
1316    
1317          /*-----------------------------------------------------------------*/
1318          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1319          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1320          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1321          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1322          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1323            { ADD_ACTIVE(state_offset + 4, 0); }
1324          count = current_state->count;  /* Number already matched */
1325          if (clen > 0)
1326            {
1327            int ncount = 0;
1328            switch (c)
1329              {
1330              case 0x000d:
1331              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1332              /* Fall through */
1333              case 0x000a:
1334              case 0x000b:
1335              case 0x000c:
1336              case 0x0085:
1337              case 0x2028:
1338              case 0x2029:
1339              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1340                {
1341                active_count--;           /* Remove non-match possibility */
1342                next_active_state--;
1343                }
1344              if (++count >= GET2(code, 1))
1345                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1346              else
1347                { ADD_NEW_DATA(-state_offset, count, ncount); }
1348              break;
1349              default:
1350              break;
1351              }
1352            }
1353          break;
1354    
1355  /* ========================================================================== */  /* ========================================================================== */
1356        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
1357        to the current subject character; it is loaded into d. We still get        to the current subject character; it is loaded into d. We still get
# Line 1148  for (;;) Line 1372  for (;;)
1372          {          {
1373          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1374            {            {
1375            int othercase;            unsigned int othercase;
1376            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1377    
1378            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1157  for (;;) Line 1381  for (;;)
1381  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1382            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1383  #else  #else
1384            othercase = -1;            othercase = NOTACHAR;
1385  #endif  #endif
1386    
1387            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1198  for (;;) Line 1422  for (;;)
1422  #endif  #endif
1423    
1424        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1425          /* This is tricky, like EXTUNI, because it too can match more than one
1426          character (when CR is followed by LF). In this case, set up a negative
1427          state to wait for one character to pass before continuing. */
1428    
1429          case OP_ANYNL:
1430          if (clen > 0) switch(c)
1431            {
1432            case 0x000a:
1433            case 0x000b:
1434            case 0x000c:
1435            case 0x0085:
1436            case 0x2028:
1437            case 0x2029:
1438            ADD_NEW(state_offset + 1, 0);
1439            break;
1440            case 0x000d:
1441            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1442              {
1443              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1444              }
1445            else
1446              {
1447              ADD_NEW(state_offset + 1, 0);
1448              }
1449            break;
1450            }
1451          break;
1452    
1453          /*-----------------------------------------------------------------*/
1454        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1455        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1456        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1205  for (;;) Line 1458  for (;;)
1458        case OP_NOT:        case OP_NOT:
1459        if (clen > 0)        if (clen > 0)
1460          {          {
1461          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1462          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1463          }          }
1464        break;        break;
# Line 1213  for (;;) Line 1466  for (;;)
1466        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1467        case OP_PLUS:        case OP_PLUS:
1468        case OP_MINPLUS:        case OP_MINPLUS:
1469          case OP_POSPLUS:
1470        case OP_NOTPLUS:        case OP_NOTPLUS:
1471        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1472          case OP_NOTPOSPLUS:
1473        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1474        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1475        if (clen > 0)        if (clen > 0)
1476          {          {
1477          int otherd = -1;          unsigned int otherd = NOTACHAR;
1478          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1479            {            {
1480  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1234  for (;;) Line 1489  for (;;)
1489            otherd = fcc[d];            otherd = fcc[d];
1490            }            }
1491          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1492            { count++; ADD_NEW(state_offset, count); }            {
1493              if (count > 0 &&
1494                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1495                {
1496                active_count--;             /* Remove non-match possibility */
1497                next_active_state--;
1498                }
1499              count++;
1500              ADD_NEW(state_offset, count);
1501              }
1502          }          }
1503        break;        break;
1504    
1505        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1506        case OP_QUERY:        case OP_QUERY:
1507        case OP_MINQUERY:        case OP_MINQUERY:
1508          case OP_POSQUERY:
1509        case OP_NOTQUERY:        case OP_NOTQUERY:
1510        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1511          case OP_NOTPOSQUERY:
1512        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1513        if (clen > 0)        if (clen > 0)
1514          {          {
1515          int otherd = -1;          unsigned int otherd = NOTACHAR;
1516          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1517            {            {
1518  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1261  for (;;) Line 1527  for (;;)
1527            otherd = fcc[d];            otherd = fcc[d];
1528            }            }
1529          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1530            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1531              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1532                {
1533                active_count--;            /* Remove non-match possibility */
1534                next_active_state--;
1535                }
1536              ADD_NEW(state_offset + dlen + 1, 0);
1537              }
1538          }          }
1539        break;        break;
1540    
1541        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1542        case OP_STAR:        case OP_STAR:
1543        case OP_MINSTAR:        case OP_MINSTAR:
1544          case OP_POSSTAR:
1545        case OP_NOTSTAR:        case OP_NOTSTAR:
1546        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1547          case OP_NOTPOSSTAR:
1548        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1549        if (clen > 0)        if (clen > 0)
1550          {          {
1551          int otherd = -1;          unsigned int otherd = NOTACHAR;
1552          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1553            {            {
1554  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1288  for (;;) Line 1563  for (;;)
1563            otherd = fcc[d];            otherd = fcc[d];
1564            }            }
1565          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1566            { ADD_NEW(state_offset, 0); }            {
1567              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1568                {
1569                active_count--;            /* Remove non-match possibility */
1570                next_active_state--;
1571                }
1572              ADD_NEW(state_offset, 0);
1573              }
1574          }          }
1575        break;        break;
1576    
1577        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1578        case OP_EXACT:        case OP_EXACT:
1579          case OP_NOTEXACT:
1580          count = current_state->count;  /* Number already matched */
1581          if (clen > 0)
1582            {
1583            unsigned int otherd = NOTACHAR;
1584            if ((ims & PCRE_CASELESS) != 0)
1585              {
1586    #ifdef SUPPORT_UTF8
1587              if (utf8 && d >= 128)
1588                {
1589    #ifdef SUPPORT_UCP
1590                otherd = _pcre_ucp_othercase(d);
1591    #endif  /* SUPPORT_UCP */
1592                }
1593              else
1594    #endif  /* SUPPORT_UTF8 */
1595              otherd = fcc[d];
1596              }
1597            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1598              {
1599              if (++count >= GET2(code, 1))
1600                { ADD_NEW(state_offset + dlen + 3, 0); }
1601              else
1602                { ADD_NEW(state_offset, count); }
1603              }
1604            }
1605          break;
1606    
1607          /*-----------------------------------------------------------------*/
1608        case OP_UPTO:        case OP_UPTO:
1609        case OP_MINUPTO:        case OP_MINUPTO:
1610        case OP_NOTEXACT:        case OP_POSUPTO:
1611        case OP_NOTUPTO:        case OP_NOTUPTO:
1612        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
1613        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
1614          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
1615        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1616        if (clen > 0)        if (clen > 0)
1617          {          {
1618          int otherd = -1;          unsigned int otherd = NOTACHAR;
1619          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1620            {            {
1621  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 1631  for (;;)
1631            }            }
1632          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1633            {            {
1634              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
1635                {
1636                active_count--;             /* Remove non-match possibility */
1637                next_active_state--;
1638                }
1639            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1640              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
1641            else            else
# Line 1448  for (;;) Line 1764  for (;;)
1764    
1765        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1766        case OP_COND:        case OP_COND:
1767          case OP_SCOND:
1768          {          {
1769          int local_offsets[1000];          int local_offsets[1000];
1770          int local_workspace[1000];          int local_workspace[1000];
1771          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
1772    
1773          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
1774          means "test if in a recursion". */  
1775            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
1776    
1777          if (condcode == OP_CREF)          /* The DEFINE condition is always false */
1778    
1779            if (condcode == OP_DEF)
1780              {
1781              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
1782              }
1783    
1784            /* The only supported version of OP_RREF is for the value RREF_ANY,
1785            which means "test if in any recursion". We can't test for specifically
1786            recursed groups. */
1787    
1788            else if (condcode == OP_RREF)
1789            {            {
1790            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
1791            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
1792            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
1793              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1794            }            }
# Line 1837  md->poptions = re->options; Line 2166  md->poptions = re->options;
2166  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The two bits give four cases. If nothing
2167  is set at run time, whatever was used at compile time applies. */  is set at run time, whatever was used at compile time applies. */
2168    
2169  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) &
2170           PCRE_NEWLINE_CRLF)           PCRE_NEWLINE_BITS)
2171    {    {
2172    default:              newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
2173    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
2174    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
2175    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2176         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2177      case PCRE_NEWLINE_ANY: newline = -1; break;
2178      default: return PCRE_ERROR_BADNEWLINE;
2179    }    }
2180    
2181  if (newline > 255)  if (newline < 0)
2182    {    {
2183    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
2184    }    }
2185  else  else
2186    {    {
2187    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
2188    md->nl[0] = newline;    if (newline > 255)
2189        {
2190        md->nllen = 2;
2191        md->nl[0] = (newline >> 8) & 255;
2192        md->nl[1] = newline & 255;
2193        }
2194      else
2195        {
2196        md->nllen = 1;
2197        md->nl[0] = newline;
2198        }
2199    }    }
2200    
2201  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 1946  for (;;) Line 2285  for (;;)
2285      if (firstline)      if (firstline)
2286        {        {
2287        const uschar *t = current_subject;        const uschar *t = current_subject;
2288        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2289        end_subject = t;        end_subject = t;
2290        }        }
2291    
# Line 1965  for (;;) Line 2304  for (;;)
2304    
2305      else if (startline)      else if (startline)
2306        {        {
2307        if (current_subject > md->start_subject + md->nllen +        if (current_subject > md->start_subject + start_offset)
           start_offset)  
2308          {          {
2309          while (current_subject <= end_subject &&          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
                !IS_NEWLINE(current_subject - md->nllen))  
2310            current_subject++;            current_subject++;
2311          }          }
2312        }        }
# Line 2070  for (;;) Line 2407  for (;;)
2407    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2408    and firstline is set. */    and firstline is set. */
2409    
2410    if (firstline &&    if (firstline && IS_NEWLINE(current_subject)) break;
       current_subject <= end_subject - md->nllen &&  
       IS_NEWLINE(current_subject)) break;  
2411    current_subject++;    current_subject++;
2412    if (utf8)    if (utf8)
2413      {      {
# Line 2080  for (;;) Line 2415  for (;;)
2415        current_subject++;        current_subject++;
2416      }      }
2417    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2418    }  
2419      /* If we have just passed a CR and the newline option is CRLF or ANY, and we
2420      are now at a LF, advance the match position by one more character. */
2421    
2422      if (current_subject[-1] == '\r' &&
2423           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
2424           current_subject < end_subject &&
2425           *current_subject == '\n')
2426        current_subject++;
2427    
2428      }   /* "Bumpalong" loop */
2429    
2430  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2431  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.93

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12