/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 145 by ph10, Wed Apr 4 14:06:52 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47    #define NLBLOCK md             /* Block containing newline information */
48    #define PSSTART start_subject  /* Field containing processed string start */
49    #define PSEND   end_subject    /* Field containing processed string end */
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
52    
53    
# Line 57  compatible, but it has advantages in cer Line 62  compatible, but it has advantages in cer
62  *************************************************/  *************************************************/
63    
64  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
65  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
66  enough. */  enough. */
67    
68  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  #define OP_PROP_EXTRA 100
69  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_EXTUNI_EXTRA 120
70    #define OP_ANYNL_EXTRA 140
71    
72    
73  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
# Line 74  static uschar coptable[] = { Line 80  static uschar coptable[] = {
80    0,                             /* End                                    */    0,                             /* End                                    */
81    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
82    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
83    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
84    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
85    1,                             /* Char                                   */    1,                             /* Char                                   */
86    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 88  static uschar coptable[] = {
88    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
89    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
90    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
91      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
92    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
93    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
94    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
95      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
96    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
97    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
98    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
99      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
100    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
101    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
102    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 115  static uschar coptable[] = {
115    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
116    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
117    0,                             /* Reverse                                */    0,                             /* Reverse                                */
118    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
119    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
120    0,                             /* CREF                                   */    0,                             /* CREF                                   */
121    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0,                             /* RREF                                   */
122    0,                             /* BRANUMBER                              */    0,                             /* DEF                                    */
123    0                              /* BRA                                    */    0, 0                           /* BRAZERO, BRAMINZERO                    */
124  };  };
125    
126  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 277  stateblock *next_active_state, *next_new Line 286  stateblock *next_active_state, *next_new
286    
287  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
288  const uschar *ptr;  const uschar *ptr;
289  const uschar *end_code;  const uschar *end_code, *first_op;
290    
291  int active_count, new_count, match_count;  int active_count, new_count, match_count;
292    
# Line 290  const uschar *start_code = md->start_cod Line 299  const uschar *start_code = md->start_cod
299    
300  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
301  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
302    #else
303    BOOL utf8 = FALSE;
304  #endif  #endif
305    
306  rlevel++;  rlevel++;
# Line 313  active_states = (stateblock *)(workspace Line 324  active_states = (stateblock *)(workspace
324  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
325  new_count = 0;  new_count = 0;
326    
327    first_op = this_start_code + 1 + LINK_SIZE +
328      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
329    
330  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
331  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
332  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 322  If the first opcode in the first alterna Line 336  If the first opcode in the first alterna
336  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
337  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
338    
339  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
340    {    {
341    int max_back = 0;    int max_back = 0;
342    int gone_back;    int gone_back;
# Line 404  else Line 418  else
418    
419    else    else
420      {      {
421        int length = 1 + LINK_SIZE +
422          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
423      do      do
424        {        {
425        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
426        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
427          length = 1 + LINK_SIZE;
428        }        }
429      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
430      }      }
# Line 423  ptr = current_subject; Line 440  ptr = current_subject;
440  for (;;)  for (;;)
441    {    {
442    int i, j;    int i, j;
443    int c, d, clen, dlen;    int clen, dlen;
444      unsigned int c, d;
445    
446    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
447    new state list. */    new state list. */
# Line 459  for (;;) Line 477  for (;;)
477    
478    if (ptr < end_subject)    if (ptr < end_subject)
479      {      {
480      clen = 1;      clen = 1;        /* Number of bytes in the character */
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
482      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
483  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 467  for (;;) Line 485  for (;;)
485      }      }
486    else    else
487      {      {
488      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
489      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
490      }      }
491    
492    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 486  for (;;) Line 504  for (;;)
504    
505  #ifdef DEBUG  #ifdef DEBUG
506      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
507      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
508        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
509          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
510  #endif  #endif
# Line 530  for (;;) Line 548  for (;;)
548    
549      code = start_code + state_offset;      code = start_code + state_offset;
550      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
551    
552      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
553      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 552  for (;;) Line 569  for (;;)
569        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
570        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
571          {          {
572          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
573          if (d >= OP_NOTPROP)            {
574            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
575              case OP_NOTPROP:
576              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
577              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
578              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
579              default: break;
580              }
581          }          }
582        }        }
583      else      else
584        {        {
585        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
586        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
587        }        }
588    
589    
# Line 622  for (;;) Line 645  for (;;)
645    
646        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
647        case OP_BRA:        case OP_BRA:
648          case OP_SBRA:
649        do        do
650          {          {
651          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 631  for (;;) Line 655  for (;;)
655        break;        break;
656    
657        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
658          case OP_CBRA:
659          case OP_SCBRA:
660          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
661          code += GET(code, 1);
662          while (*code == OP_ALT)
663            {
664            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
665            code += GET(code, 1);
666            }
667          break;
668    
669          /*-----------------------------------------------------------------*/
670        case OP_BRAZERO:        case OP_BRAZERO:
671        case OP_BRAMINZERO:        case OP_BRAMINZERO:
672        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 640  for (;;) Line 676  for (;;)
676        break;        break;
677    
678        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
679        case OP_CIRC:        case OP_CIRC:
680        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
681            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
682                ptr != end_subject &&
683                WAS_NEWLINE(ptr)))
684          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
685        break;        break;
686    
# Line 681  for (;;) Line 714  for (;;)
714    
715        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
716        case OP_ANY:        case OP_ANY:
717        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
718          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
719        break;        break;
720    
721        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
722        case OP_EODN:        case OP_EODN:
723        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
724          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
725        break;        break;
726    
# Line 695  for (;;) Line 728  for (;;)
728        case OP_DOLL:        case OP_DOLL:
729        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
730          {          {
731          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
732                                  (ims & PCRE_MULTILINE) != 0)))              (IS_NEWLINE(ptr) &&
733                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
734                ))
735            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
736          }          }
737        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
738          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
739        break;        break;
740    
# Line 805  for (;;) Line 840  for (;;)
840    
841        case OP_TYPEPLUS:        case OP_TYPEPLUS:
842        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
843          case OP_TYPEPOSPLUS:
844        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
845        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
846        if (clen > 0)        if (clen > 0)
847          {          {
848          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
849              (c < 256 &&              (c < 256 &&
850                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
851                   (ims & PCRE_DOTALL) != 0 ||
852                   !IS_NEWLINE(ptr)
853                  ) &&
854                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
855            {            {
856              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
857                {
858                active_count--;            /* Remove non-match possibility */
859                next_active_state--;
860                }
861            count++;            count++;
862            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
863            }            }
# Line 823  for (;;) Line 867  for (;;)
867        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
868        case OP_TYPEQUERY:        case OP_TYPEQUERY:
869        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
870          case OP_TYPEPOSQUERY:
871        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
872        if (clen > 0)        if (clen > 0)
873          {          {
874          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
875              (c < 256 &&              (c < 256 &&
876                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
877                   (ims & PCRE_DOTALL) != 0 ||
878                   !IS_NEWLINE(ptr)
879                  ) &&
880                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
881            {            {
882              if (codevalue == OP_TYPEPOSQUERY)
883                {
884                active_count--;            /* Remove non-match possibility */
885                next_active_state--;
886                }
887            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
888            }            }
889          }          }
# Line 839  for (;;) Line 892  for (;;)
892        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
893        case OP_TYPESTAR:        case OP_TYPESTAR:
894        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
895          case OP_TYPEPOSSTAR:
896        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
897        if (clen > 0)        if (clen > 0)
898          {          {
899          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
900              (c < 256 &&              (c < 256 &&
901                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
902                   (ims & PCRE_DOTALL) != 0 ||
903                   !IS_NEWLINE(ptr)
904                  ) &&
905                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
906            {            {
907              if (codevalue == OP_TYPEPOSSTAR)
908                {
909                active_count--;            /* Remove non-match possibility */
910                next_active_state--;
911                }
912            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
913            }            }
914          }          }
# Line 854  for (;;) Line 916  for (;;)
916    
917        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
918        case OP_TYPEEXACT:        case OP_TYPEEXACT:
919          count = current_state->count;  /* Number already matched */
920          if (clen > 0)
921            {
922            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
923                (c < 256 &&
924                  (d != OP_ANY ||
925                   (ims & PCRE_DOTALL) != 0 ||
926                   !IS_NEWLINE(ptr)
927                  ) &&
928                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
929              {
930              if (++count >= GET2(code, 1))
931                { ADD_NEW(state_offset + 4, 0); }
932              else
933                { ADD_NEW(state_offset, count); }
934              }
935            }
936          break;
937    
938          /*-----------------------------------------------------------------*/
939        case OP_TYPEUPTO:        case OP_TYPEUPTO:
940        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
941        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
942          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
943        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
944        if (clen > 0)        if (clen > 0)
945          {          {
946          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
947              (c < 256 &&              (c < 256 &&
948                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
949                   (ims & PCRE_DOTALL) != 0 ||
950                   !IS_NEWLINE(ptr)
951                  ) &&
952                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
953            {            {
954              if (codevalue == OP_TYPEPOSUPTO)
955                {
956                active_count--;           /* Remove non-match possibility */
957                next_active_state--;
958                }
959            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
960              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
961            else            else
# Line 876  for (;;) Line 966  for (;;)
966    
967  /* ========================================================================== */  /* ========================================================================== */
968        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
969        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
970        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
971        d variable. */        is in the d variable. */
972    
973        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
974        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
975          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
976        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
977        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
978        if (clen > 0)        if (clen > 0)
# Line 917  for (;;) Line 1008  for (;;)
1008            break;            break;
1009            }            }
1010    
1011          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1012              {
1013              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1014                {
1015                active_count--;           /* Remove non-match possibility */
1016                next_active_state--;
1017                }
1018              count++;
1019              ADD_NEW(state_offset, count);
1020              }
1021          }          }
1022        break;        break;
1023    
1024        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1025        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1026        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1027          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1028        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1029        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1030        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1031          {          {
1032          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1033          int ncount = 0;          int ncount = 0;
1034            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1035              {
1036              active_count--;           /* Remove non-match possibility */
1037              next_active_state--;
1038              }
1039          while (nptr < end_subject)          while (nptr < end_subject)
1040            {            {
1041            int nd;            int nd;
# Line 945  for (;;) Line 1051  for (;;)
1051        break;        break;
1052    
1053        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1054          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1055          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1056          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1057          count = current_state->count;  /* Already matched */
1058          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1059          if (clen > 0)
1060            {
1061            int ncount = 0;
1062            switch (c)
1063              {
1064              case 0x000d:
1065              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1066              /* Fall through */
1067              case 0x000a:
1068              case 0x000b:
1069              case 0x000c:
1070              case 0x0085:
1071              case 0x2028:
1072              case 0x2029:
1073              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1074                {
1075                active_count--;           /* Remove non-match possibility */
1076                next_active_state--;
1077                }
1078              count++;
1079              ADD_NEW_DATA(-state_offset, count, ncount);
1080              break;
1081              default:
1082              break;
1083              }
1084            }
1085          break;
1086    
1087          /*-----------------------------------------------------------------*/
1088        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1089        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1090          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1091        count = 4;        count = 4;
1092        goto QS1;        goto QS1;
1093    
1094        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1095        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1096          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1097        count = 0;        count = 0;
1098    
1099        QS1:        QS1:
# Line 990  for (;;) Line 1132  for (;;)
1132            break;            break;
1133            }            }
1134    
1135          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1136              {
1137              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1138                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1139                {
1140                active_count--;           /* Remove non-match possibility */
1141                next_active_state--;
1142                }
1143              ADD_NEW(state_offset + count, 0);
1144              }
1145          }          }
1146        break;        break;
1147    
1148        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1149        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1150        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1151          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1152        count = 2;        count = 2;
1153        goto QS2;        goto QS2;
1154    
1155        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1156        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1157          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1158        count = 0;        count = 0;
1159    
1160        QS2:        QS2:
# Line 1011  for (;;) Line 1164  for (;;)
1164          {          {
1165          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1166          int ncount = 0;          int ncount = 0;
1167            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1168                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1169              {
1170              active_count--;           /* Remove non-match possibility */
1171              next_active_state--;
1172              }
1173          while (nptr < end_subject)          while (nptr < end_subject)
1174            {            {
1175            int nd;            int nd;
# Line 1025  for (;;) Line 1184  for (;;)
1184        break;        break;
1185    
1186        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1187          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1188          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1189          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1190          count = 2;
1191          goto QS3;
1192    
1193          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1194          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1195          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1196          count = 0;
1197    
1198          QS3:
1199          ADD_ACTIVE(state_offset + 2, 0);
1200          if (clen > 0)
1201            {
1202            int ncount = 0;
1203            switch (c)
1204              {
1205              case 0x000d:
1206              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1207              /* Fall through */
1208              case 0x000a:
1209              case 0x000b:
1210              case 0x000c:
1211              case 0x0085:
1212              case 0x2028:
1213              case 0x2029:
1214              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1215                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1216                {
1217                active_count--;           /* Remove non-match possibility */
1218                next_active_state--;
1219                }
1220              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1221              break;
1222              default:
1223              break;
1224              }
1225            }
1226          break;
1227    
1228          /*-----------------------------------------------------------------*/
1229        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1230        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1231        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1232          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1233        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1234          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1235        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1066  for (;;) Line 1268  for (;;)
1268    
1269          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1270            {            {
1271              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1272                {
1273                active_count--;           /* Remove non-match possibility */
1274                next_active_state--;
1275                }
1276            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1277              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1278            else            else
# Line 1078  for (;;) Line 1285  for (;;)
1285        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1286        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1287        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1288          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1289        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1290          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1291        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1085  for (;;) Line 1293  for (;;)
1293          {          {
1294          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1295          int ncount = 0;          int ncount = 0;
1296            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1297              {
1298              active_count--;           /* Remove non-match possibility */
1299              next_active_state--;
1300              }
1301          while (nptr < end_subject)          while (nptr < end_subject)
1302            {            {
1303            int nd;            int nd;
# Line 1101  for (;;) Line 1314  for (;;)
1314          }          }
1315        break;        break;
1316    
1317          /*-----------------------------------------------------------------*/
1318          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1319          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1320          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1321          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1322          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1323            { ADD_ACTIVE(state_offset + 4, 0); }
1324          count = current_state->count;  /* Number already matched */
1325          if (clen > 0)
1326            {
1327            int ncount = 0;
1328            switch (c)
1329              {
1330              case 0x000d:
1331              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1332              /* Fall through */
1333              case 0x000a:
1334              case 0x000b:
1335              case 0x000c:
1336              case 0x0085:
1337              case 0x2028:
1338              case 0x2029:
1339              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1340                {
1341                active_count--;           /* Remove non-match possibility */
1342                next_active_state--;
1343                }
1344              if (++count >= GET2(code, 1))
1345                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1346              else
1347                { ADD_NEW_DATA(-state_offset, count, ncount); }
1348              break;
1349              default:
1350              break;
1351              }
1352            }
1353          break;
1354    
1355  /* ========================================================================== */  /* ========================================================================== */
1356        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
1357        to the current subject character; it is loaded into d. We still get        to the current subject character; it is loaded into d. We still get
# Line 1121  for (;;) Line 1372  for (;;)
1372          {          {
1373          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1374            {            {
1375            int othercase;            unsigned int othercase;
1376            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1377    
1378            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1130  for (;;) Line 1381  for (;;)
1381  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1382            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1383  #else  #else
1384            othercase = -1;            othercase = NOTACHAR;
1385  #endif  #endif
1386    
1387            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1171  for (;;) Line 1422  for (;;)
1422  #endif  #endif
1423    
1424        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1425          /* This is a tricky one like EXTUNI because it too can match more than one
1426          character (when CR is followed by LF). In this case, set up a negative
1427          state to wait for one character to pass before continuing. */
1428    
1429          case OP_ANYNL:
1430          if (clen > 0) switch(c)
1431            {
1432            case 0x000a:
1433            case 0x000b:
1434            case 0x000c:
1435            case 0x0085:
1436            case 0x2028:
1437            case 0x2029:
1438            ADD_NEW(state_offset + 1, 0);
1439            break;
1440            case 0x000d:
1441            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1442              {
1443              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1444              }
1445            else
1446              {
1447              ADD_NEW(state_offset + 1, 0);
1448              }
1449            break;
1450            }
1451          break;
1452    
1453          /*-----------------------------------------------------------------*/
1454        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1455        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1456        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1178  for (;;) Line 1458  for (;;)
1458        case OP_NOT:        case OP_NOT:
1459        if (clen > 0)        if (clen > 0)
1460          {          {
1461          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1462          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1463          }          }
1464        break;        break;
# Line 1186  for (;;) Line 1466  for (;;)
1466        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1467        case OP_PLUS:        case OP_PLUS:
1468        case OP_MINPLUS:        case OP_MINPLUS:
1469          case OP_POSPLUS:
1470        case OP_NOTPLUS:        case OP_NOTPLUS:
1471        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1472          case OP_NOTPOSPLUS:
1473        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1474        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1475        if (clen > 0)        if (clen > 0)
1476          {          {
1477          int otherd = -1;          unsigned int otherd = NOTACHAR;
1478          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1479            {            {
1480  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1207  for (;;) Line 1489  for (;;)
1489            otherd = fcc[d];            otherd = fcc[d];
1490            }            }
1491          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1492            { count++; ADD_NEW(state_offset, count); }            {
1493              if (count > 0 &&
1494                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1495                {
1496                active_count--;             /* Remove non-match possibility */
1497                next_active_state--;
1498                }
1499              count++;
1500              ADD_NEW(state_offset, count);
1501              }
1502          }          }
1503        break;        break;
1504    
1505        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1506        case OP_QUERY:        case OP_QUERY:
1507        case OP_MINQUERY:        case OP_MINQUERY:
1508          case OP_POSQUERY:
1509        case OP_NOTQUERY:        case OP_NOTQUERY:
1510        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1511          case OP_NOTPOSQUERY:
1512        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1513        if (clen > 0)        if (clen > 0)
1514          {          {
1515          int otherd = -1;          unsigned int otherd = NOTACHAR;
1516          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1517            {            {
1518  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1519            if (utf8 && d >= 128)            if (utf8 && d >= 128)
# Line 1234  for (;;) Line 1527  for (;;)
1527            otherd = fcc[d];            otherd = fcc[d];
1528            }            }
1529          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1530            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1531              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1532                {
1533                active_count--;            /* Remove non-match possibility */
1534                next_active_state--;
1535                }
1536              ADD_NEW(state_offset + dlen + 1, 0);
1537              }
1538          }          }
1539        break;        break;
1540    
1541        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1542        case OP_STAR:        case OP_STAR:
1543        case OP_MINSTAR:        case OP_MINSTAR:
1544          case OP_POSSTAR:
1545        case OP_NOTSTAR:        case OP_NOTSTAR:
1546        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1547          case OP_NOTPOSSTAR:
1548        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1549        if (clen > 0)        if (clen > 0)
1550          {          {
1551          int otherd = -1;          unsigned int otherd = NOTACHAR;
1552          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1553            {            {
1554  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1555            if (utf8 && d >= 128)            if (utf8 && d >= 128)
# Line 1261  for (;;) Line 1563  for (;;)
1563            otherd = fcc[d];            otherd = fcc[d];
1564            }            }
1565          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1566            { ADD_NEW(state_offset, 0); }            {
1567              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1568                {
1569                active_count--;            /* Remove non-match possibility */
1570                next_active_state--;
1571                }
1572              ADD_NEW(state_offset, 0);
1573              }
1574          }          }
1575        break;        break;
1576    
1577        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1578        case OP_EXACT:        case OP_EXACT:
1579          case OP_NOTEXACT:
1580          count = current_state->count;  /* Number already matched */
1581          if (clen > 0)
1582            {
1583            unsigned int otherd = NOTACHAR;
1584            if ((ims & PCRE_CASELESS) != 0)
1585              {
1586    #ifdef SUPPORT_UTF8
1587              if (utf8 && d >= 128)
1588                {
1589    #ifdef SUPPORT_UCP
1590                otherd = _pcre_ucp_othercase(d);
1591    #endif  /* SUPPORT_UCP */
1592                }
1593              else
1594    #endif  /* SUPPORT_UTF8 */
1595              otherd = fcc[d];
1596              }
1597            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1598              {
1599              if (++count >= GET2(code, 1))
1600                { ADD_NEW(state_offset + dlen + 3, 0); }
1601              else
1602                { ADD_NEW(state_offset, count); }
1603              }
1604            }
1605          break;
1606    
1607          /*-----------------------------------------------------------------*/
1608        case OP_UPTO:        case OP_UPTO:
1609        case OP_MINUPTO:        case OP_MINUPTO:
1610        case OP_NOTEXACT:        case OP_POSUPTO:
1611        case OP_NOTUPTO:        case OP_NOTUPTO:
1612        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
1613        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
1614          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
1615        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1616        if (clen > 0)        if (clen > 0)
1617          {          {
1618          int otherd = -1;          unsigned int otherd = NOTACHAR;
1619          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1620            {            {
1621  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1293  for (;;) Line 1631  for (;;)
1631            }            }
1632          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1633            {            {
1634              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
1635                {
1636                active_count--;             /* Remove non-match possibility */
1637                next_active_state--;
1638                }
1639            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1640              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
1641            else            else
# Line 1370  for (;;) Line 1713  for (;;)
1713              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
1714            if (isinclass)            if (isinclass)
1715              {              {
1716              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
1717                if (++count >= max && max != 0)   /* Max 0 => no limit */
1718                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
1719              else              else
1720                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1420  for (;;) Line 1764  for (;;)
1764    
1765        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1766        case OP_COND:        case OP_COND:
1767          case OP_SCOND:
1768          {          {
1769          int local_offsets[1000];          int local_offsets[1000];
1770          int local_workspace[1000];          int local_workspace[1000];
1771          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
1772    
1773          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
1774          means "test if in a recursion". */  
1775            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
1776    
1777            /* The DEFINE condition is always false */
1778    
1779          if (condcode == OP_CREF)          if (condcode == OP_DEF)
1780              {
1781              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
1782              }
1783    
1784            /* The only supported version of OP_RREF is for the value RREF_ANY,
1785            which means "test if in any recursion". We can't test for specifically
1786            recursed groups. */
1787    
1788            else if (condcode == OP_RREF)
1789            {            {
1790            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
1791            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
1792            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
1793              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1794            }            }
# Line 1670  for (;;) Line 2027  for (;;)
2027      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2028        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2029        rlevel*2-2, SP));        rlevel*2-2, SP));
2030      return match_count;      break;        /* In effect, "return", but see the comment below */
2031      }      }
2032    
2033    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1678  for (;;) Line 2035  for (;;)
2035    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2036    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2037    
2038  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2039    if we use "return" above, we have compiler trouble. Some compilers warn if
2040    there's nothing here because they think the function doesn't return a value. On
2041    the other hand, if we put a dummy statement here, some more clever compilers
2042    complain that it can't be reached. Sigh. */
2043    
2044  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2045  }  }
2046    
2047    
# Line 1698  is not anchored. Line 2057  is not anchored.
2057    
2058  Arguments:  Arguments:
2059    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2060    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2061    subject         points to the subject string    subject         points to the subject string
2062    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2063    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1714  Returns: > 0 => number of match Line 2073  Returns: > 0 => number of match
2073                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2074  */  */
2075    
2076  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2077  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2078    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2079    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2080  {  {
2081  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2082  dfa_match_data match_block;  dfa_match_data match_block;
2083    dfa_match_data *md = &match_block;
2084  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2085  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2086    
# Line 1735  BOOL req_byte_caseless = FALSE; Line 2095  BOOL req_byte_caseless = FALSE;
2095  int first_byte = -1;  int first_byte = -1;
2096  int req_byte = -1;  int req_byte = -1;
2097  int req_byte2 = -1;  int req_byte2 = -1;
2098    int newline;
2099    
2100  /* Plausibility checks */  /* Plausibility checks */
2101    
# Line 1749  flipping, so we scan the extra_data bloc Line 2110  flipping, so we scan the extra_data bloc
2110  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2111  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2112    
2113  match_block.tables = re->tables;  md->tables = re->tables;
2114  match_block.callout_data = NULL;  md->callout_data = NULL;
2115    
2116  if (extra_data != NULL)  if (extra_data != NULL)
2117    {    {
# Line 1761  if (extra_data != NULL) Line 2122  if (extra_data != NULL)
2122    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2123      return PCRE_ERROR_DFA_UMLIMIT;      return PCRE_ERROR_DFA_UMLIMIT;
2124    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2125      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2126    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2127      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2128    }    }
2129    
2130  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1784  current_subject = (const unsigned char * Line 2145  current_subject = (const unsigned char *
2145  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2146  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2147    
2148    #ifdef SUPPORT_UTF8
2149  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2150    #else
2151    utf8 = FALSE;
2152    #endif
2153    
2154  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2155    (re->options & PCRE_ANCHORED) != 0;    (re->options & PCRE_ANCHORED) != 0;
2156    
2157  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2158    
2159  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2160      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2161  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2162  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2163  match_block.moptions = options;  md->moptions = options;
2164  match_block.poptions = re->options;  md->poptions = re->options;
2165    
2166    /* Handle different types of newline. The three bits give eight cases. If
2167    nothing is set at run time, whatever was used at compile time applies. */
2168    
2169    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2170             PCRE_NEWLINE_BITS)
2171      {
2172      case 0: newline = NEWLINE; break;   /* Compile-time default */
2173      case PCRE_NEWLINE_CR: newline = '\r'; break;
2174      case PCRE_NEWLINE_LF: newline = '\n'; break;
2175      case PCRE_NEWLINE_CR+
2176           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2177      case PCRE_NEWLINE_ANY: newline = -1; break;
2178      default: return PCRE_ERROR_BADNEWLINE;
2179      }
2180    
2181    if (newline < 0)
2182      {
2183      md->nltype = NLTYPE_ANY;
2184      }
2185    else
2186      {
2187      md->nltype = NLTYPE_FIXED;
2188      if (newline > 255)
2189        {
2190        md->nllen = 2;
2191        md->nl[0] = (newline >> 8) & 255;
2192        md->nl[1] = newline & 255;
2193        }
2194      else
2195        {
2196        md->nllen = 1;
2197        md->nl[0] = newline;
2198        }
2199      }
2200    
2201  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2202  back the character offset. */  back the character offset. */
# Line 1822  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2222  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2222  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2223  in other programs later. */  in other programs later. */
2224    
2225  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2226    
2227  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2228  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2229    
2230  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2231  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->options & PCRE_STARTLINE) != 0;
2232  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2233    
# Line 1860  if ((re->options & PCRE_REQCHSET) != 0) Line 2260  if ((re->options & PCRE_REQCHSET) != 0)
2260    {    {
2261    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2262    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2263    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2264    }    }
2265    
2266  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1885  for (;;) Line 2285  for (;;)
2285      if (firstline)      if (firstline)
2286        {        {
2287        const uschar *t = current_subject;        const uschar *t = current_subject;
2288        while (t < save_end_subject && *t != '\n') t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2289        end_subject = t;        end_subject = t;
2290        }        }
2291    
# Line 1900  for (;;) Line 2300  for (;;)
2300            current_subject++;            current_subject++;
2301        }        }
2302    
2303      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
2304    
2305      else if (startline)      else if (startline)
2306        {        {
2307        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2308          {          {
2309          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2310              current_subject++;
2311    
2312            /* If we have just passed a CR and the newline option is ANY, and we
2313            are now at a LF, advance the match position by one more character. */
2314    
2315            if (current_subject[-1] == '\r' &&
2316                 md->nltype == NLTYPE_ANY &&
2317                 current_subject < end_subject &&
2318                 *current_subject == '\n')
2319            current_subject++;            current_subject++;
2320          }          }
2321        }        }
# Line 1987  for (;;) Line 2396  for (;;)
2396    /* OK, now we can do the business */    /* OK, now we can do the business */
2397    
2398    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2399      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2400      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2401      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2402      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2403      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2404      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2405      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2406      wscount,                                   /* size of same */      wscount,                           /* size of same */
2407      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2408      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2409      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2410    
2411    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2412    on only if not anchored. */    on only if not anchored. */
# Line 2007  for (;;) Line 2416  for (;;)
2416    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2417    and firstline is set. */    and firstline is set. */
2418    
2419    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2420    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2421    if (utf8)    if (utf8)
2422      {      {
2423      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2424        current_subject++;        current_subject++;
2425      }      }
 #endif  
   
2426    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2427    }  
2428      /* If we have just passed a CR and the newline option is CRLF or ANY, and we
2429      are now at a LF, advance the match position by one more character. */
2430    
2431      if (current_subject[-1] == '\r' &&
2432           (md->nltype == NLTYPE_ANY || md->nllen == 2) &&
2433           current_subject < end_subject &&
2434           *current_subject == '\n')
2435        current_subject++;
2436    
2437      }   /* "Bumpalong" loop */
2438    
2439  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2440  }  }

Legend:
Removed from v.87  
changed lines
  Added in v.145

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12