/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 151 by ph10, Tue Apr 17 15:07:29 2007 UTC revision 428 by ph10, Mon Aug 31 17:10:26 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  FSM). This is NOT Perl-compatible, but Line 45  FSM). This is NOT Perl-compatible, but
45  applications. */  applications. */
46    
47    
48    #ifdef HAVE_CONFIG_H
49    #include "config.h"
50    #endif
51    
52  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
53  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
54  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 56  applications. */ Line 61  applications. */
61  #define SP "                   "  #define SP "                   "
62    
63    
   
64  /*************************************************  /*************************************************
65  *      Code parameters and static tables         *  *      Code parameters and static tables         *
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72    
73  #define OP_PROP_EXTRA 100  #define OP_PROP_EXTRA       300
74  #define OP_EXTUNI_EXTRA 120  #define OP_EXTUNI_EXTRA     320
75  #define OP_ANYNL_EXTRA 140  #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0,                          /* Any, Anybyte                           */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92      0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 120  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 211  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 442  for (;;) Line 454  for (;;)
454    int i, j;    int i, j;
455    int clen, dlen;    int clen, dlen;
456    unsigned int c, d;    unsigned int c, d;
457      int forced_fail = 0;
458      int reached_end = 0;
459    
460    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
461    new state list. */    new state list. */
# Line 499  for (;;) Line 513  for (;;)
513      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
514      const uschar *code;      const uschar *code;
515      int state_offset = current_state->offset;      int state_offset = current_state->offset;
516      int count, codevalue;      int count, codevalue, rrc;
 #ifdef SUPPORT_UCP  
     int chartype, script;  
 #endif  
517    
518  #ifdef DEBUG  #ifdef DEBUG
519      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 557  for (;;) Line 568  for (;;)
568      permitted.      permitted.
569    
570      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
571      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
572      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
573      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
574      opcodes. */      */
575    
576      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
577        {        {
# Line 578  for (;;) Line 589  for (;;)
589            case OP_PROP: codevalue += OP_PROP_EXTRA; break;            case OP_PROP: codevalue += OP_PROP_EXTRA; break;
590            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
591            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
592              case OP_NOT_HSPACE:
593              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
594              case OP_NOT_VSPACE:
595              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
596            default: break;            default: break;
597            }            }
598          }          }
# Line 611  for (;;) Line 626  for (;;)
626            ADD_ACTIVE(state_offset - GET(code, 1), 0);            ADD_ACTIVE(state_offset - GET(code, 1), 0);
627            }            }
628          }          }
629        else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)        else
630          {          {
631          if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;          reached_end++;    /* Count branches that reach the end */
632            else if (match_count > 0 && ++match_count * 2 >= offsetcount)          if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
633              match_count = 0;            {
634          count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
635          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));              else if (match_count > 0 && ++match_count * 2 >= offsetcount)
636          if (offsetcount >= 2)                match_count = 0;
637            {            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
638            offsets[0] = current_subject - start_subject;            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
639            offsets[1] = ptr - start_subject;            if (offsetcount >= 2)
640            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              {
641              offsets[1] - offsets[0], current_subject));              offsets[0] = current_subject - start_subject;
642            }              offsets[1] = ptr - start_subject;
643          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
644            {                offsets[1] - offsets[0], current_subject));
645            DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"              }
646              "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
647              match_count, rlevel*2-2, SP));              {
648            return match_count;              DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
649            }                "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
650                  match_count, rlevel*2-2, SP));
651                return match_count;
652                }
653              }
654          }          }
655        break;        break;
656    
# Line 678  for (;;) Line 697  for (;;)
697        break;        break;
698    
699        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
700          case OP_SKIPZERO:
701          code += 1 + GET(code, 2);
702          while (*code == OP_ALT) code += GET(code, 1);
703          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
704          break;
705    
706          /*-----------------------------------------------------------------*/
707        case OP_CIRC:        case OP_CIRC:
708        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
709            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 716  for (;;) Line 742  for (;;)
742    
743        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
744        case OP_ANY:        case OP_ANY:
745        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))        if (clen > 0 && !IS_NEWLINE(ptr))
746            { ADD_NEW(state_offset + 1, 0); }
747          break;
748    
749          /*-----------------------------------------------------------------*/
750          case OP_ALLANY:
751          if (clen > 0)
752          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
753        break;        break;
754    
# Line 731  for (;;) Line 763  for (;;)
763        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
764          {          {
765          if (clen == 0 ||          if (clen == 0 ||
766              (IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
767                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
768              ))              ))
769            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 776  for (;;) Line 808  for (;;)
808            }            }
809          else left_word = 0;          else left_word = 0;
810    
811          if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;          if (clen > 0)
812            else right_word = 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
813            else              /* This is a fudge to ensure that if this is the */
814              {               /* last item in the pattern, we don't count it as */
815              reached_end--;  /* reached, thus disabling a partial match. */
816              right_word = 0;
817              }
818    
819          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
820            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 796  for (;;) Line 833  for (;;)
833        if (clen > 0)        if (clen > 0)
834          {          {
835          BOOL OK;          BOOL OK;
836          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
837          switch(code[1])          switch(code[1])
838            {            {
839            case PT_ANY:            case PT_ANY:
# Line 804  for (;;) Line 841  for (;;)
841            break;            break;
842    
843            case PT_LAMP:            case PT_LAMP:
844            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
845            break;            break;
846    
847            case PT_GC:            case PT_GC:
848            OK = category == code[2];            OK = _pcre_ucp_gentype[prop->chartype] == code[2];
849            break;            break;
850    
851            case PT_PC:            case PT_PC:
852            OK = chartype == code[2];            OK = prop->chartype == code[2];
853            break;            break;
854    
855            case PT_SC:            case PT_SC:
856            OK = script == code[2];            OK = prop->script == code[2];
857            break;            break;
858    
859            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 836  for (;;) Line 873  for (;;)
873  /* ========================================================================== */  /* ========================================================================== */
874        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
875        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
876        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
877        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
878    
879        case OP_TYPEPLUS:        case OP_TYPEPLUS:
880        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 848  for (;;) Line 885  for (;;)
885          {          {
886          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
887              (c < 256 &&              (c < 256 &&
888                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
889                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
890            {            {
891            if (count > 0 && codevalue == OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_TYPEPOSPLUS)
# Line 874  for (;;) Line 908  for (;;)
908          {          {
909          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
910              (c < 256 &&              (c < 256 &&
911                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
912                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
913            {            {
914            if (codevalue == OP_TYPEPOSQUERY)            if (codevalue == OP_TYPEPOSQUERY)
# Line 899  for (;;) Line 930  for (;;)
930          {          {
931          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
932              (c < 256 &&              (c < 256 &&
933                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
934                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
935            {            {
936            if (codevalue == OP_TYPEPOSSTAR)            if (codevalue == OP_TYPEPOSSTAR)
# Line 922  for (;;) Line 950  for (;;)
950          {          {
951          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
952              (c < 256 &&              (c < 256 &&
953                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
954                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
955            {            {
956            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
# Line 946  for (;;) Line 971  for (;;)
971          {          {
972          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
973              (c < 256 &&              (c < 256 &&
974                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                !IS_NEWLINE(ptr)  
               ) &&  
975                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
976            {            {
977            if (codevalue == OP_TYPEPOSUPTO)            if (codevalue == OP_TYPEPOSUPTO)
# Line 980  for (;;) Line 1002  for (;;)
1002        if (clen > 0)        if (clen > 0)
1003          {          {
1004          BOOL OK;          BOOL OK;
1005          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1006          switch(code[2])          switch(code[2])
1007            {            {
1008            case PT_ANY:            case PT_ANY:
# Line 988  for (;;) Line 1010  for (;;)
1010            break;            break;
1011    
1012            case PT_LAMP:            case PT_LAMP:
1013            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1014            break;            break;
1015    
1016            case PT_GC:            case PT_GC:
1017            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1018            break;            break;
1019    
1020            case PT_PC:            case PT_PC:
1021            OK = chartype == code[3];            OK = prop->chartype == code[3];
1022            break;            break;
1023    
1024            case PT_SC:            case PT_SC:
1025            OK = script == code[3];            OK = prop->script == code[3];
1026            break;            break;
1027    
1028            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1029  for (;;) Line 1051  for (;;)
1051        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1052        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1053        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1054        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1055          {          {
1056          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1057          int ncount = 0;          int ncount = 0;
# Line 1043  for (;;) Line 1065  for (;;)
1065            int nd;            int nd;
1066            int ndlen = 1;            int ndlen = 1;
1067            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1068            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1069            ncount++;            ncount++;
1070            nptr += ndlen;            nptr += ndlen;
1071            }            }
# Line 1064  for (;;) Line 1086  for (;;)
1086          int ncount = 0;          int ncount = 0;
1087          switch (c)          switch (c)
1088            {            {
1089              case 0x000b:
1090              case 0x000c:
1091              case 0x0085:
1092              case 0x2028:
1093              case 0x2029:
1094              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1095              goto ANYNL01;
1096    
1097            case 0x000d:            case 0x000d:
1098            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1099            /* Fall through */            /* Fall through */
1100    
1101              ANYNL01:
1102              case 0x000a:
1103              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1104                {
1105                active_count--;           /* Remove non-match possibility */
1106                next_active_state--;
1107                }
1108              count++;
1109              ADD_NEW_DATA(-state_offset, count, ncount);
1110              break;
1111    
1112              default:
1113              break;
1114              }
1115            }
1116          break;
1117    
1118          /*-----------------------------------------------------------------*/
1119          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1120          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1121          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1122          count = current_state->count;  /* Already matched */
1123          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1124          if (clen > 0)
1125            {
1126            BOOL OK;
1127            switch (c)
1128              {
1129            case 0x000a:            case 0x000a:
1130            case 0x000b:            case 0x000b:
1131            case 0x000c:            case 0x000c:
1132              case 0x000d:
1133            case 0x0085:            case 0x0085:
1134            case 0x2028:            case 0x2028:
1135            case 0x2029:            case 0x2029:
1136            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            OK = TRUE;
1137              break;
1138    
1139              default:
1140              OK = FALSE;
1141              break;
1142              }
1143    
1144            if (OK == (d == OP_VSPACE))
1145              {
1146              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1147              {              {
1148              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1149              next_active_state--;              next_active_state--;
1150              }              }
1151            count++;            count++;
1152            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, 0);
1153              }
1154            }
1155          break;
1156    
1157          /*-----------------------------------------------------------------*/
1158          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1159          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1160          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1161          count = current_state->count;  /* Already matched */
1162          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1163          if (clen > 0)
1164            {
1165            BOOL OK;
1166            switch (c)
1167              {
1168              case 0x09:      /* HT */
1169              case 0x20:      /* SPACE */
1170              case 0xa0:      /* NBSP */
1171              case 0x1680:    /* OGHAM SPACE MARK */
1172              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1173              case 0x2000:    /* EN QUAD */
1174              case 0x2001:    /* EM QUAD */
1175              case 0x2002:    /* EN SPACE */
1176              case 0x2003:    /* EM SPACE */
1177              case 0x2004:    /* THREE-PER-EM SPACE */
1178              case 0x2005:    /* FOUR-PER-EM SPACE */
1179              case 0x2006:    /* SIX-PER-EM SPACE */
1180              case 0x2007:    /* FIGURE SPACE */
1181              case 0x2008:    /* PUNCTUATION SPACE */
1182              case 0x2009:    /* THIN SPACE */
1183              case 0x200A:    /* HAIR SPACE */
1184              case 0x202f:    /* NARROW NO-BREAK SPACE */
1185              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1186              case 0x3000:    /* IDEOGRAPHIC SPACE */
1187              OK = TRUE;
1188            break;            break;
1189    
1190            default:            default:
1191              OK = FALSE;
1192            break;            break;
1193            }            }
1194    
1195            if (OK == (d == OP_HSPACE))
1196              {
1197              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1198                {
1199                active_count--;           /* Remove non-match possibility */
1200                next_active_state--;
1201                }
1202              count++;
1203              ADD_NEW_DATA(-state_offset, count, 0);
1204              }
1205          }          }
1206        break;        break;
1207    
# Line 1106  for (;;) Line 1224  for (;;)
1224        if (clen > 0)        if (clen > 0)
1225          {          {
1226          BOOL OK;          BOOL OK;
1227          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1228          switch(code[2])          switch(code[2])
1229            {            {
1230            case PT_ANY:            case PT_ANY:
# Line 1114  for (;;) Line 1232  for (;;)
1232            break;            break;
1233    
1234            case PT_LAMP:            case PT_LAMP:
1235            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1236            break;            break;
1237    
1238            case PT_GC:            case PT_GC:
1239            OK = category == code[3];            OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1240            break;            break;
1241    
1242            case PT_PC:            case PT_PC:
1243            OK = chartype == code[3];            OK = prop->chartype == code[3];
1244            break;            break;
1245    
1246            case PT_SC:            case PT_SC:
1247            OK = script == code[3];            OK = prop->script == code[3];
1248            break;            break;
1249    
1250            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1164  for (;;) Line 1282  for (;;)
1282        QS2:        QS2:
1283    
1284        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1285        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1286          {          {
1287          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1288          int ncount = 0;          int ncount = 0;
# Line 1179  for (;;) Line 1297  for (;;)
1297            int nd;            int nd;
1298            int ndlen = 1;            int ndlen = 1;
1299            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1300            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1301            ncount++;            ncount++;
1302            nptr += ndlen;            nptr += ndlen;
1303            }            }
# Line 1207  for (;;) Line 1325  for (;;)
1325          int ncount = 0;          int ncount = 0;
1326          switch (c)          switch (c)
1327            {            {
1328              case 0x000b:
1329              case 0x000c:
1330              case 0x0085:
1331              case 0x2028:
1332              case 0x2029:
1333              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1334              goto ANYNL02;
1335    
1336            case 0x000d:            case 0x000d:
1337            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1338            /* Fall through */            /* Fall through */
1339    
1340              ANYNL02:
1341              case 0x000a:
1342              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1343                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1344                {
1345                active_count--;           /* Remove non-match possibility */
1346                next_active_state--;
1347                }
1348              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1349              break;
1350    
1351              default:
1352              break;
1353              }
1354            }
1355          break;
1356    
1357          /*-----------------------------------------------------------------*/
1358          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1359          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1360          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1361          count = 2;
1362          goto QS4;
1363    
1364          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1365          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1366          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1367          count = 0;
1368    
1369          QS4:
1370          ADD_ACTIVE(state_offset + 2, 0);
1371          if (clen > 0)
1372            {
1373            BOOL OK;
1374            switch (c)
1375              {
1376            case 0x000a:            case 0x000a:
1377            case 0x000b:            case 0x000b:
1378            case 0x000c:            case 0x000c:
1379              case 0x000d:
1380            case 0x0085:            case 0x0085:
1381            case 0x2028:            case 0x2028:
1382            case 0x2029:            case 0x2029:
1383            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            OK = TRUE;
1384                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)            break;
1385    
1386              default:
1387              OK = FALSE;
1388              break;
1389              }
1390            if (OK == (d == OP_VSPACE))
1391              {
1392              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1393                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1394              {              {
1395              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1396              next_active_state--;              next_active_state--;
1397              }              }
1398            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, 0);
1399              }
1400            }
1401          break;
1402    
1403          /*-----------------------------------------------------------------*/
1404          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1405          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1406          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1407          count = 2;
1408          goto QS5;
1409    
1410          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1411          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1412          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1413          count = 0;
1414    
1415          QS5:
1416          ADD_ACTIVE(state_offset + 2, 0);
1417          if (clen > 0)
1418            {
1419            BOOL OK;
1420            switch (c)
1421              {
1422              case 0x09:      /* HT */
1423              case 0x20:      /* SPACE */
1424              case 0xa0:      /* NBSP */
1425              case 0x1680:    /* OGHAM SPACE MARK */
1426              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1427              case 0x2000:    /* EN QUAD */
1428              case 0x2001:    /* EM QUAD */
1429              case 0x2002:    /* EN SPACE */
1430              case 0x2003:    /* EM SPACE */
1431              case 0x2004:    /* THREE-PER-EM SPACE */
1432              case 0x2005:    /* FOUR-PER-EM SPACE */
1433              case 0x2006:    /* SIX-PER-EM SPACE */
1434              case 0x2007:    /* FIGURE SPACE */
1435              case 0x2008:    /* PUNCTUATION SPACE */
1436              case 0x2009:    /* THIN SPACE */
1437              case 0x200A:    /* HAIR SPACE */
1438              case 0x202f:    /* NARROW NO-BREAK SPACE */
1439              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1440              case 0x3000:    /* IDEOGRAPHIC SPACE */
1441              OK = TRUE;
1442            break;            break;
1443    
1444            default:            default:
1445              OK = FALSE;
1446            break;            break;
1447            }            }
1448    
1449            if (OK == (d == OP_HSPACE))
1450              {
1451              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1452                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1453                {
1454                active_count--;           /* Remove non-match possibility */
1455                next_active_state--;
1456                }
1457              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1458              }
1459          }          }
1460        break;        break;
1461    
# Line 1242  for (;;) Line 1471  for (;;)
1471        if (clen > 0)        if (clen > 0)
1472          {          {
1473          BOOL OK;          BOOL OK;
1474          int category = _pcre_ucp_findprop(c, &chartype, &script);          const ucd_record * prop = GET_UCD(c);
1475          switch(code[4])          switch(code[4])
1476            {            {
1477            case PT_ANY:            case PT_ANY:
# Line 1250  for (;;) Line 1479  for (;;)
1479            break;            break;
1480    
1481            case PT_LAMP:            case PT_LAMP:
1482            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1483            break;            break;
1484    
1485            case PT_GC:            case PT_GC:
1486            OK = category == code[5];            OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1487            break;            break;
1488    
1489            case PT_PC:            case PT_PC:
1490            OK = chartype == code[5];            OK = prop->chartype == code[5];
1491            break;            break;
1492    
1493            case PT_SC:            case PT_SC:
1494            OK = script == code[5];            OK = prop->script == code[5];
1495            break;            break;
1496    
1497            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1295  for (;;) Line 1524  for (;;)
1524        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1525          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1526        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1527        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1528          {          {
1529          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1530          int ncount = 0;          int ncount = 0;
# Line 1309  for (;;) Line 1538  for (;;)
1538            int nd;            int nd;
1539            int ndlen = 1;            int ndlen = 1;
1540            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1541            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1542            ncount++;            ncount++;
1543            nptr += ndlen;            nptr += ndlen;
1544            }            }
# Line 1334  for (;;) Line 1563  for (;;)
1563          int ncount = 0;          int ncount = 0;
1564          switch (c)          switch (c)
1565            {            {
1566              case 0x000b:
1567              case 0x000c:
1568              case 0x0085:
1569              case 0x2028:
1570              case 0x2029:
1571              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1572              goto ANYNL03;
1573    
1574            case 0x000d:            case 0x000d:
1575            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1576            /* Fall through */            /* Fall through */
1577    
1578              ANYNL03:
1579              case 0x000a:
1580              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1581                {
1582                active_count--;           /* Remove non-match possibility */
1583                next_active_state--;
1584                }
1585              if (++count >= GET2(code, 1))
1586                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1587              else
1588                { ADD_NEW_DATA(-state_offset, count, ncount); }
1589              break;
1590    
1591              default:
1592              break;
1593              }
1594            }
1595          break;
1596    
1597          /*-----------------------------------------------------------------*/
1598          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1599          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1600          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1601          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1602          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1603            { ADD_ACTIVE(state_offset + 4, 0); }
1604          count = current_state->count;  /* Number already matched */
1605          if (clen > 0)
1606            {
1607            BOOL OK;
1608            switch (c)
1609              {
1610            case 0x000a:            case 0x000a:
1611            case 0x000b:            case 0x000b:
1612            case 0x000c:            case 0x000c:
1613              case 0x000d:
1614            case 0x0085:            case 0x0085:
1615            case 0x2028:            case 0x2028:
1616            case 0x2029:            case 0x2029:
1617            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            OK = TRUE;
1618              break;
1619    
1620              default:
1621              OK = FALSE;
1622              }
1623    
1624            if (OK == (d == OP_VSPACE))
1625              {
1626              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1627              {              {
1628              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1629              next_active_state--;              next_active_state--;
1630              }              }
1631            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1632              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1633            else            else
1634              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, 0); }
1635              }
1636            }
1637          break;
1638    
1639          /*-----------------------------------------------------------------*/
1640          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1641          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1642          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1643          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1644          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1645            { ADD_ACTIVE(state_offset + 4, 0); }
1646          count = current_state->count;  /* Number already matched */
1647          if (clen > 0)
1648            {
1649            BOOL OK;
1650            switch (c)
1651              {
1652              case 0x09:      /* HT */
1653              case 0x20:      /* SPACE */
1654              case 0xa0:      /* NBSP */
1655              case 0x1680:    /* OGHAM SPACE MARK */
1656              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1657              case 0x2000:    /* EN QUAD */
1658              case 0x2001:    /* EM QUAD */
1659              case 0x2002:    /* EN SPACE */
1660              case 0x2003:    /* EM SPACE */
1661              case 0x2004:    /* THREE-PER-EM SPACE */
1662              case 0x2005:    /* FOUR-PER-EM SPACE */
1663              case 0x2006:    /* SIX-PER-EM SPACE */
1664              case 0x2007:    /* FIGURE SPACE */
1665              case 0x2008:    /* PUNCTUATION SPACE */
1666              case 0x2009:    /* THIN SPACE */
1667              case 0x200A:    /* HAIR SPACE */
1668              case 0x202f:    /* NARROW NO-BREAK SPACE */
1669              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1670              case 0x3000:    /* IDEOGRAPHIC SPACE */
1671              OK = TRUE;
1672            break;            break;
1673    
1674            default:            default:
1675              OK = FALSE;
1676            break;            break;
1677            }            }
1678    
1679            if (OK == (d == OP_HSPACE))
1680              {
1681              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1682                {
1683                active_count--;           /* Remove non-match possibility */
1684                next_active_state--;
1685                }
1686              if (++count >= GET2(code, 1))
1687                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1688              else
1689                { ADD_NEW_DATA(-state_offset, count, 0); }
1690              }
1691          }          }
1692        break;        break;
1693    
# Line 1386  for (;;) Line 1718  for (;;)
1718            other case of the character. */            other case of the character. */
1719    
1720  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1721            othercase = _pcre_ucp_othercase(c);            othercase = UCD_OTHERCASE(c);
1722  #else  #else
1723            othercase = NOTACHAR;            othercase = NOTACHAR;
1724  #endif  #endif
# Line 1411  for (;;) Line 1743  for (;;)
1743        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1744    
1745        case OP_EXTUNI:        case OP_EXTUNI:
1746        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1747          {          {
1748          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1749          int ncount = 0;          int ncount = 0;
# Line 1419  for (;;) Line 1751  for (;;)
1751            {            {
1752            int nclen = 1;            int nclen = 1;
1753            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1754            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1755            ncount++;            ncount++;
1756            nptr += nclen;            nptr += nclen;
1757            }            }
# Line 1436  for (;;) Line 1768  for (;;)
1768        case OP_ANYNL:        case OP_ANYNL:
1769        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1770          {          {
         case 0x000a:  
1771          case 0x000b:          case 0x000b:
1772          case 0x000c:          case 0x000c:
1773          case 0x0085:          case 0x0085:
1774          case 0x2028:          case 0x2028:
1775          case 0x2029:          case 0x2029:
1776            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1777    
1778            case 0x000a:
1779          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1780          break;          break;
1781    
1782          case 0x000d:          case 0x000d:
1783          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1784            {            {
# Line 1458  for (;;) Line 1793  for (;;)
1793        break;        break;
1794    
1795        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1796          case OP_NOT_VSPACE:
1797          if (clen > 0) switch(c)
1798            {
1799            case 0x000a:
1800            case 0x000b:
1801            case 0x000c:
1802            case 0x000d:
1803            case 0x0085:
1804            case 0x2028:
1805            case 0x2029:
1806            break;
1807    
1808            default:
1809            ADD_NEW(state_offset + 1, 0);
1810            break;
1811            }
1812          break;
1813    
1814          /*-----------------------------------------------------------------*/
1815          case OP_VSPACE:
1816          if (clen > 0) switch(c)
1817            {
1818            case 0x000a:
1819            case 0x000b:
1820            case 0x000c:
1821            case 0x000d:
1822            case 0x0085:
1823            case 0x2028:
1824            case 0x2029:
1825            ADD_NEW(state_offset + 1, 0);
1826            break;
1827    
1828            default: break;
1829            }
1830          break;
1831    
1832          /*-----------------------------------------------------------------*/
1833          case OP_NOT_HSPACE:
1834          if (clen > 0) switch(c)
1835            {
1836            case 0x09:      /* HT */
1837            case 0x20:      /* SPACE */
1838            case 0xa0:      /* NBSP */
1839            case 0x1680:    /* OGHAM SPACE MARK */
1840            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1841            case 0x2000:    /* EN QUAD */
1842            case 0x2001:    /* EM QUAD */
1843            case 0x2002:    /* EN SPACE */
1844            case 0x2003:    /* EM SPACE */
1845            case 0x2004:    /* THREE-PER-EM SPACE */
1846            case 0x2005:    /* FOUR-PER-EM SPACE */
1847            case 0x2006:    /* SIX-PER-EM SPACE */
1848            case 0x2007:    /* FIGURE SPACE */
1849            case 0x2008:    /* PUNCTUATION SPACE */
1850            case 0x2009:    /* THIN SPACE */
1851            case 0x200A:    /* HAIR SPACE */
1852            case 0x202f:    /* NARROW NO-BREAK SPACE */
1853            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1854            case 0x3000:    /* IDEOGRAPHIC SPACE */
1855            break;
1856    
1857            default:
1858            ADD_NEW(state_offset + 1, 0);
1859            break;
1860            }
1861          break;
1862    
1863          /*-----------------------------------------------------------------*/
1864          case OP_HSPACE:
1865          if (clen > 0) switch(c)
1866            {
1867            case 0x09:      /* HT */
1868            case 0x20:      /* SPACE */
1869            case 0xa0:      /* NBSP */
1870            case 0x1680:    /* OGHAM SPACE MARK */
1871            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1872            case 0x2000:    /* EN QUAD */
1873            case 0x2001:    /* EM QUAD */
1874            case 0x2002:    /* EN SPACE */
1875            case 0x2003:    /* EM SPACE */
1876            case 0x2004:    /* THREE-PER-EM SPACE */
1877            case 0x2005:    /* FOUR-PER-EM SPACE */
1878            case 0x2006:    /* SIX-PER-EM SPACE */
1879            case 0x2007:    /* FIGURE SPACE */
1880            case 0x2008:    /* PUNCTUATION SPACE */
1881            case 0x2009:    /* THIN SPACE */
1882            case 0x200A:    /* HAIR SPACE */
1883            case 0x202f:    /* NARROW NO-BREAK SPACE */
1884            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1885            case 0x3000:    /* IDEOGRAPHIC SPACE */
1886            ADD_NEW(state_offset + 1, 0);
1887            break;
1888            }
1889          break;
1890    
1891          /*-----------------------------------------------------------------*/
1892        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1893        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1894        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1488  for (;;) Line 1919  for (;;)
1919            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1920              {              {
1921  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1922              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1923  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1924              }              }
1925            else            else
# Line 1526  for (;;) Line 1957  for (;;)
1957            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1958              {              {
1959  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1960              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1961  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1962              }              }
1963            else            else
# Line 1562  for (;;) Line 1993  for (;;)
1993            if (utf8 && d >= 128)            if (utf8 && d >= 128)
1994              {              {
1995  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1996              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
1997  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1998              }              }
1999            else            else
# Line 1594  for (;;) Line 2025  for (;;)
2025            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2026              {              {
2027  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2028              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2029  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2030              }              }
2031            else            else
# Line 1629  for (;;) Line 2060  for (;;)
2060            if (utf8 && d >= 128)            if (utf8 && d >= 128)
2061              {              {
2062  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2063              otherd = _pcre_ucp_othercase(d);              otherd = UCD_OTHERCASE(d);
2064  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2065              }              }
2066            else            else
# Line 1737  for (;;) Line 2168  for (;;)
2168    
2169  /* ========================================================================== */  /* ========================================================================== */
2170        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2171        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2172          (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2173          though the other "backtracking verbs" are not supported. */
2174    
2175          case OP_FAIL:
2176          forced_fail++;    /* Count FAILs for multiple states */
2177          break;
2178    
2179        case OP_ASSERT:        case OP_ASSERT:
2180        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 1775  for (;;) Line 2212  for (;;)
2212          {          {
2213          int local_offsets[1000];          int local_offsets[1000];
2214          int local_workspace[1000];          int local_workspace[1000];
2215          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2216            int condcode;
2217    
2218            /* Because of the way auto-callout works during compile, a callout item
2219            is inserted between OP_COND and an assertion condition. This does not
2220            happen for the other conditions. */
2221    
2222            if (code[LINK_SIZE+1] == OP_CALLOUT)
2223              {
2224              rrc = 0;
2225              if (pcre_callout != NULL)
2226                {
2227                pcre_callout_block cb;
2228                cb.version          = 1;   /* Version 1 of the callout block */
2229                cb.callout_number   = code[LINK_SIZE+2];
2230                cb.offset_vector    = offsets;
2231                cb.subject          = (PCRE_SPTR)start_subject;
2232                cb.subject_length   = end_subject - start_subject;
2233                cb.start_match      = current_subject - start_subject;
2234                cb.current_position = ptr - start_subject;
2235                cb.pattern_position = GET(code, LINK_SIZE + 3);
2236                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2237                cb.capture_top      = 1;
2238                cb.capture_last     = -1;
2239                cb.callout_data     = md->callout_data;
2240                if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2241                }
2242              if (rrc > 0) break;                      /* Fail this thread */
2243              code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
2244              }
2245    
2246            condcode = code[LINK_SIZE+1];
2247    
2248          /* Back reference conditions are not supported */          /* Back reference conditions are not supported */
2249    
# Line 1784  for (;;) Line 2252  for (;;)
2252          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
2253    
2254          if (condcode == OP_DEF)          if (condcode == OP_DEF)
2255            {            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);  
           }  
2256    
2257          /* The only supported version of OP_RREF is for the value RREF_ANY,          /* The only supported version of OP_RREF is for the value RREF_ANY,
2258          which means "test if in any recursion". We can't test for specifically          which means "test if in any recursion". We can't test for specifically
# Line 1796  for (;;) Line 2262  for (;;)
2262            {            {
2263            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2264            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2265            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0)
2266              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2267              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2268            }            }
2269    
2270          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 1827  for (;;) Line 2294  for (;;)
2294                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2295              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2296            else            else
2297              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2298            }            }
2299          }          }
2300        break;        break;
# Line 1979  for (;;) Line 2446  for (;;)
2446        /* Handle callouts */        /* Handle callouts */
2447    
2448        case OP_CALLOUT:        case OP_CALLOUT:
2449          rrc = 0;
2450        if (pcre_callout != NULL)        if (pcre_callout != NULL)
2451          {          {
         int rrc;  
2452          pcre_callout_block cb;          pcre_callout_block cb;
2453          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2454          cb.callout_number   = code[1];          cb.callout_number   = code[1];
# Line 1996  for (;;) Line 2463  for (;;)
2463          cb.capture_last     = -1;          cb.capture_last     = -1;
2464          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2465          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
         if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }  
2466          }          }
2467          if (rrc == 0)
2468            { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2469        break;        break;
2470    
2471    
# Line 2013  for (;;) Line 2481  for (;;)
2481    /* We have finished the processing at the current subject character. If no    /* We have finished the processing at the current subject character. If no
2482    new states have been set for the next character, we have found all the    new states have been set for the next character, we have found all the
2483    matches that we are going to find. If we are at the top level and partial    matches that we are going to find. If we are at the top level and partial
2484    matching has been requested, check for appropriate conditions. */    matching has been requested, check for appropriate conditions. The "forced_
2485      fail" variable counts the number of (*F) encountered for the character. If it
2486      is equal to the original active_count (saved in workspace[1]) it means that
2487      (*F) was found on every active state. In this case we don't want to give a
2488      partial match. */
2489    
2490    if (new_count <= 0)    if (new_count <= 0)
2491      {      {
2492      if (match_count < 0 &&                     /* No matches found */      if (rlevel == 1 &&                               /* Top level, and */
2493          rlevel == 1 &&                         /* Top level match function */          reached_end != workspace[1] &&               /* Not all reached end */
2494          (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */          forced_fail != workspace[1] &&               /* Not all forced fail & */
2495          ptr >= end_subject &&                  /* Reached end of subject */          (                                            /* either... */
2496          ptr > current_subject)                 /* Matched non-empty string */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
2497            ||                                           /* or... */
2498            ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
2499             match_count < 0)                            /* no matches */
2500            ) &&                                         /* And... */
2501            ptr >= end_subject &&                     /* Reached end of subject */
2502            ptr > current_subject)                    /* Matched non-empty string */
2503        {        {
2504        if (offsetcount >= 2)        if (offsetcount >= 2)
2505          {          {
# Line 2080  Returns: > 0 => number of match Line 2558  Returns: > 0 => number of match
2558                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2559  */  */
2560    
2561  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2562  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2563    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2564    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 2170  md->end_subject = end_subject; Line 2648  md->end_subject = end_subject;
2648  md->moptions = options;  md->moptions = options;
2649  md->poptions = re->options;  md->poptions = re->options;
2650    
2651    /* If the BSR option is not set at match time, copy what was set
2652    at compile time. */
2653    
2654    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2655      {
2656      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2657        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2658    #ifdef BSR_ANYCRLF
2659      else md->moptions |= PCRE_BSR_ANYCRLF;
2660    #endif
2661      }
2662    
2663  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2664  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2665    
# Line 2177  switch ((((options & PCRE_NEWLINE_BITS) Line 2667  switch ((((options & PCRE_NEWLINE_BITS)
2667           PCRE_NEWLINE_BITS)           PCRE_NEWLINE_BITS)
2668    {    {
2669    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
2670    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2671    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2672    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2673         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2674    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
2675    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;    case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2676    default: return PCRE_ERROR_BADNEWLINE;    default: return PCRE_ERROR_BADNEWLINE;
# Line 2240  if (md->tables == NULL) md->tables = _pc Line 2730  if (md->tables == NULL) md->tables = _pc
2730  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2731    
2732  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2733  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2734  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2735    
2736  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2251  studied, there may be a bitmap of possib Line 2741  studied, there may be a bitmap of possib
2741    
2742  if (!anchored)  if (!anchored)
2743    {    {
2744    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2745      {      {
2746      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2747      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2268  if (!anchored) Line 2758  if (!anchored)
2758  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2759  character" set. */  character" set. */
2760    
2761  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2762    {    {
2763    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2764    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2276  if ((re->options & PCRE_REQCHSET) != 0) Line 2766  if ((re->options & PCRE_REQCHSET) != 0)
2766    }    }
2767    
2768  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2769  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2770  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2771    
2772  for (;;)  for (;;)
2773    {    {
# Line 2288  for (;;) Line 2777  for (;;)
2777      {      {
2778      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2779    
2780      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2781      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2782      Implement this by temporarily adjusting end_subject so that we stop      end_subject so that we stop scanning at a newline. If the match fails at
2783      scanning at a newline. If the match fails at the newline, later code breaks      the newline, later code breaks this loop. */
     this loop. */  
2784    
2785      if (firstline)      if (firstline)
2786        {        {
2787        const uschar *t = current_subject;        USPTR t = current_subject;
2788    #ifdef SUPPORT_UTF8
2789          if (utf8)
2790            {
2791            while (t < md->end_subject && !IS_NEWLINE(t))
2792              {
2793              t++;
2794              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2795              }
2796            }
2797          else
2798    #endif
2799        while (t < md->end_subject && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2800        end_subject = t;        end_subject = t;
2801        }        }
2802    
2803      if (first_byte >= 0)      /* There are some optimizations that avoid running the match if a known
2804        starting point is not found, or if a known later character is not present.
2805        However, there is an option that disables these, for testing and for
2806        ensuring that all callouts do actually occur. */
2807    
2808        if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2809        {        {
       if (first_byte_caseless)  
         while (current_subject < end_subject &&  
                lcc[*current_subject] != first_byte)  
           current_subject++;  
       else  
         while (current_subject < end_subject && *current_subject != first_byte)  
           current_subject++;  
       }  
2810    
2811      /* Or to just after a linebreak for a multiline match if possible */        /* Advance to a known first byte. */
2812    
2813      else if (startline)        if (first_byte >= 0)
       {  
       if (current_subject > md->start_subject + start_offset)  
2814          {          {
2815          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))          if (first_byte_caseless)
2816            current_subject++;            while (current_subject < end_subject &&
2817                     lcc[*current_subject] != first_byte)
2818                current_subject++;
2819            else
2820              while (current_subject < end_subject &&
2821                     *current_subject != first_byte)
2822                current_subject++;
2823            }
2824    
2825          /* Or to just after a linebreak for a multiline match if possible */
2826    
2827          /* If we have just passed a CR and the newline option is ANY or        else if (startline)
2828          ANYCRLF, and we are now at a LF, advance the match position by one more          {
2829          character. */          if (current_subject > md->start_subject + start_offset)
2830              {
2831          if (current_subject[-1] == '\r' &&  #ifdef SUPPORT_UTF8
2832               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&            if (utf8)
2833               current_subject < end_subject &&              {
2834               *current_subject == '\n')              while (current_subject < end_subject &&
2835            current_subject++;                     !WAS_NEWLINE(current_subject))
2836                  {
2837                  current_subject++;
2838                  while(current_subject < end_subject &&
2839                        (*current_subject & 0xc0) == 0x80)
2840                    current_subject++;
2841                  }
2842                }
2843              else
2844    #endif
2845              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2846                current_subject++;
2847    
2848              /* If we have just passed a CR and the newline option is ANY or
2849              ANYCRLF, and we are now at a LF, advance the match position by one
2850              more character. */
2851    
2852              if (current_subject[-1] == CHAR_CR &&
2853                   (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2854                   current_subject < end_subject &&
2855                   *current_subject == CHAR_NL)
2856                current_subject++;
2857              }
2858          }          }
       }  
2859    
2860      /* Or to a non-unique first char after study */        /* Or to a non-unique first char after study */
2861    
2862      else if (start_bits != NULL)        else if (start_bits != NULL)
       {  
       while (current_subject < end_subject)  
2863          {          {
2864          register unsigned int c = *current_subject;          while (current_subject < end_subject)
2865          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            {
2866            else break;            register unsigned int c = *current_subject;
2867              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2868                else break;
2869              }
2870          }          }
2871        }        }
2872    
# Line 2363  for (;;) Line 2888  for (;;)
2888    showed up when somebody was matching /^C/ on a 32-megabyte string... so we    showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2889    don't do this when the string is sufficiently long.    don't do this when the string is sufficiently long.
2890    
2891    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, and can
2892    */    also be explicitly deactivated. Furthermore, we have to disable it when
2893      restarting after a partial match, because the required character may have
2894      already been matched. */
2895    
2896    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2897          req_byte >= 0 &&
2898        end_subject - current_subject < REQ_BYTE_MAX &&        end_subject - current_subject < REQ_BYTE_MAX &&
2899        (options & PCRE_PARTIAL) == 0)        (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_RESTART)) == 0)
2900      {      {
2901      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);      register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2902    
# Line 2438  for (;;) Line 2966  for (;;)
2966      }      }
2967    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2968    
2969    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2970    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2971    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2972    
2973    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == CHAR_CR &&
2974         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2975          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == CHAR_NL &&
2976          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2977         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2978         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2979             md->nllen == 2))
2980      current_subject++;      current_subject++;
2981    
2982    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.151  
changed lines
  Added in v.428

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12