/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 459 by ph10, Sun Oct 4 09:21:39 2009 UTC
# Line 3  Line 3 
3  *************************************************/  *************************************************/
4    
5  /* PCRE is a library of functions to support regular expressions whose syntax  /* PCRE is a library of functions to support regular expressions whose syntax
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language (but see
7    below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 40  POSSIBILITY OF SUCH DAMAGE.
40    
41    
42  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
43  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
44  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
45    applications. */
46    
47    
48    /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
49    the performance of his patterns greatly. I could not use it as it stood, as it
50    was not thread safe, and made assumptions about pattern sizes. Also, it caused
51    test 7 to loop, and test 9 to crash with a segfault.
52    
53    The issue is the check for duplicate states, which is done by a simple linear
54    search up the state list. (Grep for "duplicate" below to find the code.) For
55    many patterns, there will never be many states active at one time, so a simple
56    linear search is fine. In patterns that have many active states, it might be a
57    bottleneck. The suggested code used an indexing scheme to remember which states
58    had previously been used for each character, and avoided the linear search when
59    it knew there was no chance of a duplicate. This was implemented when adding
60    states to the state lists.
61    
62    I wrote some thread-safe, not-limited code to try something similar at the time
63    of checking for duplicates (instead of when adding states), using index vectors
64    on the stack. It did give a 13% improvement with one specially constructed
65    pattern for certain subject strings, but on other strings and on many of the
66    simpler patterns in the test suite it did worse. The major problem, I think,
67    was the extra time to initialize the index. This had to be done for each call
68    of internal_dfa_exec(). (The supplied patch used a static vector, initialized
69    only once - I suspect this was the cause of the problems with the tests.)
70    
71    Overall, I concluded that the gains in some cases did not outweigh the losses
72    in others, so I abandoned this code. */
73    
74    
75    
76    #ifdef HAVE_CONFIG_H
77    #include "config.h"
78    #endif
79    
80    #define NLBLOCK md             /* Block containing newline information */
81    #define PSSTART start_subject  /* Field containing processed string start */
82    #define PSEND   end_subject    /* Field containing processed string end */
83    
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
# Line 51  compatible, but it has advantages in cer Line 89  compatible, but it has advantages in cer
89  #define SP "                   "  #define SP "                   "
90    
91    
   
92  /*************************************************  /*************************************************
93  *      Code parameters and static tables         *  *      Code parameters and static tables         *
94  *************************************************/  *************************************************/
95    
96  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
97  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
98  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
99    never stored, so we push them well clear of the normal opcodes. */
100  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
101  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
102    #define OP_EXTUNI_EXTRA     320
103    #define OP_ANYNL_EXTRA      340
104    #define OP_HSPACE_EXTRA     360
105    #define OP_VSPACE_EXTRA     380
106    
107    
108  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
109  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
110  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
111  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
112  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
113    that follow must also be modified. */
114    
115  static uschar coptable[] = {  static const uschar coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0,                          /* Any, Anybyte                           */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
119      0, 0, 0,                       /* Any, AllAny, Anybyte                   */
120    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
121      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
123    1,                             /* Char                                   */    1,                             /* Char                                   */
124    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 126  static uschar coptable[] = {
126    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
127    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
128    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
129      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
130    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
131    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
132    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
133      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
134    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
135    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
136    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
137      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
138    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
139    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
140    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 153  static uschar coptable[] = {
153    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
154    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
155    0,                             /* Reverse                                */    0,                             /* Reverse                                */
156    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
157    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
158    0,                             /* CREF                                   */    0,                             /* CREF                                   */
159      0,                             /* RREF                                   */
160      0,                             /* DEF                                    */
161    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
162    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
163    0                              /* BRA                                    */    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
164  };  };
165    
166  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
167  and \w */  and \w */
168    
169  static uschar toptable1[] = {  static const uschar toptable1[] = {
170    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
171    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
172    ctype_space, ctype_space,    ctype_space, ctype_space,
173    ctype_word,  ctype_word,    ctype_word,  ctype_word,
174    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
175  };  };
176    
177  static uschar toptable2[] = {  static const uschar toptable2[] = {
178    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
179    ctype_digit, 0,    ctype_digit, 0,
180    ctype_space, 0,    ctype_space, 0,
181    ctype_word,  0,    ctype_word,  0,
182    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
183  };  };
184    
185    
# Line 202  Arguments: Line 251  Arguments:
251    rlevel            function call recursion level    rlevel            function call recursion level
252    recursing         regex recursive call level    recursing         regex recursive call level
253    
254  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
255                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
256                       -1 => failed to match                       -1 => failed to match
257                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
258    
# Line 277  stateblock *next_active_state, *next_new Line 326  stateblock *next_active_state, *next_new
326    
327  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
328  const uschar *ptr;  const uschar *ptr;
329  const uschar *end_code;  const uschar *end_code, *first_op;
330    
331  int active_count, new_count, match_count;  int active_count, new_count, match_count;
332    
# Line 288  const uschar *start_subject = md->start_ Line 337  const uschar *start_subject = md->start_
337  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
338  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
339    
340    #ifdef SUPPORT_UTF8
341  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
342    #else
343    BOOL utf8 = FALSE;
344    #endif
345    
346  rlevel++;  rlevel++;
347  offsetcount &= (-2);  offsetcount &= (-2);
# Line 311  active_states = (stateblock *)(workspace Line 364  active_states = (stateblock *)(workspace
364  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
365  new_count = 0;  new_count = 0;
366    
367    first_op = this_start_code + 1 + LINK_SIZE +
368      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
369    
370  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
371  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
372  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 320  If the first opcode in the first alterna Line 376  If the first opcode in the first alterna
376  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
377  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
378    
379  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
380    {    {
381    int max_back = 0;    int max_back = 0;
382    int gone_back;    int gone_back;
# Line 361  if (this_start_code[1+LINK_SIZE] == OP_R Line 417  if (this_start_code[1+LINK_SIZE] == OP_R
417        current_subject - start_subject : max_back;        current_subject - start_subject : max_back;
418      current_subject -= gone_back;      current_subject -= gone_back;
419      }      }
420    
421      /* Save the earliest consulted character */
422    
423      if (current_subject < md->start_used_ptr)
424        md->start_used_ptr = current_subject;
425    
426    /* Now we can process the individual branches. */    /* Now we can process the individual branches. */
427    
# Line 402  else Line 463  else
463    
464    else    else
465      {      {
466        int length = 1 + LINK_SIZE +
467          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
468      do      do
469        {        {
470        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
471        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
472          length = 1 + LINK_SIZE;
473        }        }
474      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
475      }      }
# Line 421  ptr = current_subject; Line 485  ptr = current_subject;
485  for (;;)  for (;;)
486    {    {
487    int i, j;    int i, j;
488    int c, d, clen, dlen;    int clen, dlen;
489      unsigned int c, d;
490      int forced_fail = 0;
491      int reached_end = 0;
492    
493    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
494    new state list. */    new state list. */
# Line 457  for (;;) Line 524  for (;;)
524    
525    if (ptr < end_subject)    if (ptr < end_subject)
526      {      {
527      clen = 1;      clen = 1;        /* Number of bytes in the character */
528  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
529      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
530  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 465  for (;;) Line 532  for (;;)
532      }      }
533    else    else
534      {      {
535      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
536      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
537      }      }
538    
539    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 479  for (;;) Line 546  for (;;)
546      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
547      const uschar *code;      const uschar *code;
548      int state_offset = current_state->offset;      int state_offset = current_state->offset;
549      int count, codevalue;      int count, codevalue, rrc;
     int chartype, othercase;  
550    
551  #ifdef DEBUG  #ifdef DEBUG
552      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
553      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
554        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
555          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
556  #endif  #endif
# Line 512  for (;;) Line 578  for (;;)
578          }          }
579        }        }
580    
581      /* Check for a duplicate state with the same count, and skip if found. */      /* Check for a duplicate state with the same count, and skip if found.
582        See the note at the head of this module about the possibility of improving
583        performance here. */
584    
585      for (j = 0; j < i; j++)      for (j = 0; j < i; j++)
586        {        {
# Line 528  for (;;) Line 596  for (;;)
596    
597      code = start_code + state_offset;      code = start_code + state_offset;
598      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
599    
600      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
601      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 536  for (;;) Line 603  for (;;)
603      permitted.      permitted.
604    
605      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
606      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
607      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
608      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
609      opcodes. */      */
610    
611      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
612        {        {
# Line 550  for (;;) Line 617  for (;;)
617        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
618        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
619          {          {
620          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
621          if (d >= OP_NOTPROP)            {
622            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
623              case OP_NOTPROP:
624              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
625              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
626              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
627              case OP_NOT_HSPACE:
628              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
629              case OP_NOT_VSPACE:
630              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
631              default: break;
632              }
633          }          }
634        }        }
635      else      else
636        {        {
637        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
638        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
639        }        }
640    
641    
# Line 570  for (;;) Line 647  for (;;)
647  /* ========================================================================== */  /* ========================================================================== */
648        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
649        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. Otherwise, unless we have an empty string and
650        PCRE_NOTEMPTY is set, save the match data, shifting up all previous        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
651          start of the subject, save the match data, shifting up all previous
652        matches so we always have the longest first. */        matches so we always have the longest first. */
653    
654        case OP_KET:        case OP_KET:
# Line 584  for (;;) Line 662  for (;;)
662            ADD_ACTIVE(state_offset - GET(code, 1), 0);            ADD_ACTIVE(state_offset - GET(code, 1), 0);
663            }            }
664          }          }
665        else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)        else
666          {          {
667          if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;          reached_end++;    /* Count branches that reach the end */
668            else if (match_count > 0 && ++match_count * 2 >= offsetcount)          if (ptr > current_subject ||
669              match_count = 0;              ((md->moptions & PCRE_NOTEMPTY) == 0 &&
670          count = ((match_count == 0)? offsetcount : match_count * 2) - 2;                ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
671          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));                  current_subject > start_subject + md->start_offset)))
672          if (offsetcount >= 2)            {
673            {            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
674            offsets[0] = current_subject - start_subject;              else if (match_count > 0 && ++match_count * 2 >= offsetcount)
675            offsets[1] = ptr - start_subject;                match_count = 0;
676            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
677              offsets[1] - offsets[0], current_subject));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
678            }            if (offsetcount >= 2)
679          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)              {
680            {              offsets[0] = current_subject - start_subject;
681            DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"              offsets[1] = ptr - start_subject;
682              "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
683              match_count, rlevel*2-2, SP));                offsets[1] - offsets[0], current_subject));
684            return match_count;              }
685            }            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
686                {
687                DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
688                  "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
689                  match_count, rlevel*2-2, SP));
690                return match_count;
691                }
692              }
693          }          }
694        break;        break;
695    
# Line 620  for (;;) Line 705  for (;;)
705    
706        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
707        case OP_BRA:        case OP_BRA:
708          case OP_SBRA:
709        do        do
710          {          {
711          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 629  for (;;) Line 715  for (;;)
715        break;        break;
716    
717        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
718          case OP_CBRA:
719          case OP_SCBRA:
720          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
721          code += GET(code, 1);
722          while (*code == OP_ALT)
723            {
724            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
725            code += GET(code, 1);
726            }
727          break;
728    
729          /*-----------------------------------------------------------------*/
730        case OP_BRAZERO:        case OP_BRAZERO:
731        case OP_BRAMINZERO:        case OP_BRAMINZERO:
732        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 638  for (;;) Line 736  for (;;)
736        break;        break;
737    
738        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
739        case OP_BRANUMBER:        case OP_SKIPZERO:
740        ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);        code += 1 + GET(code, 2);
741          while (*code == OP_ALT) code += GET(code, 1);
742          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
743        break;        break;
744    
745        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
746        case OP_CIRC:        case OP_CIRC:
747        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
748            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
749                ptr != end_subject &&
750                WAS_NEWLINE(ptr)))
751          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
752        break;        break;
753    
# Line 679  for (;;) Line 781  for (;;)
781    
782        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
783        case OP_ANY:        case OP_ANY:
784        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && !IS_NEWLINE(ptr))
785            { ADD_NEW(state_offset + 1, 0); }
786          break;
787    
788          /*-----------------------------------------------------------------*/
789          case OP_ALLANY:
790          if (clen > 0)
791          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
792        break;        break;
793    
794        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
795        case OP_EODN:        case OP_EODN:
796        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
797          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
798        break;        break;
799    
# Line 693  for (;;) Line 801  for (;;)
801        case OP_DOLL:        case OP_DOLL:
802        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
803          {          {
804          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
805                                  (ims & PCRE_MULTILINE) != 0)))              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
806                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
807                ))
808            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
809          }          }
810        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
811          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
812        break;        break;
813    
# Line 729  for (;;) Line 839  for (;;)
839          if (ptr > start_subject)          if (ptr > start_subject)
840            {            {
841            const uschar *temp = ptr - 1;            const uschar *temp = ptr - 1;
842              if (temp < md->start_used_ptr) md->start_used_ptr = temp;
843  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
844            if (utf8) BACKCHAR(temp);            if (utf8) BACKCHAR(temp);
845  #endif  #endif
# Line 737  for (;;) Line 848  for (;;)
848            }            }
849          else left_word = 0;          else left_word = 0;
850    
851          if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;          if (clen > 0)
852            else right_word = 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
853            else              /* This is a fudge to ensure that if this is the */
854              {               /* last item in the pattern, we don't count it as */
855              reached_end--;  /* reached, thus disabling a partial match. */
856              right_word = 0;
857              }
858    
859          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
860            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 746  for (;;) Line 862  for (;;)
862        break;        break;
863    
864    
 #ifdef SUPPORT_UCP  
   
865        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
866        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
867        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
868        */        */
869    
870    #ifdef SUPPORT_UCP
871        case OP_PROP:        case OP_PROP:
872        case OP_NOTPROP:        case OP_NOTPROP:
873        if (clen > 0)        if (clen > 0)
874          {          {
875          int rqdtype, category;          BOOL OK;
876          category = ucp_findchar(c, &chartype, &othercase);          const ucd_record * prop = GET_UCD(c);
877          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
878            {            {
879            if ((rqdtype - 128 == category) == (codevalue == OP_PROP))            case PT_ANY:
880              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
881            }            break;
882          else  
883            {            case PT_LAMP:
884            if ((rqdtype == chartype) == (codevalue == OP_PROP))            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
885              { ADD_NEW(state_offset + 2, 0); }            break;
886    
887              case PT_GC:
888              OK = _pcre_ucp_gentype[prop->chartype] == code[2];
889              break;
890    
891              case PT_PC:
892              OK = prop->chartype == code[2];
893              break;
894    
895              case PT_SC:
896              OK = prop->script == code[2];
897              break;
898    
899              /* Should never occur, but keep compilers from grumbling. */
900    
901              default:
902              OK = codevalue != OP_PROP;
903              break;
904            }            }
905    
906            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
907          }          }
908        break;        break;
909  #endif  #endif
# Line 779  for (;;) Line 913  for (;;)
913  /* ========================================================================== */  /* ========================================================================== */
914        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
915        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
916        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
917        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
918    
919        case OP_TYPEPLUS:        case OP_TYPEPLUS:
920        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
921          case OP_TYPEPOSPLUS:
922        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
923        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
924        if (clen > 0)        if (clen > 0)
925          {          {
926          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
927              (c < 256 &&              (c < 256 &&
928                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
929                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
930            {            {
931              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
932                {
933                active_count--;            /* Remove non-match possibility */
934                next_active_state--;
935                }
936            count++;            count++;
937            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
938            }            }
# Line 802  for (;;) Line 942  for (;;)
942        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
943        case OP_TYPEQUERY:        case OP_TYPEQUERY:
944        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
945          case OP_TYPEPOSQUERY:
946        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
947        if (clen > 0)        if (clen > 0)
948          {          {
949          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
950              (c < 256 &&              (c < 256 &&
951                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
952                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
953            {            {
954              if (codevalue == OP_TYPEPOSQUERY)
955                {
956                active_count--;            /* Remove non-match possibility */
957                next_active_state--;
958                }
959            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
960            }            }
961          }          }
# Line 818  for (;;) Line 964  for (;;)
964        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
965        case OP_TYPESTAR:        case OP_TYPESTAR:
966        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
967          case OP_TYPEPOSSTAR:
968        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
969        if (clen > 0)        if (clen > 0)
970          {          {
971          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
972              (c < 256 &&              (c < 256 &&
973                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
974                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
975            {            {
976              if (codevalue == OP_TYPEPOSSTAR)
977                {
978                active_count--;            /* Remove non-match possibility */
979                next_active_state--;
980                }
981            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
982            }            }
983          }          }
# Line 833  for (;;) Line 985  for (;;)
985    
986        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
987        case OP_TYPEEXACT:        case OP_TYPEEXACT:
988          count = current_state->count;  /* Number already matched */
989          if (clen > 0)
990            {
991            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
992                (c < 256 &&
993                  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
994                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
995              {
996              if (++count >= GET2(code, 1))
997                { ADD_NEW(state_offset + 4, 0); }
998              else
999                { ADD_NEW(state_offset, count); }
1000              }
1001            }
1002          break;
1003    
1004          /*-----------------------------------------------------------------*/
1005        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1006        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1007        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
1008          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
1009        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1010        if (clen > 0)        if (clen > 0)
1011          {          {
1012          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1013              (c < 256 &&              (c < 256 &&
1014                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1015                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1016            {            {
1017              if (codevalue == OP_TYPEPOSUPTO)
1018                {
1019                active_count--;           /* Remove non-match possibility */
1020                next_active_state--;
1021                }
1022            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1023              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
1024            else            else
# Line 855  for (;;) Line 1029  for (;;)
1029    
1030  /* ========================================================================== */  /* ========================================================================== */
1031        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
1032        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1033        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
1034        d variable. */        is in the d variable. */
1035    
1036    #ifdef SUPPORT_UCP
1037        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
1038        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1039          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1040        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
1041        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1042        if (clen > 0)        if (clen > 0)
1043          {          {
1044          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1045          int rqdtype = code[2];          const ucd_record * prop = GET_UCD(c);
1046          if ((d == OP_PROP) ==          switch(code[2])
1047              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1048            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
1049              OK = TRUE;
1050              break;
1051    
1052              case PT_LAMP:
1053              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1054              break;
1055    
1056              case PT_GC:
1057              OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1058              break;
1059    
1060              case PT_PC:
1061              OK = prop->chartype == code[3];
1062              break;
1063    
1064              case PT_SC:
1065              OK = prop->script == code[3];
1066              break;
1067    
1068              /* Should never occur, but keep compilers from grumbling. */
1069    
1070              default:
1071              OK = codevalue != OP_PROP;
1072              break;
1073              }
1074    
1075            if (OK == (d == OP_PROP))
1076              {
1077              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1078                {
1079                active_count--;           /* Remove non-match possibility */
1080                next_active_state--;
1081                }
1082              count++;
1083              ADD_NEW(state_offset, count);
1084              }
1085          }          }
1086        break;        break;
1087    
1088        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1089        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1090        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1091          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1092        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1093        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1094        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1095          {          {
1096          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1097          int ncount = 0;          int ncount = 0;
1098            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1099              {
1100              active_count--;           /* Remove non-match possibility */
1101              next_active_state--;
1102              }
1103          while (nptr < end_subject)          while (nptr < end_subject)
1104            {            {
1105            int nd;            int nd;
1106            int ndlen = 1;            int ndlen = 1;
1107            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1108            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1109            ncount++;            ncount++;
1110            nptr += ndlen;            nptr += ndlen;
1111            }            }
# Line 895  for (;;) Line 1113  for (;;)
1113          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1114          }          }
1115        break;        break;
1116    #endif
1117    
1118        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1119          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1120          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1121          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1122          count = current_state->count;  /* Already matched */
1123          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1124          if (clen > 0)
1125            {
1126            int ncount = 0;
1127            switch (c)
1128              {
1129              case 0x000b:
1130              case 0x000c:
1131              case 0x0085:
1132              case 0x2028:
1133              case 0x2029:
1134              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1135              goto ANYNL01;
1136    
1137              case 0x000d:
1138              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1139              /* Fall through */
1140    
1141              ANYNL01:
1142              case 0x000a:
1143              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1144                {
1145                active_count--;           /* Remove non-match possibility */
1146                next_active_state--;
1147                }
1148              count++;
1149              ADD_NEW_DATA(-state_offset, count, ncount);
1150              break;
1151    
1152              default:
1153              break;
1154              }
1155            }
1156          break;
1157    
1158          /*-----------------------------------------------------------------*/
1159          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1160          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1161          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1162          count = current_state->count;  /* Already matched */
1163          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1164          if (clen > 0)
1165            {
1166            BOOL OK;
1167            switch (c)
1168              {
1169              case 0x000a:
1170              case 0x000b:
1171              case 0x000c:
1172              case 0x000d:
1173              case 0x0085:
1174              case 0x2028:
1175              case 0x2029:
1176              OK = TRUE;
1177              break;
1178    
1179              default:
1180              OK = FALSE;
1181              break;
1182              }
1183    
1184            if (OK == (d == OP_VSPACE))
1185              {
1186              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1187                {
1188                active_count--;           /* Remove non-match possibility */
1189                next_active_state--;
1190                }
1191              count++;
1192              ADD_NEW_DATA(-state_offset, count, 0);
1193              }
1194            }
1195          break;
1196    
1197          /*-----------------------------------------------------------------*/
1198          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1199          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1200          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1201          count = current_state->count;  /* Already matched */
1202          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1203          if (clen > 0)
1204            {
1205            BOOL OK;
1206            switch (c)
1207              {
1208              case 0x09:      /* HT */
1209              case 0x20:      /* SPACE */
1210              case 0xa0:      /* NBSP */
1211              case 0x1680:    /* OGHAM SPACE MARK */
1212              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1213              case 0x2000:    /* EN QUAD */
1214              case 0x2001:    /* EM QUAD */
1215              case 0x2002:    /* EN SPACE */
1216              case 0x2003:    /* EM SPACE */
1217              case 0x2004:    /* THREE-PER-EM SPACE */
1218              case 0x2005:    /* FOUR-PER-EM SPACE */
1219              case 0x2006:    /* SIX-PER-EM SPACE */
1220              case 0x2007:    /* FIGURE SPACE */
1221              case 0x2008:    /* PUNCTUATION SPACE */
1222              case 0x2009:    /* THIN SPACE */
1223              case 0x200A:    /* HAIR SPACE */
1224              case 0x202f:    /* NARROW NO-BREAK SPACE */
1225              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1226              case 0x3000:    /* IDEOGRAPHIC SPACE */
1227              OK = TRUE;
1228              break;
1229    
1230              default:
1231              OK = FALSE;
1232              break;
1233              }
1234    
1235            if (OK == (d == OP_HSPACE))
1236              {
1237              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1238                {
1239                active_count--;           /* Remove non-match possibility */
1240                next_active_state--;
1241                }
1242              count++;
1243              ADD_NEW_DATA(-state_offset, count, 0);
1244              }
1245            }
1246          break;
1247    
1248          /*-----------------------------------------------------------------*/
1249    #ifdef SUPPORT_UCP
1250        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1251        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1252        count = 3;        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1253          count = 4;
1254        goto QS1;        goto QS1;
1255    
1256        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1257        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1258          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1259        count = 0;        count = 0;
1260    
1261        QS1:        QS1:
1262    
1263        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
1264        if (clen > 0)        if (clen > 0)
1265          {          {
1266          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1267          int rqdtype = code[2];          const ucd_record * prop = GET_UCD(c);
1268          if ((d == OP_PROP) ==          switch(code[2])
1269              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1270            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
1271              OK = TRUE;
1272              break;
1273    
1274              case PT_LAMP:
1275              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1276              break;
1277    
1278              case PT_GC:
1279              OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1280              break;
1281    
1282              case PT_PC:
1283              OK = prop->chartype == code[3];
1284              break;
1285    
1286              case PT_SC:
1287              OK = prop->script == code[3];
1288              break;
1289    
1290              /* Should never occur, but keep compilers from grumbling. */
1291    
1292              default:
1293              OK = codevalue != OP_PROP;
1294              break;
1295              }
1296    
1297            if (OK == (d == OP_PROP))
1298              {
1299              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1300                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1301                {
1302                active_count--;           /* Remove non-match possibility */
1303                next_active_state--;
1304                }
1305              ADD_NEW(state_offset + count, 0);
1306              }
1307          }          }
1308        break;        break;
1309    
1310        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1311        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1312        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1313          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1314        count = 2;        count = 2;
1315        goto QS2;        goto QS2;
1316    
1317        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1318        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1319          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1320        count = 0;        count = 0;
1321    
1322        QS2:        QS2:
1323    
1324        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1325        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1326          {          {
1327          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1328          int ncount = 0;          int ncount = 0;
1329            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1330                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1331              {
1332              active_count--;           /* Remove non-match possibility */
1333              next_active_state--;
1334              }
1335          while (nptr < end_subject)          while (nptr < end_subject)
1336            {            {
1337            int nd;            int nd;
1338            int ndlen = 1;            int ndlen = 1;
1339            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1340            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1341            ncount++;            ncount++;
1342            nptr += ndlen;            nptr += ndlen;
1343            }            }
1344          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1345          }          }
1346        break;        break;
1347    #endif
1348    
1349          /*-----------------------------------------------------------------*/
1350          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1351          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1352          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1353          count = 2;
1354          goto QS3;
1355    
1356          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1357          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1358          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1359          count = 0;
1360    
1361          QS3:
1362          ADD_ACTIVE(state_offset + 2, 0);
1363          if (clen > 0)
1364            {
1365            int ncount = 0;
1366            switch (c)
1367              {
1368              case 0x000b:
1369              case 0x000c:
1370              case 0x0085:
1371              case 0x2028:
1372              case 0x2029:
1373              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1374              goto ANYNL02;
1375    
1376              case 0x000d:
1377              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1378              /* Fall through */
1379    
1380              ANYNL02:
1381              case 0x000a:
1382              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1383                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1384                {
1385                active_count--;           /* Remove non-match possibility */
1386                next_active_state--;
1387                }
1388              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1389              break;
1390    
1391              default:
1392              break;
1393              }
1394            }
1395          break;
1396    
1397          /*-----------------------------------------------------------------*/
1398          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1399          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1400          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1401          count = 2;
1402          goto QS4;
1403    
1404          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1405          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1406          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1407          count = 0;
1408    
1409          QS4:
1410          ADD_ACTIVE(state_offset + 2, 0);
1411          if (clen > 0)
1412            {
1413            BOOL OK;
1414            switch (c)
1415              {
1416              case 0x000a:
1417              case 0x000b:
1418              case 0x000c:
1419              case 0x000d:
1420              case 0x0085:
1421              case 0x2028:
1422              case 0x2029:
1423              OK = TRUE;
1424              break;
1425    
1426              default:
1427              OK = FALSE;
1428              break;
1429              }
1430            if (OK == (d == OP_VSPACE))
1431              {
1432              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1433                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1434                {
1435                active_count--;           /* Remove non-match possibility */
1436                next_active_state--;
1437                }
1438              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1439              }
1440            }
1441          break;
1442    
1443          /*-----------------------------------------------------------------*/
1444          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1445          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1446          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1447          count = 2;
1448          goto QS5;
1449    
1450          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1451          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1452          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1453          count = 0;
1454    
1455          QS5:
1456          ADD_ACTIVE(state_offset + 2, 0);
1457          if (clen > 0)
1458            {
1459            BOOL OK;
1460            switch (c)
1461              {
1462              case 0x09:      /* HT */
1463              case 0x20:      /* SPACE */
1464              case 0xa0:      /* NBSP */
1465              case 0x1680:    /* OGHAM SPACE MARK */
1466              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1467              case 0x2000:    /* EN QUAD */
1468              case 0x2001:    /* EM QUAD */
1469              case 0x2002:    /* EN SPACE */
1470              case 0x2003:    /* EM SPACE */
1471              case 0x2004:    /* THREE-PER-EM SPACE */
1472              case 0x2005:    /* FOUR-PER-EM SPACE */
1473              case 0x2006:    /* SIX-PER-EM SPACE */
1474              case 0x2007:    /* FIGURE SPACE */
1475              case 0x2008:    /* PUNCTUATION SPACE */
1476              case 0x2009:    /* THIN SPACE */
1477              case 0x200A:    /* HAIR SPACE */
1478              case 0x202f:    /* NARROW NO-BREAK SPACE */
1479              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1480              case 0x3000:    /* IDEOGRAPHIC SPACE */
1481              OK = TRUE;
1482              break;
1483    
1484              default:
1485              OK = FALSE;
1486              break;
1487              }
1488    
1489            if (OK == (d == OP_HSPACE))
1490              {
1491              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1492                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1493                {
1494                active_count--;           /* Remove non-match possibility */
1495                next_active_state--;
1496                }
1497              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1498              }
1499            }
1500          break;
1501    
1502        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1503    #ifdef SUPPORT_UCP
1504        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1505        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1506        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1507          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1508        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1509          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1510        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1511        if (clen > 0)        if (clen > 0)
1512          {          {
1513          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1514          int rqdtype = code[4];          const ucd_record * prop = GET_UCD(c);
1515          if ((d == OP_PROP) ==          switch(code[4])
             (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))  
1516            {            {
1517              case PT_ANY:
1518              OK = TRUE;
1519              break;
1520    
1521              case PT_LAMP:
1522              OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1523              break;
1524    
1525              case PT_GC:
1526              OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1527              break;
1528    
1529              case PT_PC:
1530              OK = prop->chartype == code[5];
1531              break;
1532    
1533              case PT_SC:
1534              OK = prop->script == code[5];
1535              break;
1536    
1537              /* Should never occur, but keep compilers from grumbling. */
1538    
1539              default:
1540              OK = codevalue != OP_PROP;
1541              break;
1542              }
1543    
1544            if (OK == (d == OP_PROP))
1545              {
1546              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1547                {
1548                active_count--;           /* Remove non-match possibility */
1549                next_active_state--;
1550                }
1551            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1552              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1553            else            else
1554              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1555            }            }
# Line 975  for (;;) Line 1560  for (;;)
1560        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1561        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1562        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1563          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1564        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1565          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1566        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1567        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1568          {          {
1569          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1570          int ncount = 0;          int ncount = 0;
1571            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1572              {
1573              active_count--;           /* Remove non-match possibility */
1574              next_active_state--;
1575              }
1576          while (nptr < end_subject)          while (nptr < end_subject)
1577            {            {
1578            int nd;            int nd;
1579            int ndlen = 1;            int ndlen = 1;
1580            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1581            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(nd) != ucp_M) break;
1582            ncount++;            ncount++;
1583            nptr += ndlen;            nptr += ndlen;
1584            }            }
# Line 997  for (;;) Line 1588  for (;;)
1588            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1589          }          }
1590        break;        break;
1591    #endif
1592    
1593          /*-----------------------------------------------------------------*/
1594          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1595          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1596          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1597          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1598          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1599            { ADD_ACTIVE(state_offset + 4, 0); }
1600          count = current_state->count;  /* Number already matched */
1601          if (clen > 0)
1602            {
1603            int ncount = 0;
1604            switch (c)
1605              {
1606              case 0x000b:
1607              case 0x000c:
1608              case 0x0085:
1609              case 0x2028:
1610              case 0x2029:
1611              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1612              goto ANYNL03;
1613    
1614              case 0x000d:
1615              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1616              /* Fall through */
1617    
1618              ANYNL03:
1619              case 0x000a:
1620              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1621                {
1622                active_count--;           /* Remove non-match possibility */
1623                next_active_state--;
1624                }
1625              if (++count >= GET2(code, 1))
1626                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1627              else
1628                { ADD_NEW_DATA(-state_offset, count, ncount); }
1629              break;
1630    
1631              default:
1632              break;
1633              }
1634            }
1635          break;
1636    
1637          /*-----------------------------------------------------------------*/
1638          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1639          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1640          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1641          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1642          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1643            { ADD_ACTIVE(state_offset + 4, 0); }
1644          count = current_state->count;  /* Number already matched */
1645          if (clen > 0)
1646            {
1647            BOOL OK;
1648            switch (c)
1649              {
1650              case 0x000a:
1651              case 0x000b:
1652              case 0x000c:
1653              case 0x000d:
1654              case 0x0085:
1655              case 0x2028:
1656              case 0x2029:
1657              OK = TRUE;
1658              break;
1659    
1660              default:
1661              OK = FALSE;
1662              }
1663    
1664            if (OK == (d == OP_VSPACE))
1665              {
1666              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1667                {
1668                active_count--;           /* Remove non-match possibility */
1669                next_active_state--;
1670                }
1671              if (++count >= GET2(code, 1))
1672                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1673              else
1674                { ADD_NEW_DATA(-state_offset, count, 0); }
1675              }
1676            }
1677          break;
1678    
1679          /*-----------------------------------------------------------------*/
1680          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1681          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1682          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1683          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1684          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1685            { ADD_ACTIVE(state_offset + 4, 0); }
1686          count = current_state->count;  /* Number already matched */
1687          if (clen > 0)
1688            {
1689            BOOL OK;
1690            switch (c)
1691              {
1692              case 0x09:      /* HT */
1693              case 0x20:      /* SPACE */
1694              case 0xa0:      /* NBSP */
1695              case 0x1680:    /* OGHAM SPACE MARK */
1696              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1697              case 0x2000:    /* EN QUAD */
1698              case 0x2001:    /* EM QUAD */
1699              case 0x2002:    /* EN SPACE */
1700              case 0x2003:    /* EM SPACE */
1701              case 0x2004:    /* THREE-PER-EM SPACE */
1702              case 0x2005:    /* FOUR-PER-EM SPACE */
1703              case 0x2006:    /* SIX-PER-EM SPACE */
1704              case 0x2007:    /* FIGURE SPACE */
1705              case 0x2008:    /* PUNCTUATION SPACE */
1706              case 0x2009:    /* THIN SPACE */
1707              case 0x200A:    /* HAIR SPACE */
1708              case 0x202f:    /* NARROW NO-BREAK SPACE */
1709              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1710              case 0x3000:    /* IDEOGRAPHIC SPACE */
1711              OK = TRUE;
1712              break;
1713    
1714              default:
1715              OK = FALSE;
1716              break;
1717              }
1718    
1719            if (OK == (d == OP_HSPACE))
1720              {
1721              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1722                {
1723                active_count--;           /* Remove non-match possibility */
1724                next_active_state--;
1725                }
1726              if (++count >= GET2(code, 1))
1727                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1728              else
1729                { ADD_NEW_DATA(-state_offset, count, 0); }
1730              }
1731            }
1732          break;
1733    
1734  /* ========================================================================== */  /* ========================================================================== */
1735        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1018  for (;;) Line 1751  for (;;)
1751          {          {
1752          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1753            {            {
1754              unsigned int othercase;
1755            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1756    
1757            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1758            other case of the character, if there is one. The result of            other case of the character. */
           ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1759    
1760  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1761            if (ucp_findchar(c, &chartype, &othercase) < 0)            othercase = UCD_OTHERCASE(c);
1762    #else
1763              othercase = NOTACHAR;
1764  #endif  #endif
             othercase = -1;  
1765    
1766            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1767            }            }
# Line 1050  for (;;) Line 1783  for (;;)
1783        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1784    
1785        case OP_EXTUNI:        case OP_EXTUNI:
1786        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1787          {          {
1788          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1789          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1791  for (;;)
1791            {            {
1792            int nclen = 1;            int nclen = 1;
1793            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1794            if (ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (UCD_CATEGORY(c) != ucp_M) break;
1795            ncount++;            ncount++;
1796            nptr += nclen;            nptr += nclen;
1797            }            }
# Line 1068  for (;;) Line 1801  for (;;)
1801  #endif  #endif
1802    
1803        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1804          /* This is tricky like EXTUNI because it too can match more than one
1805          character (when CR is followed by LF). In this case, set up a negative
1806          state to wait for one character to pass before continuing. */
1807    
1808          case OP_ANYNL:
1809          if (clen > 0) switch(c)
1810            {
1811            case 0x000b:
1812            case 0x000c:
1813            case 0x0085:
1814            case 0x2028:
1815            case 0x2029:
1816            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1817    
1818            case 0x000a:
1819            ADD_NEW(state_offset + 1, 0);
1820            break;
1821    
1822            case 0x000d:
1823            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1824              {
1825              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1826              }
1827            else
1828              {
1829              ADD_NEW(state_offset + 1, 0);
1830              }
1831            break;
1832            }
1833          break;
1834    
1835          /*-----------------------------------------------------------------*/
1836          case OP_NOT_VSPACE:
1837          if (clen > 0) switch(c)
1838            {
1839            case 0x000a:
1840            case 0x000b:
1841            case 0x000c:
1842            case 0x000d:
1843            case 0x0085:
1844            case 0x2028:
1845            case 0x2029:
1846            break;
1847    
1848            default:
1849            ADD_NEW(state_offset + 1, 0);
1850            break;
1851            }
1852          break;
1853    
1854          /*-----------------------------------------------------------------*/
1855          case OP_VSPACE:
1856          if (clen > 0) switch(c)
1857            {
1858            case 0x000a:
1859            case 0x000b:
1860            case 0x000c:
1861            case 0x000d:
1862            case 0x0085:
1863            case 0x2028:
1864            case 0x2029:
1865            ADD_NEW(state_offset + 1, 0);
1866            break;
1867    
1868            default: break;
1869            }
1870          break;
1871    
1872          /*-----------------------------------------------------------------*/
1873          case OP_NOT_HSPACE:
1874          if (clen > 0) switch(c)
1875            {
1876            case 0x09:      /* HT */
1877            case 0x20:      /* SPACE */
1878            case 0xa0:      /* NBSP */
1879            case 0x1680:    /* OGHAM SPACE MARK */
1880            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1881            case 0x2000:    /* EN QUAD */
1882            case 0x2001:    /* EM QUAD */
1883            case 0x2002:    /* EN SPACE */
1884            case 0x2003:    /* EM SPACE */
1885            case 0x2004:    /* THREE-PER-EM SPACE */
1886            case 0x2005:    /* FOUR-PER-EM SPACE */
1887            case 0x2006:    /* SIX-PER-EM SPACE */
1888            case 0x2007:    /* FIGURE SPACE */
1889            case 0x2008:    /* PUNCTUATION SPACE */
1890            case 0x2009:    /* THIN SPACE */
1891            case 0x200A:    /* HAIR SPACE */
1892            case 0x202f:    /* NARROW NO-BREAK SPACE */
1893            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1894            case 0x3000:    /* IDEOGRAPHIC SPACE */
1895            break;
1896    
1897            default:
1898            ADD_NEW(state_offset + 1, 0);
1899            break;
1900            }
1901          break;
1902    
1903          /*-----------------------------------------------------------------*/
1904          case OP_HSPACE:
1905          if (clen > 0) switch(c)
1906            {
1907            case 0x09:      /* HT */
1908            case 0x20:      /* SPACE */
1909            case 0xa0:      /* NBSP */
1910            case 0x1680:    /* OGHAM SPACE MARK */
1911            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1912            case 0x2000:    /* EN QUAD */
1913            case 0x2001:    /* EM QUAD */
1914            case 0x2002:    /* EN SPACE */
1915            case 0x2003:    /* EM SPACE */
1916            case 0x2004:    /* THREE-PER-EM SPACE */
1917            case 0x2005:    /* FOUR-PER-EM SPACE */
1918            case 0x2006:    /* SIX-PER-EM SPACE */
1919            case 0x2007:    /* FIGURE SPACE */
1920            case 0x2008:    /* PUNCTUATION SPACE */
1921            case 0x2009:    /* THIN SPACE */
1922            case 0x200A:    /* HAIR SPACE */
1923            case 0x202f:    /* NARROW NO-BREAK SPACE */
1924            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1925            case 0x3000:    /* IDEOGRAPHIC SPACE */
1926            ADD_NEW(state_offset + 1, 0);
1927            break;
1928            }
1929          break;
1930    
1931          /*-----------------------------------------------------------------*/
1932        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1933        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1934        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1075  for (;;) Line 1936  for (;;)
1936        case OP_NOT:        case OP_NOT:
1937        if (clen > 0)        if (clen > 0)
1938          {          {
1939          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1940          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1941          }          }
1942        break;        break;
# Line 1083  for (;;) Line 1944  for (;;)
1944        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1945        case OP_PLUS:        case OP_PLUS:
1946        case OP_MINPLUS:        case OP_MINPLUS:
1947          case OP_POSPLUS:
1948        case OP_NOTPLUS:        case OP_NOTPLUS:
1949        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1950          case OP_NOTPOSPLUS:
1951        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1952        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1953        if (clen > 0)        if (clen > 0)
1954          {          {
1955          int otherd = -1;          unsigned int otherd = NOTACHAR;
1956          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1957            {            {
1958  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1959            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1960              {              {
1961  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1962              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
1963  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1964              }              }
1965            else            else
# Line 1104  for (;;) Line 1967  for (;;)
1967            otherd = fcc[d];            otherd = fcc[d];
1968            }            }
1969          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1970            { count++; ADD_NEW(state_offset, count); }            {
1971              if (count > 0 &&
1972                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1973                {
1974                active_count--;             /* Remove non-match possibility */
1975                next_active_state--;
1976                }
1977              count++;
1978              ADD_NEW(state_offset, count);
1979              }
1980          }          }
1981        break;        break;
1982    
1983        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1984        case OP_QUERY:        case OP_QUERY:
1985        case OP_MINQUERY:        case OP_MINQUERY:
1986          case OP_POSQUERY:
1987        case OP_NOTQUERY:        case OP_NOTQUERY:
1988        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1989          case OP_NOTPOSQUERY:
1990        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1991        if (clen > 0)        if (clen > 0)
1992          {          {
1993          int otherd = -1;          unsigned int otherd = NOTACHAR;
1994          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1995            {            {
1996  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1997            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1998              {              {
1999  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2000              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
2001  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2002              }              }
2003            else            else
# Line 1131  for (;;) Line 2005  for (;;)
2005            otherd = fcc[d];            otherd = fcc[d];
2006            }            }
2007          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2008            { ADD_NEW(state_offset + dlen + 1, 0); }            {
2009              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2010                {
2011                active_count--;            /* Remove non-match possibility */
2012                next_active_state--;
2013                }
2014              ADD_NEW(state_offset + dlen + 1, 0);
2015              }
2016          }          }
2017        break;        break;
2018    
2019        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2020        case OP_STAR:        case OP_STAR:
2021        case OP_MINSTAR:        case OP_MINSTAR:
2022          case OP_POSSTAR:
2023        case OP_NOTSTAR:        case OP_NOTSTAR:
2024        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
2025          case OP_NOTPOSSTAR:
2026        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2027        if (clen > 0)        if (clen > 0)
2028          {          {
2029          int otherd = -1;          unsigned int otherd = NOTACHAR;
2030          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2031            {            {
2032  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2033            if (utf8 && c >= 128)            if (utf8 && d >= 128)
2034              {              {
2035  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2036              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
2037  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2038              }              }
2039            else            else
# Line 1158  for (;;) Line 2041  for (;;)
2041            otherd = fcc[d];            otherd = fcc[d];
2042            }            }
2043          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2044            { ADD_NEW(state_offset, 0); }            {
2045              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2046                {
2047                active_count--;            /* Remove non-match possibility */
2048                next_active_state--;
2049                }
2050              ADD_NEW(state_offset, 0);
2051              }
2052          }          }
2053        break;        break;
2054    
2055        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2056        case OP_EXACT:        case OP_EXACT:
2057          case OP_NOTEXACT:
2058          count = current_state->count;  /* Number already matched */
2059          if (clen > 0)
2060            {
2061            unsigned int otherd = NOTACHAR;
2062            if ((ims & PCRE_CASELESS) != 0)
2063              {
2064    #ifdef SUPPORT_UTF8
2065              if (utf8 && d >= 128)
2066                {
2067    #ifdef SUPPORT_UCP
2068                otherd = UCD_OTHERCASE(d);
2069    #endif  /* SUPPORT_UCP */
2070                }
2071              else
2072    #endif  /* SUPPORT_UTF8 */
2073              otherd = fcc[d];
2074              }
2075            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2076              {
2077              if (++count >= GET2(code, 1))
2078                { ADD_NEW(state_offset + dlen + 3, 0); }
2079              else
2080                { ADD_NEW(state_offset, count); }
2081              }
2082            }
2083          break;
2084    
2085          /*-----------------------------------------------------------------*/
2086        case OP_UPTO:        case OP_UPTO:
2087        case OP_MINUPTO:        case OP_MINUPTO:
2088        case OP_NOTEXACT:        case OP_POSUPTO:
2089        case OP_NOTUPTO:        case OP_NOTUPTO:
2090        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2091        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2092          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2093        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2094        if (clen > 0)        if (clen > 0)
2095          {          {
2096          int otherd = -1;          unsigned int otherd = NOTACHAR;
2097          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2098            {            {
2099  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2100            if (utf8 && c >= 128)            if (utf8 && d >= 128)
2101              {              {
2102  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2103              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = UCD_OTHERCASE(d);
2104  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2105              }              }
2106            else            else
# Line 1190  for (;;) Line 2109  for (;;)
2109            }            }
2110          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2111            {            {
2112              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2113                {
2114                active_count--;             /* Remove non-match possibility */
2115                next_active_state--;
2116                }
2117            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2118              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2119            else            else
# Line 1267  for (;;) Line 2191  for (;;)
2191              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
2192            if (isinclass)            if (isinclass)
2193              {              {
2194              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
2195                if (++count >= max && max != 0)   /* Max 0 => no limit */
2196                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
2197              else              else
2198                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1283  for (;;) Line 2208  for (;;)
2208    
2209  /* ========================================================================== */  /* ========================================================================== */
2210        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2211        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2212          (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2213          though the other "backtracking verbs" are not supported. */
2214    
2215          case OP_FAIL:
2216          forced_fail++;    /* Count FAILs for multiple states */
2217          break;
2218    
2219        case OP_ASSERT:        case OP_ASSERT:
2220        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 1317  for (;;) Line 2248  for (;;)
2248    
2249        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2250        case OP_COND:        case OP_COND:
2251          case OP_SCOND:
2252          {          {
2253          int local_offsets[1000];          int local_offsets[1000];
2254          int local_workspace[1000];          int local_workspace[1000];
2255          int condcode = code[LINK_SIZE+1];          int codelink = GET(code, 1);
2256            int condcode;
2257    
2258            /* Because of the way auto-callout works during compile, a callout item
2259            is inserted between OP_COND and an assertion condition. This does not
2260            happen for the other conditions. */
2261    
2262            if (code[LINK_SIZE+1] == OP_CALLOUT)
2263              {
2264              rrc = 0;
2265              if (pcre_callout != NULL)
2266                {
2267                pcre_callout_block cb;
2268                cb.version          = 1;   /* Version 1 of the callout block */
2269                cb.callout_number   = code[LINK_SIZE+2];
2270                cb.offset_vector    = offsets;
2271                cb.subject          = (PCRE_SPTR)start_subject;
2272                cb.subject_length   = end_subject - start_subject;
2273                cb.start_match      = current_subject - start_subject;
2274                cb.current_position = ptr - start_subject;
2275                cb.pattern_position = GET(code, LINK_SIZE + 3);
2276                cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2277                cb.capture_top      = 1;
2278                cb.capture_last     = -1;
2279                cb.callout_data     = md->callout_data;
2280                if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
2281                }
2282              if (rrc > 0) break;                      /* Fail this thread */
2283              code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
2284              }
2285    
2286            condcode = code[LINK_SIZE+1];
2287    
2288            /* Back reference conditions are not supported */
2289    
2290            if (condcode == OP_CREF || condcode == OP_NCREF)
2291              return PCRE_ERROR_DFA_UCOND;
2292    
2293            /* The DEFINE condition is always false */
2294    
2295          /* The only supported version of OP_CREF is for the value 0xffff, which          if (condcode == OP_DEF)
2296          means "test if in a recursion". */            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2297    
2298          if (condcode == OP_CREF)          /* The only supported version of OP_RREF is for the value RREF_ANY,
2299            which means "test if in any recursion". We can't test for specifically
2300            recursed groups. */
2301    
2302            else if (condcode == OP_RREF || condcode == OP_NRREF)
2303            {            {
2304            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2305            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2306            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0)
2307              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2308              else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2309            }            }
2310    
2311          /* Otherwise, the condition is an assertion */          /* Otherwise, the condition is an assertion */
# Line 1360  for (;;) Line 2335  for (;;)
2335                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2336              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2337            else            else
2338              { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2339            }            }
2340          }          }
2341        break;        break;
# Line 1424  for (;;) Line 2399  for (;;)
2399        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2400        case OP_ONCE:        case OP_ONCE:
2401          {          {
         const uschar *endcode;  
2402          int local_offsets[2];          int local_offsets[2];
2403          int local_workspace[1000];          int local_workspace[1000];
2404    
# Line 1446  for (;;) Line 2420  for (;;)
2420            const uschar *end_subpattern = code;            const uschar *end_subpattern = code;
2421            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2422            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
           BOOL is_repeated;  
2423    
2424            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2425              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
# Line 1514  for (;;) Line 2487  for (;;)
2487        /* Handle callouts */        /* Handle callouts */
2488    
2489        case OP_CALLOUT:        case OP_CALLOUT:
2490          rrc = 0;
2491        if (pcre_callout != NULL)        if (pcre_callout != NULL)
2492          {          {
         int rrc;  
2493          pcre_callout_block cb;          pcre_callout_block cb;
2494          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2495          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2496          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2497          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2498          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
2499          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
2500          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1531  for (;;) Line 2504  for (;;)
2504          cb.capture_last     = -1;          cb.capture_last     = -1;
2505          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
2506          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
         if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }  
2507          }          }
2508          if (rrc == 0)
2509            { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2510        break;        break;
2511    
2512    
# Line 1548  for (;;) Line 2522  for (;;)
2522    /* We have finished the processing at the current subject character. If no    /* We have finished the processing at the current subject character. If no
2523    new states have been set for the next character, we have found all the    new states have been set for the next character, we have found all the
2524    matches that we are going to find. If we are at the top level and partial    matches that we are going to find. If we are at the top level and partial
2525    matching has been requested, check for appropriate conditions. */    matching has been requested, check for appropriate conditions. The "forced_
2526      fail" variable counts the number of (*F) encountered for the character. If it
2527      is equal to the original active_count (saved in workspace[1]) it means that
2528      (*F) was found on every active state. In this case we don't want to give a
2529      partial match. */
2530    
2531    if (new_count <= 0)    if (new_count <= 0)
2532      {      {
2533      if (match_count < 0 &&                     /* No matches found */      if (rlevel == 1 &&                               /* Top level, and */
2534          rlevel == 1 &&                         /* Top level match function */          reached_end != workspace[1] &&               /* Not all reached end */
2535          (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */          forced_fail != workspace[1] &&               /* Not all forced fail & */
2536          ptr >= end_subject &&                  /* Reached end of subject */          (                                            /* either... */
2537          ptr > current_subject)                 /* Matched non-empty string */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
2538            ||                                           /* or... */
2539            ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
2540             match_count < 0)                            /* no matches */
2541            ) &&                                         /* And... */
2542            ptr >= end_subject &&                     /* Reached end of subject */
2543            ptr > current_subject)                    /* Matched non-empty string */
2544        {        {
2545        if (offsetcount >= 2)        if (offsetcount >= 2)
2546          {          {
2547          offsets[0] = current_subject - start_subject;          offsets[0] = md->start_used_ptr - start_subject;
2548          offsets[1] = end_subject - start_subject;          offsets[1] = end_subject - start_subject;
2549          }          }
2550        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
# Line 1569  for (;;) Line 2553  for (;;)
2553      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2554        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2555        rlevel*2-2, SP));        rlevel*2-2, SP));
2556      return match_count;      break;        /* In effect, "return", but see the comment below */
2557      }      }
2558    
2559    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1577  for (;;) Line 2561  for (;;)
2561    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2562    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2563    
2564  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2565    if we use "return" above, we have compiler trouble. Some compilers warn if
2566    there's nothing here because they think the function doesn't return a value. On
2567    the other hand, if we put a dummy statement here, some more clever compilers
2568    complain that it can't be reached. Sigh. */
2569    
2570  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2571  }  }
2572    
2573    
# Line 1597  is not anchored. Line 2583  is not anchored.
2583    
2584  Arguments:  Arguments:
2585    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2586    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2587    subject         points to the subject string    subject         points to the subject string
2588    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2589    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1613  Returns: > 0 => number of match Line 2599  Returns: > 0 => number of match
2599                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2600  */  */
2601    
2602  EXPORT int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2603  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2604    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2605    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2606  {  {
2607  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2608  dfa_match_data match_block;  dfa_match_data match_block;
2609    dfa_match_data *md = &match_block;
2610  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2611  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2612    
# Line 1634  BOOL req_byte_caseless = FALSE; Line 2621  BOOL req_byte_caseless = FALSE;
2621  int first_byte = -1;  int first_byte = -1;
2622  int req_byte = -1;  int req_byte = -1;
2623  int req_byte2 = -1;  int req_byte2 = -1;
2624    int newline;
2625    
2626  /* Plausibility checks */  /* Plausibility checks */
2627    
# Line 1648  flipping, so we scan the extra_data bloc Line 2636  flipping, so we scan the extra_data bloc
2636  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2637  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2638    
2639  match_block.tables = re->tables;  md->tables = re->tables;
2640  match_block.callout_data = NULL;  md->callout_data = NULL;
2641    
2642  if (extra_data != NULL)  if (extra_data != NULL)
2643    {    {
# Line 1657  if (extra_data != NULL) Line 2645  if (extra_data != NULL)
2645    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2646      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
2647    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2648      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2649        return PCRE_ERROR_DFA_UMLIMIT;
2650    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2651      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2652    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2653      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2654    }    }
2655    
2656  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
2657  test for a regex that was compiled on a host of opposite endianness. If this is  test for a regex that was compiled on a host of opposite endianness. If this is
2658  the case, flipped values are put in internal_re and internal_study if there was  the case, flipped values are put in internal_re and internal_study if there was
# Line 1681  current_subject = (const unsigned char * Line 2671  current_subject = (const unsigned char *
2671  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2672  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2673    
2674    #ifdef SUPPORT_UTF8
2675  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2676  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
2677    utf8 = FALSE;
2678    #endif
2679    
2680    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2681      (re->options & PCRE_ANCHORED) != 0;
2682    
2683  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2684    
2685  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2686      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2687  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2688  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2689  match_block.moptions = options;  md->start_offset = start_offset;
2690  match_block.poptions = re->options;  md->moptions = options;
2691    md->poptions = re->options;
2692    
2693    /* If the BSR option is not set at match time, copy what was set
2694    at compile time. */
2695    
2696    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2697      {
2698      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2699        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2700    #ifdef BSR_ANYCRLF
2701      else md->moptions |= PCRE_BSR_ANYCRLF;
2702    #endif
2703      }
2704    
2705    /* Handle different types of newline. The three bits give eight cases. If
2706    nothing is set at run time, whatever was used at compile time applies. */
2707    
2708    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2709             PCRE_NEWLINE_BITS)
2710      {
2711      case 0: newline = NEWLINE; break;   /* Compile-time default */
2712      case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2713      case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2714      case PCRE_NEWLINE_CR+
2715           PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2716      case PCRE_NEWLINE_ANY: newline = -1; break;
2717      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2718      default: return PCRE_ERROR_BADNEWLINE;
2719      }
2720    
2721    if (newline == -2)
2722      {
2723      md->nltype = NLTYPE_ANYCRLF;
2724      }
2725    else if (newline < 0)
2726      {
2727      md->nltype = NLTYPE_ANY;
2728      }
2729    else
2730      {
2731      md->nltype = NLTYPE_FIXED;
2732      if (newline > 255)
2733        {
2734        md->nllen = 2;
2735        md->nl[0] = (newline >> 8) & 255;
2736        md->nl[1] = newline & 255;
2737        }
2738      else
2739        {
2740        md->nllen = 1;
2741        md->nl[0] = newline;
2742        }
2743      }
2744    
2745  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2746  back the character offset. */  back the character offset. */
# Line 1717  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2766  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2766  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2767  in other programs later. */  in other programs later. */
2768    
2769  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2770    
2771  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2772  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2773    
2774  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2775  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2776  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2777    
2778  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1734  studied, there may be a bitmap of possib Line 2783  studied, there may be a bitmap of possib
2783    
2784  if (!anchored)  if (!anchored)
2785    {    {
2786    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2787      {      {
2788      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2789      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1742  if (!anchored) Line 2791  if (!anchored)
2791      }      }
2792    else    else
2793      {      {
2794      if (startline && study != NULL &&      if (!startline && study != NULL &&
2795           (study->options & PCRE_STUDY_MAPPED) != 0)           (study->flags & PCRE_STUDY_MAPPED) != 0)
2796        start_bits = study->start_bits;        start_bits = study->start_bits;
2797      }      }
2798    }    }
# Line 1751  if (!anchored) Line 2800  if (!anchored)
2800  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2801  character" set. */  character" set. */
2802    
2803  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2804    {    {
2805    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2806    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2807    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2808    }    }
2809    
2810  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
2811  failed match. Unless restarting, optimize by moving to the first match  failed match. If not restarting, perform certain optimizations at the start of
2812  character if possible, when not anchored. Then unless wanting a partial match,  a match. */
 check for a required later character. */  
2813    
2814  for (;;)  for (;;)
2815    {    {
# Line 1771  for (;;) Line 2819  for (;;)
2819      {      {
2820      const uschar *save_end_subject = end_subject;      const uschar *save_end_subject = end_subject;
2821    
2822      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* If firstline is TRUE, the start of the match is constrained to the first
2823      start of the match is constrained to the first line of a multiline string.      line of a multiline string. Implement this by temporarily adjusting
2824      Implement this by temporarily adjusting end_subject so that we stop scanning      end_subject so that we stop scanning at a newline. If the match fails at
2825      at a newline. If the match fails at the newline, later code breaks this loop.      the newline, later code breaks this loop. */
     */  
2826    
2827      if (firstline)      if (firstline)
2828        {        {
2829        const uschar *t = current_subject;        USPTR t = current_subject;
2830        while (t < save_end_subject && *t != '\n') t++;  #ifdef SUPPORT_UTF8
2831        end_subject = t;        if (utf8)
       }  
   
     if (first_byte >= 0)  
       {  
       if (first_byte_caseless)  
         while (current_subject < end_subject &&  
                lcc[*current_subject] != first_byte)  
           current_subject++;  
       else  
         while (current_subject < end_subject && *current_subject != first_byte)  
           current_subject++;  
       }  
   
     /* Or to just after \n for a multiline match if possible */  
   
     else if (startline)  
       {  
       if (current_subject > match_block.start_subject + start_offset)  
2832          {          {
2833          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (t < md->end_subject && !IS_NEWLINE(t))
2834            current_subject++;            {
2835              t++;
2836              while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2837              }
2838          }          }
2839          else
2840    #endif
2841          while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2842          end_subject = t;
2843        }        }
2844    
2845      /* Or to a non-unique first char after study */      /* There are some optimizations that avoid running the match if a known
2846        starting point is not found. However, there is an option that disables
2847        these, for testing and for ensuring that all callouts do actually occur. */
2848    
2849      else if (start_bits != NULL)      if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2850        {        {
2851        while (current_subject < end_subject)        /* Advance to a known first byte. */
2852    
2853          if (first_byte >= 0)
2854          {          {
2855          register unsigned int c = *current_subject;          if (first_byte_caseless)
2856          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;            while (current_subject < end_subject &&
2857            else break;                   lcc[*current_subject] != first_byte)
2858                current_subject++;
2859            else
2860              while (current_subject < end_subject &&
2861                     *current_subject != first_byte)
2862                current_subject++;
2863          }          }
       }  
   
     /* Restore fudged end_subject */  
2864    
2865      end_subject = save_end_subject;        /* Or to just after a linebreak for a multiline match if possible */
     }  
   
   /* If req_byte is set, we know that that character must appear in the subject  
   for the match to succeed. If the first character is set, req_byte must be  
   later in the subject; otherwise the test starts at the match point. This  
   optimization can save a huge amount of work in patterns with nested unlimited  
   repeats that aren't going to match. Writing separate code for cased/caseless  
   versions makes it go faster, as does using an autoincrement and backing off  
   on a match.  
   
   HOWEVER: when the subject string is very, very long, searching to its end can  
   take a long time, and give bad performance on quite ordinary patterns. This  
   showed up when somebody was matching /^C/ on a 32-megabyte string... so we  
   don't do this when the string is sufficiently long.  
   
   ALSO: this processing is disabled when partial matching is requested.  
   */  
   
   if (req_byte >= 0 &&  
       end_subject - current_subject < REQ_BYTE_MAX &&  
       (options & PCRE_PARTIAL) == 0)  
     {  
     register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);  
2866    
2867      /* We don't need to repeat the search if we haven't yet reached the        else if (startline)
     place we found it at last time. */  
   
     if (p > req_byte_ptr)  
       {  
       if (req_byte_caseless)  
2868          {          {
2869          while (p < end_subject)          if (current_subject > md->start_subject + start_offset)
2870            {            {
2871            register int pp = *p++;  #ifdef SUPPORT_UTF8
2872            if (pp == req_byte || pp == req_byte2) { p--; break; }            if (utf8)
2873                {
2874                while (current_subject < end_subject &&
2875                       !WAS_NEWLINE(current_subject))
2876                  {
2877                  current_subject++;
2878                  while(current_subject < end_subject &&
2879                        (*current_subject & 0xc0) == 0x80)
2880                    current_subject++;
2881                  }
2882                }
2883              else
2884    #endif
2885              while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2886                current_subject++;
2887    
2888              /* If we have just passed a CR and the newline option is ANY or
2889              ANYCRLF, and we are now at a LF, advance the match position by one
2890              more character. */
2891    
2892              if (current_subject[-1] == CHAR_CR &&
2893                   (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2894                   current_subject < end_subject &&
2895                   *current_subject == CHAR_NL)
2896                current_subject++;
2897            }            }
2898          }          }
2899        else  
2900          /* Or to a non-unique first char after study */
2901    
2902          else if (start_bits != NULL)
2903          {          {
2904          while (p < end_subject)          while (current_subject < end_subject)
2905            {            {
2906            if (*p++ == req_byte) { p--; break; }            register unsigned int c = *current_subject;
2907              if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2908                else break;
2909            }            }
2910          }          }
2911          }
2912    
2913        /* If we can't find the required character, break the matching loop,      /* Restore fudged end_subject */
       which will cause a return or PCRE_ERROR_NOMATCH. */  
   
       if (p >= end_subject) break;  
2914    
2915        /* If we have found the required character, save the point where we      end_subject = save_end_subject;
       found it, so that we don't search again next time round the loop if  
       the start hasn't passed this character yet. */  
2916    
2917        req_byte_ptr = p;      /* The following two optimizations are disabled for partial matching or if
2918        disabling is explicitly requested (and of course, by the test above, this
2919        code is not obeyed when restarting after a partial match). */
2920    
2921        if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2922            (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
2923          {
2924          /* If the pattern was studied, a minimum subject length may be set. This
2925          is a lower bound; no actual string of that length may actually match the
2926          pattern. Although the value is, strictly, in characters, we treat it as
2927          bytes to avoid spending too much time in this optimization. */
2928    
2929          if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
2930              end_subject - current_subject < study->minlength)
2931            return PCRE_ERROR_NOMATCH;
2932    
2933          /* If req_byte is set, we know that that character must appear in the
2934          subject for the match to succeed. If the first character is set, req_byte
2935          must be later in the subject; otherwise the test starts at the match
2936          point. This optimization can save a huge amount of work in patterns with
2937          nested unlimited repeats that aren't going to match. Writing separate
2938          code for cased/caseless versions makes it go faster, as does using an
2939          autoincrement and backing off on a match.
2940    
2941          HOWEVER: when the subject string is very, very long, searching to its end
2942          can take a long time, and give bad performance on quite ordinary
2943          patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
2944          string... so we don't do this when the string is sufficiently long. */
2945    
2946          if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
2947            {
2948            register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2949    
2950            /* We don't need to repeat the search if we haven't yet reached the
2951            place we found it at last time. */
2952    
2953            if (p > req_byte_ptr)
2954              {
2955              if (req_byte_caseless)
2956                {
2957                while (p < end_subject)
2958                  {
2959                  register int pp = *p++;
2960                  if (pp == req_byte || pp == req_byte2) { p--; break; }
2961                  }
2962                }
2963              else
2964                {
2965                while (p < end_subject)
2966                  {
2967                  if (*p++ == req_byte) { p--; break; }
2968                  }
2969                }
2970    
2971              /* If we can't find the required character, break the matching loop,
2972              which will cause a return of PCRE_ERROR_NOMATCH. */
2973    
2974              if (p >= end_subject) break;
2975    
2976              /* If we have found the required character, save the point where we
2977              found it, so that we don't search again next time round the loop if
2978              the start hasn't passed this character yet. */
2979    
2980              req_byte_ptr = p;
2981              }
2982            }
2983        }        }
2984      }      }   /* End of optimizations that are done when not restarting */
2985    
2986    /* OK, now we can do the business */    /* OK, now we can do the business */
2987    
2988      md->start_used_ptr = current_subject;
2989    
2990    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2991      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2992      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2993      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2994      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2995      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2996      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2997      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2998      wscount,                                   /* size of same */      wscount,                           /* size of same */
2999      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
3000      0,                                         /* function recurse level */      0,                                 /* function recurse level */
3001      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
3002    
3003    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3004    on only if not anchored. */    on only if not anchored. */
# Line 1902  for (;;) Line 3008  for (;;)
3008    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3009    and firstline is set. */    and firstline is set. */
3010    
3011    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3012    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
3013    if (utf8)    if (utf8)
3014      {      {
3015      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
3016        current_subject++;        current_subject++;
3017      }      }
 #endif  
   
3018    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3019    }  
3020      /* If we have just passed a CR and we are now at a LF, and the pattern does
3021      not contain any explicit matches for \r or \n, and the newline option is CRLF
3022      or ANY or ANYCRLF, advance the match position by one more character. */
3023    
3024      if (current_subject[-1] == CHAR_CR &&
3025          current_subject < end_subject &&
3026          *current_subject == CHAR_NL &&
3027          (re->flags & PCRE_HASCRORLF) == 0 &&
3028            (md->nltype == NLTYPE_ANY ||
3029             md->nltype == NLTYPE_ANYCRLF ||
3030             md->nllen == 2))
3031        current_subject++;
3032    
3033      }   /* "Bumpalong" loop */
3034    
3035  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
3036  }  }

Legend:
Removed from v.77  
changed lines
  Added in v.459

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12