/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 461 - (hide annotations) (download)
Mon Oct 5 10:59:35 2009 UTC (5 years ago) by ph10
File MIME type: text/plain
File size: 102172 byte(s)
Tidy up, remove trailing spaces, etc. for 8.00-RC1.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6 ph10 392 and semantics are as close as possible to those of the Perl 5 language (but see
7 ph10 383 below for why this module is different).
8 nigel 77
9     Written by Philip Hazel
10 ph10 383 Copyright (c) 1997-2009 University of Cambridge
11 nigel 77
12     -----------------------------------------------------------------------------
13     Redistribution and use in source and binary forms, with or without
14     modification, are permitted provided that the following conditions are met:
15    
16     * Redistributions of source code must retain the above copyright notice,
17     this list of conditions and the following disclaimer.
18    
19     * Redistributions in binary form must reproduce the above copyright
20     notice, this list of conditions and the following disclaimer in the
21     documentation and/or other materials provided with the distribution.
22    
23     * Neither the name of the University of Cambridge nor the names of its
24     contributors may be used to endorse or promote products derived from
25     this software without specific prior written permission.
26    
27     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37     POSSIBILITY OF SUCH DAMAGE.
38     -----------------------------------------------------------------------------
39     */
40    
41    
42     /* This module contains the external function pcre_dfa_exec(), which is an
43 nigel 93 alternative matching function that uses a sort of DFA algorithm (not a true
44     FSM). This is NOT Perl-compatible, but it has advantages in certain
45     applications. */
46 nigel 77
47    
48 ph10 461 /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
49     the performance of his patterns greatly. I could not use it as it stood, as it
50     was not thread safe, and made assumptions about pattern sizes. Also, it caused
51 ph10 439 test 7 to loop, and test 9 to crash with a segfault.
52    
53     The issue is the check for duplicate states, which is done by a simple linear
54     search up the state list. (Grep for "duplicate" below to find the code.) For
55     many patterns, there will never be many states active at one time, so a simple
56     linear search is fine. In patterns that have many active states, it might be a
57     bottleneck. The suggested code used an indexing scheme to remember which states
58     had previously been used for each character, and avoided the linear search when
59     it knew there was no chance of a duplicate. This was implemented when adding
60     states to the state lists.
61    
62     I wrote some thread-safe, not-limited code to try something similar at the time
63     of checking for duplicates (instead of when adding states), using index vectors
64     on the stack. It did give a 13% improvement with one specially constructed
65     pattern for certain subject strings, but on other strings and on many of the
66     simpler patterns in the test suite it did worse. The major problem, I think,
67     was the extra time to initialize the index. This had to be done for each call
68     of internal_dfa_exec(). (The supplied patch used a static vector, initialized
69     only once - I suspect this was the cause of the problems with the tests.)
70    
71 ph10 461 Overall, I concluded that the gains in some cases did not outweigh the losses
72 ph10 439 in others, so I abandoned this code. */
73    
74    
75    
76 ph10 200 #ifdef HAVE_CONFIG_H
77 ph10 236 #include "config.h"
78 ph10 200 #endif
79 ph10 199
/* These three names are defined ahead of the #include of pcre_internal.h that
follows. Presumably macros declared there (for example IS_NEWLINE and
WAS_NEWLINE, which this file invokes with only a pointer argument) expand in
terms of them - confirm against pcre_internal.h. */
80 nigel 93 #define NLBLOCK md /* Block containing newline information */
81     #define PSSTART start_subject /* Field containing processed string start */
82     #define PSEND end_subject /* Field containing processed string end */
83    
84 nigel 77 #include "pcre_internal.h"
85    
86    
87     /* For use to indent debugging output */
88    
/* NOTE(review): the DPRINTF calls in this file print SP through "%.*s" with a
precision of rlevel*2-2, so SP has to be at least that many spaces long to give
the intended indent. The literal below may have had its whitespace collapsed
when this listing was produced - confirm against the upstream source. */
89     #define SP " "
90    
91    
92     /*************************************************
93     * Code parameters and static tables *
94     *************************************************/
95    
96     /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
97 nigel 93 into others, under special conditions. A gap of 20 between the blocks should be
98 ph10 182 enough. The resulting opcodes don't have to be less than 256 because they are
99 ph10 178 never stored, so we push them well clear of the normal opcodes. */
100 nigel 77
/* Each offset is added to a type-repeat opcode (codevalue >= OP_TYPESTAR) in
internal_dfa_exec() when the repeated item is one of the kinds noted below. */
101 ph10 178 #define OP_PROP_EXTRA 300 /* item is OP_PROP or OP_NOTPROP */
102     #define OP_EXTUNI_EXTRA 320 /* item is OP_EXTUNI */
103     #define OP_ANYNL_EXTRA 340 /* item is OP_ANYNL */
104     #define OP_HSPACE_EXTRA 360 /* item is OP_HSPACE or OP_NOT_HSPACE */
105     #define OP_VSPACE_EXTRA 380 /* item is OP_VSPACE or OP_NOT_VSPACE */
106 nigel 77
107    
108     /* This table identifies those opcodes that are followed immediately by a
109     character that is to be tested in some way. This makes it possible to
110     centralize the loading of these characters. In the case of Type * etc, the
111     "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
112 ph10 172 small value. ***NOTE*** If the start of this table is modified, the two tables
113 ph10 168 that follow must also be modified. */
114 nigel 77
/* Indexed by opcode. A non-zero entry is the byte offset from the opcode to
the character (or type code) that follows it; internal_dfa_exec() loads the
item found at that offset into "d" before it dispatches on the opcode. A zero
entry means there is nothing to preload. The entries of 3 are presumably
opcodes whose character follows a two-byte repeat count - confirm against the
compiled pattern format. */
115 ph10 327 static const uschar coptable[] = {
116 nigel 77 0, /* End */
117 ph10 168 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
118     0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
119 ph10 341 0, 0, 0, /* Any, AllAny, Anybyte */
120 ph10 178 0, 0, 0, /* NOTPROP, PROP, EXTUNI */
121     0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
122 nigel 77 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
123     1, /* Char */
124     1, /* Charnc */
125     1, /* not */
126     /* Positive single-char repeats */
127     1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
128     3, 3, 3, /* upto, minupto, exact */
129 nigel 93 1, 1, 1, 3, /* *+, ++, ?+, upto+ */
130 nigel 77 /* Negative single-char repeats - only for chars < 256 */
131     1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
132     3, 3, 3, /* NOT upto, minupto, exact */
133 nigel 93 1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */
134 nigel 77 /* Positive type repeats */
135     1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
136     3, 3, 3, /* Type upto, minupto, exact */
137 nigel 93 1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */
138 nigel 77 /* Character class & ref repeats */
139     0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
140     0, 0, /* CRRANGE, CRMINRANGE */
141     0, /* CLASS */
142     0, /* NCLASS */
143     0, /* XCLASS - variable length */
144     0, /* REF */
145     0, /* RECURSE */
146     0, /* CALLOUT */
147     0, /* Alt */
148     0, /* Ket */
149     0, /* KetRmax */
150     0, /* KetRmin */
151     0, /* Assert */
152     0, /* Assert not */
153     0, /* Assert behind */
154     0, /* Assert behind not */
155     0, /* Reverse */
156 nigel 93 0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */
157     0, 0, 0, /* SBRA, SCBRA, SCOND */
158 nigel 77 0, /* CREF */
159 nigel 93 0, /* RREF */
160     0, /* DEF */
161 ph10 210 0, 0, /* BRAZERO, BRAMINZERO */
162     0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
163 ph10 341 0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */
164 nigel 77 };
165    
166     /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
167     and \w */
168    
/* Indexed by opcode, in the same order as the start of coptable. Each entry is
the mask that is ANDed with ctypes[c]; the character-type cases evaluate
((ctypes[c] & toptable1[op]) ^ toptable2[op]) != 0. Both members of a pair
(\D and \d, \S and \s, \W and \w) share the same mask here; toptable2 supplies
the negation. */
169 ph10 327 static const uschar toptable1[] = {
170 ph10 168 0, 0, 0, 0, 0, 0,
171 nigel 77 ctype_digit, ctype_digit,
172     ctype_space, ctype_space,
173     ctype_word, ctype_word,
174 ph10 341 0, 0 /* OP_ANY, OP_ALLANY */
175 nigel 77 };
176    
/* Indexed by opcode, in the same order as the start of coptable. Each entry is
the value XORed with (ctypes[c] & toptable1[op]). For the first member of each
pair the entry equals the mask, so the result is non-zero only when the type
bit is absent; for the second member it is 0, so the result is non-zero only
when the bit is present. For OP_ANY and OP_ALLANY the mask is 0 and the entry
here is 1, so the result is always non-zero. */
177 ph10 327 static const uschar toptable2[] = {
178 ph10 168 0, 0, 0, 0, 0, 0,
179 nigel 77 ctype_digit, 0,
180     ctype_space, 0,
181     ctype_word, 0,
182 ph10 341 1, 1 /* OP_ANY, OP_ALLANY */
183 nigel 77 };
184    
185    
186     /* Structure for holding data about a particular state, which is in effect the
187     current data for an active path through the match tree. It must consist
188     entirely of ints because the working vector we are passed, and which we put
189     these structures in, is a vector of ints. */
190    
/* One entry in the active or new state list. The ADD_ACTIVE and ADD_NEW
macros fill in offset, count and ims only; the ADD_ACTIVE_DATA and ADD_NEW_DATA
variants also set data. */
191     typedef struct stateblock {
192     int offset; /* Offset to opcode; a negative value means "hold off" (see data) */
193     int count; /* Count for repeats */
194     int ims; /* ims flag bits in force when the state was added */
195     int data; /* Some use extra data, e.g. characters still to skip when offset < 0 */
196     } stateblock;
197    
/* Number of ints taken up by one stateblock. internal_dfa_exec() uses it to
turn the workspace size (given in ints) into the number of states that each of
the two state lists can hold. */
198     #define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
199    
200    
201     #ifdef DEBUG
202     /*************************************************
203     * Print character string *
204     *************************************************/
205    
206     /* Character string printing function for debugging.
207    
208     Arguments:
209     p points to string
210     length number of bytes
211     f where to print
212    
213     Returns: nothing
214     */
215    
/* Writes "length" bytes starting at p to the stream f. A byte for which
isprint() is true is written as itself; any other byte is written as \x
followed by two hexadecimal digits. This function sits inside #ifdef DEBUG. */
216     static void
217     pchars(unsigned char *p, int length, FILE *f)
218     {
219     int c;
220     while (length-- > 0) /* one iteration per byte requested */
221     {
222     if (isprint(c = *(p++))) /* fetch the byte into c and advance p */
223     fprintf(f, "%c", c);
224     else
225     fprintf(f, "\\x%02x", c); /* escaped form for non-printing bytes */
226     }
227     }
228     #endif
229    
230    
231    
232     /*************************************************
233     * Execute a Regular Expression - DFA engine *
234     *************************************************/
235    
236     /* This internal function applies a compiled pattern to a subject string,
237     starting at a given point, using a DFA engine. This function is called from the
238     external one, possibly multiple times if the pattern is not anchored. The
239     function calls itself recursively for some kinds of subpattern.
240    
241     Arguments:
242     md the match_data block with fixed information
243     this_start_code the opening bracket of this subexpression's code
244     current_subject where we currently are in the subject string
245     start_offset start offset in the subject string
246     offsets vector to contain the matching string offsets
247     offsetcount size of same
248     workspace vector of workspace
249     wscount size of same
250     ims the current ims flags
251     rlevel function call recursion level
252     recursing regex recursive call level
253    
254 ph10 345 Returns: > 0 => number of match offset pairs placed in offsets
255 ph10 341 = 0 => offsets overflowed; longest matches are present
256 nigel 77 -1 => failed to match
257     < -1 => some kind of unexpected problem
258    
259     The following macros are used for adding states to the two state vectors (one
260     for the current character, one for the following character). */
261    
/* ADD_ACTIVE(x,y): append a state with offset x and count y, plus the current
ims, to the active list and advance next_active_state. The data field is not
written. If active_count has already reached wscount, the enclosing function
returns PCRE_ERROR_DFA_WSSIZE instead (active_count is incremented either way).
The macro uses the locals active_count, next_active_state, wscount, ims and
rlevel of the function it is expanded in. It expands to a bare if/else, so
enclose it in braces when it is the body of another if. */
262     #define ADD_ACTIVE(x,y) \
263     if (active_count++ < wscount) \
264     { \
265     next_active_state->offset = (x); \
266     next_active_state->count = (y); \
267     next_active_state->ims = ims; \
268     next_active_state++; \
269     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
270     } \
271     else return PCRE_ERROR_DFA_WSSIZE
272    
/* ADD_ACTIVE_DATA(x,y,z): append a state with offset x, count y and data z,
plus the current ims, to the active list and advance next_active_state. If
active_count has already reached wscount, the enclosing function returns
PCRE_ERROR_DFA_WSSIZE instead (active_count is incremented either way). The
macro uses the locals active_count, next_active_state, wscount, ims and rlevel
of the function it is expanded in. It expands to a bare if/else, so enclose it
in braces when it is the body of another if. */
273     #define ADD_ACTIVE_DATA(x,y,z) \
274     if (active_count++ < wscount) \
275     { \
276     next_active_state->offset = (x); \
277     next_active_state->count = (y); \
278     next_active_state->ims = ims; \
279     next_active_state->data = (z); \
280     next_active_state++; \
281     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
282     } \
283     else return PCRE_ERROR_DFA_WSSIZE
284    
/* ADD_NEW(x,y): append a state with offset x and count y, plus the current
ims, to the new list (the one for the following character) and advance
next_new_state. The data field is not written. If new_count has already reached
wscount, the enclosing function returns PCRE_ERROR_DFA_WSSIZE instead
(new_count is incremented either way). The macro uses the locals new_count,
next_new_state, wscount, ims and rlevel of the function it is expanded in. It
expands to a bare if/else, so enclose it in braces when it is the body of
another if. */
285     #define ADD_NEW(x,y) \
286     if (new_count++ < wscount) \
287     { \
288     next_new_state->offset = (x); \
289     next_new_state->count = (y); \
290     next_new_state->ims = ims; \
291     next_new_state++; \
292     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
293     } \
294     else return PCRE_ERROR_DFA_WSSIZE
295    
/* ADD_NEW_DATA(x,y,z): append a state with offset x, count y and data z, plus
the current ims, to the new list (the one for the following character) and
advance next_new_state. If new_count has already reached wscount, the enclosing
function returns PCRE_ERROR_DFA_WSSIZE instead (new_count is incremented either
way). The macro uses the locals new_count, next_new_state, wscount, ims and
rlevel of the function it is expanded in. It expands to a bare if/else, so
enclose it in braces when it is the body of another if. */
296     #define ADD_NEW_DATA(x,y,z) \
297     if (new_count++ < wscount) \
298     { \
299     next_new_state->offset = (x); \
300     next_new_state->count = (y); \
301     next_new_state->ims = ims; \
302     next_new_state->data = (z); \
303     next_new_state++; \
304     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
305     } \
306     else return PCRE_ERROR_DFA_WSSIZE
307    
308     /* And now, here is the code */
309    
310     static int
311     internal_dfa_exec(
312     dfa_match_data *md,
313     const uschar *this_start_code,
314     const uschar *current_subject,
315     int start_offset,
316     int *offsets,
317     int offsetcount,
318     int *workspace,
319     int wscount,
320     int ims,
321     int rlevel,
322     int recursing)
323     {
324     stateblock *active_states, *new_states, *temp_states;
325     stateblock *next_active_state, *next_new_state;
326    
327     const uschar *ctypes, *lcc, *fcc;
328     const uschar *ptr;
329 nigel 93 const uschar *end_code, *first_op;
330 nigel 77
331     int active_count, new_count, match_count;
332    
333     /* Some fields in the md block are frequently referenced, so we load them into
334     independent variables in the hope that this will perform better. */
335    
336     const uschar *start_subject = md->start_subject;
337     const uschar *end_subject = md->end_subject;
338     const uschar *start_code = md->start_code;
339    
340 nigel 87 #ifdef SUPPORT_UTF8
341 nigel 77 BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
342 nigel 93 #else
343     BOOL utf8 = FALSE;
344 nigel 87 #endif
345 nigel 77
346     rlevel++;
347     offsetcount &= (-2);
348    
349     wscount -= 2;
350     wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
351     (2 * INTS_PER_STATEBLOCK);
352    
353     DPRINTF(("\n%.*s---------------------\n"
354     "%.*sCall to internal_dfa_exec f=%d r=%d\n",
355     rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
356    
357     ctypes = md->tables + ctypes_offset;
358     lcc = md->tables + lcc_offset;
359     fcc = md->tables + fcc_offset;
360    
361     match_count = PCRE_ERROR_NOMATCH; /* A negative number */
362    
363     active_states = (stateblock *)(workspace + 2);
364     next_new_state = new_states = active_states + wscount;
365     new_count = 0;
366    
367 nigel 93 first_op = this_start_code + 1 + LINK_SIZE +
368     ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
369    
370 nigel 77 /* The first thing in any (sub) pattern is a bracket of some sort. Push all
371     the alternative states onto the list, and find out where the end is. This
372     makes is possible to use this function recursively, when we want to stop at a
373     matching internal ket rather than at the end.
374    
375     If the first opcode in the first alternative is OP_REVERSE, we are dealing with
376     a backward assertion. In that case, we have to find out the maximum amount to
377     move back, and set up each alternative appropriately. */
378    
379 nigel 93 if (*first_op == OP_REVERSE)
380 nigel 77 {
381     int max_back = 0;
382     int gone_back;
383    
384     end_code = this_start_code;
385     do
386     {
387     int back = GET(end_code, 2+LINK_SIZE);
388     if (back > max_back) max_back = back;
389     end_code += GET(end_code, 1);
390     }
391     while (*end_code == OP_ALT);
392    
393     /* If we can't go back the amount required for the longest lookbehind
394     pattern, go back as far as we can; some alternatives may still be viable. */
395    
396     #ifdef SUPPORT_UTF8
397     /* In character mode we have to step back character by character */
398    
399     if (utf8)
400     {
401     for (gone_back = 0; gone_back < max_back; gone_back++)
402     {
403     if (current_subject <= start_subject) break;
404     current_subject--;
405     while (current_subject > start_subject &&
406     (*current_subject & 0xc0) == 0x80)
407     current_subject--;
408     }
409     }
410     else
411     #endif
412    
413     /* In byte-mode we can do this quickly. */
414    
415     {
416     gone_back = (current_subject - max_back < start_subject)?
417     current_subject - start_subject : max_back;
418     current_subject -= gone_back;
419     }
420 ph10 461
421 ph10 435 /* Save the earliest consulted character */
422 nigel 77
423 ph10 461 if (current_subject < md->start_used_ptr)
424     md->start_used_ptr = current_subject;
425    
426 nigel 77 /* Now we can process the individual branches. */
427    
428     end_code = this_start_code;
429     do
430     {
431     int back = GET(end_code, 2+LINK_SIZE);
432     if (back <= gone_back)
433     {
434     int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
435     ADD_NEW_DATA(-bstate, 0, gone_back - back);
436     }
437     end_code += GET(end_code, 1);
438     }
439     while (*end_code == OP_ALT);
440     }
441    
442     /* This is the code for a "normal" subpattern (not a backward assertion). The
443     start of a whole pattern is always one of these. If we are at the top level,
444     we may be asked to restart matching from the same point that we reached for a
445     previous partial match. We still have to scan through the top-level branches to
446     find the end state. */
447    
448     else
449     {
450     end_code = this_start_code;
451    
452     /* Restarting */
453    
454     if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
455     {
456     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
457     new_count = workspace[1];
458     if (!workspace[0])
459     memcpy(new_states, active_states, new_count * sizeof(stateblock));
460     }
461    
462     /* Not restarting */
463    
464     else
465     {
466 nigel 93 int length = 1 + LINK_SIZE +
467     ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
468 nigel 77 do
469     {
470 nigel 93 ADD_NEW(end_code - start_code + length, 0);
471 nigel 77 end_code += GET(end_code, 1);
472 nigel 93 length = 1 + LINK_SIZE;
473 nigel 77 }
474     while (*end_code == OP_ALT);
475     }
476     }
477    
478     workspace[0] = 0; /* Bit indicating which vector is current */
479    
480     DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
481    
482     /* Loop for scanning the subject */
483    
484     ptr = current_subject;
485     for (;;)
486     {
487     int i, j;
488 nigel 91 int clen, dlen;
489     unsigned int c, d;
490 ph10 428 int forced_fail = 0;
491 ph10 461 int reached_end = 0;
492 nigel 77
493     /* Make the new state list into the active state list and empty the
494     new state list. */
495    
496     temp_states = active_states;
497     active_states = new_states;
498     new_states = temp_states;
499     active_count = new_count;
500     new_count = 0;
501    
502     workspace[0] ^= 1; /* Remember for the restarting feature */
503     workspace[1] = active_count;
504    
505     #ifdef DEBUG
506     printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
507     pchars((uschar *)ptr, strlen((char *)ptr), stdout);
508     printf("\"\n");
509    
510     printf("%.*sActive states: ", rlevel*2-2, SP);
511     for (i = 0; i < active_count; i++)
512     printf("%d/%d ", active_states[i].offset, active_states[i].count);
513     printf("\n");
514     #endif
515    
516     /* Set the pointers for adding new states */
517    
518     next_active_state = active_states + active_count;
519     next_new_state = new_states;
520    
521     /* Load the current character from the subject outside the loop, as many
522     different states may want to look at it, and we assume that at least one
523     will. */
524    
525     if (ptr < end_subject)
526     {
527 nigel 93 clen = 1; /* Number of bytes in the character */
528 nigel 77 #ifdef SUPPORT_UTF8
529     if (utf8) { GETCHARLEN(c, ptr, clen); } else
530     #endif /* SUPPORT_UTF8 */
531     c = *ptr;
532     }
533     else
534     {
535 nigel 93 clen = 0; /* This indicates the end of the subject */
536     c = NOTACHAR; /* This value should never actually be used */
537 nigel 77 }
538    
539     /* Scan up the active states and act on each one. The result of an action
540     may be to add more states to the currently active list (e.g. on hitting a
541     parenthesis) or it may be to put states on the new list, for considering
542     when we move the character pointer on. */
543    
544     for (i = 0; i < active_count; i++)
545     {
546     stateblock *current_state = active_states + i;
547     const uschar *code;
548     int state_offset = current_state->offset;
549 ph10 397 int count, codevalue, rrc;
550 nigel 77
551     #ifdef DEBUG
552     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
553 nigel 93 if (clen == 0) printf("EOL\n");
554 nigel 77 else if (c > 32 && c < 127) printf("'%c'\n", c);
555     else printf("0x%02x\n", c);
556     #endif
557    
558     /* This variable is referred to implicity in the ADD_xxx macros. */
559    
560     ims = current_state->ims;
561    
562     /* A negative offset is a special case meaning "hold off going to this
563     (negated) state until the number of characters in the data field have
564     been skipped". */
565    
566     if (state_offset < 0)
567     {
568     if (current_state->data > 0)
569     {
570     DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
571     ADD_NEW_DATA(state_offset, current_state->count,
572     current_state->data - 1);
573     continue;
574     }
575     else
576     {
577     current_state->offset = state_offset = -state_offset;
578     }
579     }
580    
581 ph10 461 /* Check for a duplicate state with the same count, and skip if found.
582 ph10 439 See the note at the head of this module about the possibility of improving
583     performance here. */
584 nigel 77
585     for (j = 0; j < i; j++)
586     {
587     if (active_states[j].offset == state_offset &&
588     active_states[j].count == current_state->count)
589     {
590     DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
591     goto NEXT_ACTIVE_STATE;
592     }
593     }
594    
595     /* The state offset is the offset to the opcode */
596    
597     code = start_code + state_offset;
598     codevalue = *code;
599    
600     /* If this opcode is followed by an inline character, load it. It is
601     tempting to test for the presence of a subject character here, but that
602     is wrong, because sometimes zero repetitions of the subject are
603     permitted.
604    
605     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
606 ph10 178 argument that is not a data character - but is always one byte long. We
607     have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
608     this case. To keep the other cases fast, convert these ones to new opcodes.
609     */
610 nigel 77
611     if (coptable[codevalue] > 0)
612     {
613     dlen = 1;
614     #ifdef SUPPORT_UTF8
615     if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
616     #endif /* SUPPORT_UTF8 */
617     d = code[coptable[codevalue]];
618     if (codevalue >= OP_TYPESTAR)
619     {
620 nigel 93 switch(d)
621     {
622     case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
623     case OP_NOTPROP:
624     case OP_PROP: codevalue += OP_PROP_EXTRA; break;
625     case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
626     case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
627 ph10 178 case OP_NOT_HSPACE:
628 ph10 182 case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
629 ph10 178 case OP_NOT_VSPACE:
630 ph10 182 case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
631 nigel 93 default: break;
632     }
633 nigel 77 }
634     }
635     else
636     {
637     dlen = 0; /* Not strictly necessary, but compilers moan */
638 nigel 93 d = NOTACHAR; /* if these variables are not set. */
639 nigel 77 }
640    
641    
642     /* Now process the individual opcodes */
643    
644     switch (codevalue)
645     {
646    
647     /* ========================================================================== */
648     /* Reached a closing bracket. If not at the end of the pattern, carry
649     on with the next opcode. Otherwise, unless we have an empty string and
650 ph10 461 PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
651 ph10 442 start of the subject, save the match data, shifting up all previous
652 nigel 77 matches so we always have the longest first. */
653    
654     case OP_KET:
655     case OP_KETRMIN:
656     case OP_KETRMAX:
657     if (code != end_code)
658     {
659     ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
660     if (codevalue != OP_KET)
661     {
662     ADD_ACTIVE(state_offset - GET(code, 1), 0);
663     }
664     }
665 ph10 461 else
666 nigel 77 {
667 ph10 461 reached_end++; /* Count branches that reach the end */
668     if (ptr > current_subject ||
669 ph10 442 ((md->moptions & PCRE_NOTEMPTY) == 0 &&
670     ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
671     current_subject > start_subject + md->start_offset)))
672 nigel 77 {
673 ph10 428 if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
674     else if (match_count > 0 && ++match_count * 2 >= offsetcount)
675     match_count = 0;
676     count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
677     if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
678     if (offsetcount >= 2)
679     {
680     offsets[0] = current_subject - start_subject;
681     offsets[1] = ptr - start_subject;
682     DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
683     offsets[1] - offsets[0], current_subject));
684     }
685     if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
686     {
687     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
688     "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
689     match_count, rlevel*2-2, SP));
690     return match_count;
691     }
692 ph10 461 }
693 nigel 77 }
694     break;
695    
696     /* ========================================================================== */
697     /* These opcodes add to the current list of states without looking
698     at the current character. */
699    
700     /*-----------------------------------------------------------------*/
701     case OP_ALT:
702     do { code += GET(code, 1); } while (*code == OP_ALT);
703     ADD_ACTIVE(code - start_code, 0);
704     break;
705    
706     /*-----------------------------------------------------------------*/
707     case OP_BRA:
708 nigel 93 case OP_SBRA:
709 nigel 77 do
710     {
711     ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
712     code += GET(code, 1);
713     }
714     while (*code == OP_ALT);
715     break;
716    
717     /*-----------------------------------------------------------------*/
718 nigel 93 case OP_CBRA:
719     case OP_SCBRA:
720     ADD_ACTIVE(code - start_code + 3 + LINK_SIZE, 0);
721     code += GET(code, 1);
722     while (*code == OP_ALT)
723     {
724     ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
725     code += GET(code, 1);
726     }
727     break;
728    
729     /*-----------------------------------------------------------------*/
730 nigel 77 case OP_BRAZERO:
731     case OP_BRAMINZERO:
732     ADD_ACTIVE(state_offset + 1, 0);
733     code += 1 + GET(code, 2);
734     while (*code == OP_ALT) code += GET(code, 1);
735     ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
736     break;
737    
738     /*-----------------------------------------------------------------*/
739 ph10 335 case OP_SKIPZERO:
740     code += 1 + GET(code, 2);
741     while (*code == OP_ALT) code += GET(code, 1);
742     ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
743     break;
744    
745     /*-----------------------------------------------------------------*/
746 nigel 77 case OP_CIRC:
747     if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
748 nigel 91 ((ims & PCRE_MULTILINE) != 0 &&
749     ptr != end_subject &&
750 nigel 93 WAS_NEWLINE(ptr)))
751 nigel 77 { ADD_ACTIVE(state_offset + 1, 0); }
752     break;
753    
754     /*-----------------------------------------------------------------*/
755     case OP_EOD:
756     if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
757     break;
758    
759     /*-----------------------------------------------------------------*/
760     case OP_OPT:
761     ims = code[1];
762     ADD_ACTIVE(state_offset + 2, 0);
763     break;
764    
765     /*-----------------------------------------------------------------*/
766     case OP_SOD:
767     if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
768     break;
769    
770     /*-----------------------------------------------------------------*/
771     case OP_SOM:
772     if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
773     break;
774    
775    
776     /* ========================================================================== */
777     /* These opcodes inspect the next subject character, and sometimes
778     the previous one as well, but do not have an argument. The variable
779     clen contains the length of the current character and is zero if we are
780     at the end of the subject. */
781    
782     /*-----------------------------------------------------------------*/
783     case OP_ANY:
784 ph10 342 if (clen > 0 && !IS_NEWLINE(ptr))
785 nigel 77 { ADD_NEW(state_offset + 1, 0); }
786     break;
787    
788     /*-----------------------------------------------------------------*/
789 ph10 341 case OP_ALLANY:
790     if (clen > 0)
791     { ADD_NEW(state_offset + 1, 0); }
792     break;
793    
794     /*-----------------------------------------------------------------*/
795 nigel 77 case OP_EODN:
796 nigel 93 if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
797 nigel 77 { ADD_ACTIVE(state_offset + 1, 0); }
798     break;
799    
800     /*-----------------------------------------------------------------*/
801     case OP_DOLL:
802     if ((md->moptions & PCRE_NOTEOL) == 0)
803     {
804 nigel 91 if (clen == 0 ||
805 ph10 383 ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
806 nigel 91 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
807     ))
808 nigel 77 { ADD_ACTIVE(state_offset + 1, 0); }
809     }
810 nigel 93 else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
811 nigel 77 { ADD_ACTIVE(state_offset + 1, 0); }
812     break;
813    
814     /*-----------------------------------------------------------------*/
815    
816     case OP_DIGIT:
817     case OP_WHITESPACE:
818     case OP_WORDCHAR:
819     if (clen > 0 && c < 256 &&
820     ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
821     { ADD_NEW(state_offset + 1, 0); }
822     break;
823    
824     /*-----------------------------------------------------------------*/
825     case OP_NOT_DIGIT:
826     case OP_NOT_WHITESPACE:
827     case OP_NOT_WORDCHAR:
828     if (clen > 0 && (c >= 256 ||
829     ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
830     { ADD_NEW(state_offset + 1, 0); }
831     break;
832    
833     /*-----------------------------------------------------------------*/
834     case OP_WORD_BOUNDARY:
835     case OP_NOT_WORD_BOUNDARY:
836     {
837     int left_word, right_word;
838    
839     if (ptr > start_subject)
840     {
841     const uschar *temp = ptr - 1;
842 ph10 461 if (temp < md->start_used_ptr) md->start_used_ptr = temp;
843 nigel 77 #ifdef SUPPORT_UTF8
844     if (utf8) BACKCHAR(temp);
845     #endif
846     GETCHARTEST(d, temp);
847     left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
848     }
849     else left_word = 0;
850    
851 ph10 461 if (clen > 0)
852 ph10 428 right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
853     else /* This is a fudge to ensure that if this is the */
854     { /* last item in the pattern, we don't count it as */
855     reached_end--; /* reached, thus disabling a partial match. */
856     right_word = 0;
857 ph10 461 }
858 nigel 77
859     if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
860     { ADD_ACTIVE(state_offset + 1, 0); }
861     }
862     break;
863    
864    
865     /*-----------------------------------------------------------------*/
866     /* Check the next character by Unicode property. We will get here only
867     if the support is in the binary; otherwise a compile-time error occurs.
868     */
869    
870 ph10 151 #ifdef SUPPORT_UCP
871 nigel 77 case OP_PROP:
872     case OP_NOTPROP:
873     if (clen > 0)
874     {
875 nigel 87 BOOL OK;
876 ph10 349 const ucd_record * prop = GET_UCD(c);
877 nigel 87 switch(code[1])
878 nigel 77 {
879 nigel 87 case PT_ANY:
880     OK = TRUE;
881     break;
882    
883     case PT_LAMP:
884 ph10 349 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
885 nigel 87 break;
886    
887     case PT_GC:
888 ph10 351 OK = _pcre_ucp_gentype[prop->chartype] == code[2];
889 nigel 87 break;
890    
891     case PT_PC:
892 ph10 349 OK = prop->chartype == code[2];
893 nigel 87 break;
894    
895     case PT_SC:
896 ph10 349 OK = prop->script == code[2];
897 nigel 87 break;
898    
899     /* Should never occur, but keep compilers from grumbling. */
900    
901     default:
902     OK = codevalue != OP_PROP;
903     break;
904 nigel 77 }
905 nigel 87
906     if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
907 nigel 77 }
908     break;
909     #endif
910    
911    
912    
913     /* ========================================================================== */
914     /* These opcodes likewise inspect the subject character, but have an
915     argument that is not a data character. It is one of these opcodes:
916 ph10 341 OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
917     OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
918 nigel 77
919     case OP_TYPEPLUS:
920     case OP_TYPEMINPLUS:
921 nigel 93 case OP_TYPEPOSPLUS:
922 nigel 77 count = current_state->count; /* Already matched */
923     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
924     if (clen > 0)
925     {
926     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
927     (c < 256 &&
928 ph10 342 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
929 nigel 77 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
930     {
931 nigel 93 if (count > 0 && codevalue == OP_TYPEPOSPLUS)
932     {
933     active_count--; /* Remove non-match possibility */
934     next_active_state--;
935     }
936 nigel 77 count++;
937     ADD_NEW(state_offset, count);
938     }
939     }
940     break;
941    
942     /*-----------------------------------------------------------------*/
943     case OP_TYPEQUERY:
944     case OP_TYPEMINQUERY:
945 nigel 93 case OP_TYPEPOSQUERY:
946 nigel 77 ADD_ACTIVE(state_offset + 2, 0);
947     if (clen > 0)
948     {
949     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
950     (c < 256 &&
951 ph10 342 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
952 nigel 77 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
953     {
954 nigel 93 if (codevalue == OP_TYPEPOSQUERY)
955     {
956     active_count--; /* Remove non-match possibility */
957     next_active_state--;
958     }
959 nigel 77 ADD_NEW(state_offset + 2, 0);
960     }
961     }
962     break;
963    
964     /*-----------------------------------------------------------------*/
965     case OP_TYPESTAR:
966     case OP_TYPEMINSTAR:
967 nigel 93 case OP_TYPEPOSSTAR:
968 nigel 77 ADD_ACTIVE(state_offset + 2, 0);
969     if (clen > 0)
970     {
971     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
972     (c < 256 &&
973 ph10 342 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
974 nigel 77 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
975     {
976 nigel 93 if (codevalue == OP_TYPEPOSSTAR)
977     {
978     active_count--; /* Remove non-match possibility */
979     next_active_state--;
980     }
981 nigel 77 ADD_NEW(state_offset, 0);
982     }
983     }
984     break;
985    
986     /*-----------------------------------------------------------------*/
987     case OP_TYPEEXACT:
988 nigel 93 count = current_state->count; /* Number already matched */
989     if (clen > 0)
990     {
991     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
992     (c < 256 &&
993 ph10 342 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
994 nigel 93 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
995     {
996     if (++count >= GET2(code, 1))
997     { ADD_NEW(state_offset + 4, 0); }
998     else
999     { ADD_NEW(state_offset, count); }
1000     }
1001     }
1002     break;
1003    
1004     /*-----------------------------------------------------------------*/
1005 nigel 77 case OP_TYPEUPTO:
1006     case OP_TYPEMINUPTO:
1007 nigel 93 case OP_TYPEPOSUPTO:
1008     ADD_ACTIVE(state_offset + 4, 0);
1009 nigel 77 count = current_state->count; /* Number already matched */
1010     if (clen > 0)
1011     {
1012     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1013     (c < 256 &&
1014 ph10 342 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1015 nigel 77 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1016     {
1017 nigel 93 if (codevalue == OP_TYPEPOSUPTO)
1018     {
1019     active_count--; /* Remove non-match possibility */
1020     next_active_state--;
1021     }
1022 nigel 77 if (++count >= GET2(code, 1))
1023     { ADD_NEW(state_offset + 4, 0); }
1024     else
1025     { ADD_NEW(state_offset, count); }
1026     }
1027     }
1028     break;
1029    
1030     /* ========================================================================== */
1031     /* These are virtual opcodes that are used when something like
1032 nigel 93 OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1033     argument. It keeps the code above fast for the other cases. The argument
1034     is in the d variable. */
1035 nigel 77
1036 ph10 151 #ifdef SUPPORT_UCP
1037 nigel 77 case OP_PROP_EXTRA + OP_TYPEPLUS:
1038     case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1039 nigel 93 case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1040 nigel 77 count = current_state->count; /* Already matched */
1041 nigel 87 if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1042 nigel 77 if (clen > 0)
1043     {
1044 nigel 87 BOOL OK;
1045 ph10 349 const ucd_record * prop = GET_UCD(c);
1046 nigel 87 switch(code[2])
1047     {
1048     case PT_ANY:
1049     OK = TRUE;
1050     break;
1051    
1052     case PT_LAMP:
1053 ph10 349 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1054 nigel 87 break;
1055    
1056     case PT_GC:
1057 ph10 351 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1058 nigel 87 break;
1059    
1060     case PT_PC:
1061 ph10 349 OK = prop->chartype == code[3];
1062 nigel 87 break;
1063    
1064     case PT_SC:
1065 ph10 349 OK = prop->script == code[3];
1066 nigel 87 break;
1067    
1068     /* Should never occur, but keep compilers from grumbling. */
1069    
1070     default:
1071     OK = codevalue != OP_PROP;
1072     break;
1073     }
1074    
1075 nigel 93 if (OK == (d == OP_PROP))
1076     {
1077     if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1078     {
1079     active_count--; /* Remove non-match possibility */
1080     next_active_state--;
1081     }
1082     count++;
1083     ADD_NEW(state_offset, count);
1084     }
1085 nigel 77 }
1086     break;
1087    
1088     /*-----------------------------------------------------------------*/
1089     case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1090     case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1091 nigel 93 case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1092 nigel 77 count = current_state->count; /* Already matched */
1093     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1094 ph10 349 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1095 nigel 77 {
1096     const uschar *nptr = ptr + clen;
1097     int ncount = 0;
1098 nigel 93 if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1099     {
1100     active_count--; /* Remove non-match possibility */
1101     next_active_state--;
1102     }
1103 nigel 77 while (nptr < end_subject)
1104     {
1105     int nd;
1106     int ndlen = 1;
1107     GETCHARLEN(nd, nptr, ndlen);
1108 ph10 349 if (UCD_CATEGORY(nd) != ucp_M) break;
1109 nigel 77 ncount++;
1110     nptr += ndlen;
1111     }
1112     count++;
1113     ADD_NEW_DATA(-state_offset, count, ncount);
1114     }
1115     break;
1116 ph10 151 #endif
1117 nigel 77
1118     /*-----------------------------------------------------------------*/
1119 nigel 93 case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1120     case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1121     case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1122     count = current_state->count; /* Already matched */
1123     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1124     if (clen > 0)
1125     {
1126     int ncount = 0;
1127     switch (c)
1128     {
1129     case 0x000b:
1130     case 0x000c:
1131     case 0x0085:
1132     case 0x2028:
1133     case 0x2029:
1134 ph10 231 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1135     goto ANYNL01;
1136    
1137     case 0x000d:
1138     if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1139     /* Fall through */
1140    
1141     ANYNL01:
1142     case 0x000a:
1143 nigel 93 if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1144     {
1145     active_count--; /* Remove non-match possibility */
1146     next_active_state--;
1147     }
1148     count++;
1149     ADD_NEW_DATA(-state_offset, count, ncount);
1150     break;
1151 ph10 231
1152 nigel 93 default:
1153     break;
1154     }
1155     }
1156     break;
1157    
1158     /*-----------------------------------------------------------------*/
1159 ph10 178 case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1160     case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1161     case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1162     count = current_state->count; /* Already matched */
1163     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1164     if (clen > 0)
1165     {
1166 ph10 182 BOOL OK;
1167 ph10 178 switch (c)
1168     {
1169     case 0x000a:
1170     case 0x000b:
1171     case 0x000c:
1172     case 0x000d:
1173     case 0x0085:
1174     case 0x2028:
1175     case 0x2029:
1176     OK = TRUE;
1177 ph10 182 break;
1178 ph10 178
1179     default:
1180     OK = FALSE;
1181 ph10 182 break;
1182 ph10 178 }
1183    
1184     if (OK == (d == OP_VSPACE))
1185 ph10 182 {
1186 ph10 178 if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1187     {
1188     active_count--; /* Remove non-match possibility */
1189     next_active_state--;
1190     }
1191     count++;
1192     ADD_NEW_DATA(-state_offset, count, 0);
1193     }
1194     }
1195     break;
1196    
1197     /*-----------------------------------------------------------------*/
1198     case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1199     case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1200     case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1201     count = current_state->count; /* Already matched */
1202     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1203     if (clen > 0)
1204     {
1205 ph10 182 BOOL OK;
1206 ph10 178 switch (c)
1207     {
1208     case 0x09: /* HT */
1209     case 0x20: /* SPACE */
1210     case 0xa0: /* NBSP */
1211     case 0x1680: /* OGHAM SPACE MARK */
1212     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1213     case 0x2000: /* EN QUAD */
1214     case 0x2001: /* EM QUAD */
1215     case 0x2002: /* EN SPACE */
1216     case 0x2003: /* EM SPACE */
1217     case 0x2004: /* THREE-PER-EM SPACE */
1218     case 0x2005: /* FOUR-PER-EM SPACE */
1219     case 0x2006: /* SIX-PER-EM SPACE */
1220     case 0x2007: /* FIGURE SPACE */
1221     case 0x2008: /* PUNCTUATION SPACE */
1222     case 0x2009: /* THIN SPACE */
1223     case 0x200A: /* HAIR SPACE */
1224     case 0x202f: /* NARROW NO-BREAK SPACE */
1225     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1226     case 0x3000: /* IDEOGRAPHIC SPACE */
1227     OK = TRUE;
1228     break;
1229 ph10 182
1230 ph10 178 default:
1231     OK = FALSE;
1232     break;
1233     }
1234 ph10 182
1235 ph10 178 if (OK == (d == OP_HSPACE))
1236 ph10 182 {
1237 ph10 178 if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1238     {
1239     active_count--; /* Remove non-match possibility */
1240     next_active_state--;
1241     }
1242     count++;
1243     ADD_NEW_DATA(-state_offset, count, 0);
1244     }
1245     }
1246     break;
1247    
1248     /*-----------------------------------------------------------------*/
1249 ph10 151 #ifdef SUPPORT_UCP
1250 nigel 77 case OP_PROP_EXTRA + OP_TYPEQUERY:
1251     case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1252 nigel 93 case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1253 nigel 87 count = 4;
1254 nigel 77 goto QS1;
1255    
1256     case OP_PROP_EXTRA + OP_TYPESTAR:
1257     case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1258 nigel 93 case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1259 nigel 77 count = 0;
1260    
1261     QS1:
1262    
1263 nigel 87 ADD_ACTIVE(state_offset + 4, 0);
1264 nigel 77 if (clen > 0)
1265     {
1266 nigel 87 BOOL OK;
1267 ph10 349 const ucd_record * prop = GET_UCD(c);
1268 nigel 87 switch(code[2])
1269     {
1270     case PT_ANY:
1271     OK = TRUE;
1272     break;
1273    
1274     case PT_LAMP:
1275 ph10 349 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1276 nigel 87 break;
1277    
1278     case PT_GC:
1279 ph10 351 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1280 nigel 87 break;
1281    
1282     case PT_PC:
1283 ph10 349 OK = prop->chartype == code[3];
1284 nigel 87 break;
1285    
1286     case PT_SC:
1287 ph10 349 OK = prop->script == code[3];
1288 nigel 87 break;
1289    
1290     /* Should never occur, but keep compilers from grumbling. */
1291    
1292     default:
1293     OK = codevalue != OP_PROP;
1294     break;
1295     }
1296    
1297 nigel 93 if (OK == (d == OP_PROP))
1298     {
1299     if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1300     codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1301     {
1302     active_count--; /* Remove non-match possibility */
1303     next_active_state--;
1304     }
1305     ADD_NEW(state_offset + count, 0);
1306     }
1307 nigel 77 }
1308     break;
1309    
1310     /*-----------------------------------------------------------------*/
1311     case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1312     case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1313 nigel 93 case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1314 nigel 77 count = 2;
1315     goto QS2;
1316    
1317     case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1318     case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1319 nigel 93 case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1320 nigel 77 count = 0;
1321    
1322     QS2:
1323    
1324     ADD_ACTIVE(state_offset + 2, 0);
1325 ph10 349 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1326 nigel 77 {
1327     const uschar *nptr = ptr + clen;
1328     int ncount = 0;
1329 nigel 93 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1330     codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1331     {
1332     active_count--; /* Remove non-match possibility */
1333     next_active_state--;
1334     }
1335 nigel 77 while (nptr < end_subject)
1336     {
1337     int nd;
1338     int ndlen = 1;
1339     GETCHARLEN(nd, nptr, ndlen);
1340 ph10 349 if (UCD_CATEGORY(nd) != ucp_M) break;
1341 nigel 77 ncount++;
1342     nptr += ndlen;
1343     }
1344     ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1345     }
1346     break;
1347 ph10 151 #endif
1348 nigel 77
1349     /*-----------------------------------------------------------------*/
1350 nigel 93 case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1351     case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1352     case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1353     count = 2;
1354     goto QS3;
1355    
1356     case OP_ANYNL_EXTRA + OP_TYPESTAR:
1357     case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1358     case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1359     count = 0;
1360    
1361     QS3:
1362     ADD_ACTIVE(state_offset + 2, 0);
1363     if (clen > 0)
1364     {
1365     int ncount = 0;
1366     switch (c)
1367     {
1368     case 0x000b:
1369     case 0x000c:
1370     case 0x0085:
1371     case 0x2028:
1372     case 0x2029:
1373 ph10 231 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1374     goto ANYNL02;
1375    
1376     case 0x000d:
1377     if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1378     /* Fall through */
1379    
1380     ANYNL02:
1381     case 0x000a:
1382 nigel 93 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1383     codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1384     {
1385     active_count--; /* Remove non-match possibility */
1386     next_active_state--;
1387     }
1388     ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1389     break;
1390 ph10 231
1391 nigel 93 default:
1392     break;
1393     }
1394     }
1395     break;
1396    
1397     /*-----------------------------------------------------------------*/
1398 ph10 178 case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1399     case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1400     case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1401     count = 2;
1402     goto QS4;
1403    
1404     case OP_VSPACE_EXTRA + OP_TYPESTAR:
1405     case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1406     case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1407     count = 0;
1408    
1409     QS4:
1410     ADD_ACTIVE(state_offset + 2, 0);
1411     if (clen > 0)
1412     {
1413 ph10 182 BOOL OK;
1414 ph10 178 switch (c)
1415     {
1416     case 0x000a:
1417     case 0x000b:
1418     case 0x000c:
1419     case 0x000d:
1420     case 0x0085:
1421     case 0x2028:
1422     case 0x2029:
1423     OK = TRUE;
1424     break;
1425 ph10 182
1426 ph10 178 default:
1427     OK = FALSE;
1428     break;
1429     }
1430     if (OK == (d == OP_VSPACE))
1431 ph10 182 {
1432 ph10 178 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1433     codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1434     {
1435     active_count--; /* Remove non-match possibility */
1436     next_active_state--;
1437     }
1438     ADD_NEW_DATA(-(state_offset + count), 0, 0);
1439     }
1440     }
1441     break;
1442    
1443     /*-----------------------------------------------------------------*/
1444     case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1445     case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1446     case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1447     count = 2;
1448     goto QS5;
1449    
1450     case OP_HSPACE_EXTRA + OP_TYPESTAR:
1451     case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1452     case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1453     count = 0;
1454    
1455     QS5:
1456     ADD_ACTIVE(state_offset + 2, 0);
1457     if (clen > 0)
1458     {
1459 ph10 182 BOOL OK;
1460 ph10 178 switch (c)
1461     {
1462     case 0x09: /* HT */
1463     case 0x20: /* SPACE */
1464     case 0xa0: /* NBSP */
1465     case 0x1680: /* OGHAM SPACE MARK */
1466     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1467     case 0x2000: /* EN QUAD */
1468     case 0x2001: /* EM QUAD */
1469     case 0x2002: /* EN SPACE */
1470     case 0x2003: /* EM SPACE */
1471     case 0x2004: /* THREE-PER-EM SPACE */
1472     case 0x2005: /* FOUR-PER-EM SPACE */
1473     case 0x2006: /* SIX-PER-EM SPACE */
1474     case 0x2007: /* FIGURE SPACE */
1475     case 0x2008: /* PUNCTUATION SPACE */
1476     case 0x2009: /* THIN SPACE */
1477     case 0x200A: /* HAIR SPACE */
1478     case 0x202f: /* NARROW NO-BREAK SPACE */
1479     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1480     case 0x3000: /* IDEOGRAPHIC SPACE */
1481     OK = TRUE;
1482     break;
1483 ph10 182
1484 ph10 178 default:
1485     OK = FALSE;
1486     break;
1487     }
1488 ph10 182
1489 ph10 178 if (OK == (d == OP_HSPACE))
1490 ph10 182 {
1491 ph10 178 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1492     codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1493     {
1494     active_count--; /* Remove non-match possibility */
1495     next_active_state--;
1496     }
1497     ADD_NEW_DATA(-(state_offset + count), 0, 0);
1498     }
1499     }
1500     break;
1501    
1502     /*-----------------------------------------------------------------*/
1503 ph10 151 #ifdef SUPPORT_UCP
1504 nigel 77 case OP_PROP_EXTRA + OP_TYPEEXACT:
1505     case OP_PROP_EXTRA + OP_TYPEUPTO:
1506     case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1507 nigel 93 case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1508 nigel 77 if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1509 nigel 87 { ADD_ACTIVE(state_offset + 6, 0); }
1510 nigel 77 count = current_state->count; /* Number already matched */
1511     if (clen > 0)
1512     {
1513 nigel 87 BOOL OK;
1514 ph10 349 const ucd_record * prop = GET_UCD(c);
1515 nigel 87 switch(code[4])
1516 nigel 77 {
1517 nigel 87 case PT_ANY:
1518     OK = TRUE;
1519     break;
1520    
1521     case PT_LAMP:
1522 ph10 349 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;
1523 nigel 87 break;
1524    
1525     case PT_GC:
1526 ph10 351 OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1527 nigel 87 break;
1528    
1529     case PT_PC:
1530 ph10 349 OK = prop->chartype == code[5];
1531 nigel 87 break;
1532    
1533     case PT_SC:
1534 ph10 349 OK = prop->script == code[5];
1535 nigel 87 break;
1536    
1537     /* Should never occur, but keep compilers from grumbling. */
1538    
1539     default:
1540     OK = codevalue != OP_PROP;
1541     break;
1542     }
1543    
1544     if (OK == (d == OP_PROP))
1545     {
1546 nigel 93 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1547     {
1548     active_count--; /* Remove non-match possibility */
1549     next_active_state--;
1550     }
1551 nigel 77 if (++count >= GET2(code, 1))
1552 nigel 87 { ADD_NEW(state_offset + 6, 0); }
1553 nigel 77 else
1554     { ADD_NEW(state_offset, count); }
1555     }
1556     }
1557     break;
1558    
1559     /*-----------------------------------------------------------------*/
1560     case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1561     case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1562     case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1563 nigel 93 case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1564 nigel 77 if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1565     { ADD_ACTIVE(state_offset + 4, 0); }
1566     count = current_state->count; /* Number already matched */
1567 ph10 349 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1568 nigel 77 {
1569     const uschar *nptr = ptr + clen;
1570     int ncount = 0;
1571 nigel 93 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1572     {
1573     active_count--; /* Remove non-match possibility */
1574     next_active_state--;
1575     }
1576 nigel 77 while (nptr < end_subject)
1577     {
1578     int nd;
1579     int ndlen = 1;
1580     GETCHARLEN(nd, nptr, ndlen);
1581 ph10 349 if (UCD_CATEGORY(nd) != ucp_M) break;
1582 nigel 77 ncount++;
1583     nptr += ndlen;
1584     }
1585     if (++count >= GET2(code, 1))
1586     { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1587     else
1588     { ADD_NEW_DATA(-state_offset, count, ncount); }
1589     }
1590     break;
1591 ph10 151 #endif
1592 nigel 77
1593 nigel 93 /*-----------------------------------------------------------------*/
1594     case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1595     case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1596     case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1597     case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1598     if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1599     { ADD_ACTIVE(state_offset + 4, 0); }
1600     count = current_state->count; /* Number already matched */
1601     if (clen > 0)
1602     {
1603     int ncount = 0;
1604     switch (c)
1605     {
1606     case 0x000b:
1607     case 0x000c:
1608     case 0x0085:
1609     case 0x2028:
1610     case 0x2029:
1611 ph10 231 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1612     goto ANYNL03;
1613    
1614     case 0x000d:
1615     if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1616     /* Fall through */
1617    
1618     ANYNL03:
1619     case 0x000a:
1620 nigel 93 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1621     {
1622     active_count--; /* Remove non-match possibility */
1623     next_active_state--;
1624     }
1625     if (++count >= GET2(code, 1))
1626     { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1627     else
1628     { ADD_NEW_DATA(-state_offset, count, ncount); }
1629     break;
1630 ph10 231
1631 nigel 93 default:
1632     break;
1633     }
1634     }
1635     break;
1636    
1637 ph10 178 /*-----------------------------------------------------------------*/
1638     case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1639     case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1640     case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1641     case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1642     if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1643     { ADD_ACTIVE(state_offset + 4, 0); }
1644     count = current_state->count; /* Number already matched */
1645     if (clen > 0)
1646     {
1647 ph10 182 BOOL OK;
1648 ph10 178 switch (c)
1649     {
1650     case 0x000a:
1651     case 0x000b:
1652     case 0x000c:
1653     case 0x000d:
1654     case 0x0085:
1655     case 0x2028:
1656     case 0x2029:
1657     OK = TRUE;
1658     break;
1659 ph10 182
1660 ph10 178 default:
1661     OK = FALSE;
1662     }
1663 ph10 182
1664 ph10 178 if (OK == (d == OP_VSPACE))
1665 ph10 182 {
1666 ph10 178 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1667     {
1668     active_count--; /* Remove non-match possibility */
1669     next_active_state--;
1670     }
1671     if (++count >= GET2(code, 1))
1672     { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1673     else
1674     { ADD_NEW_DATA(-state_offset, count, 0); }
1675     }
1676     }
1677     break;
1678    
1679     /*-----------------------------------------------------------------*/
1680     case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1681     case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1682     case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1683     case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1684     if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1685     { ADD_ACTIVE(state_offset + 4, 0); }
1686     count = current_state->count; /* Number already matched */
1687     if (clen > 0)
1688     {
1689 ph10 182 BOOL OK;
1690 ph10 178 switch (c)
1691     {
1692     case 0x09: /* HT */
1693     case 0x20: /* SPACE */
1694     case 0xa0: /* NBSP */
1695     case 0x1680: /* OGHAM SPACE MARK */
1696     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1697     case 0x2000: /* EN QUAD */
1698     case 0x2001: /* EM QUAD */
1699     case 0x2002: /* EN SPACE */
1700     case 0x2003: /* EM SPACE */
1701     case 0x2004: /* THREE-PER-EM SPACE */
1702     case 0x2005: /* FOUR-PER-EM SPACE */
1703     case 0x2006: /* SIX-PER-EM SPACE */
1704     case 0x2007: /* FIGURE SPACE */
1705     case 0x2008: /* PUNCTUATION SPACE */
1706     case 0x2009: /* THIN SPACE */
1707     case 0x200A: /* HAIR SPACE */
1708     case 0x202f: /* NARROW NO-BREAK SPACE */
1709     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1710     case 0x3000: /* IDEOGRAPHIC SPACE */
1711     OK = TRUE;
1712     break;
1713 ph10 182
1714 ph10 178 default:
1715     OK = FALSE;
1716     break;
1717     }
1718 ph10 182
1719 ph10 178 if (OK == (d == OP_HSPACE))
1720 ph10 182 {
1721 ph10 178 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1722     {
1723     active_count--; /* Remove non-match possibility */
1724     next_active_state--;
1725     }
1726     if (++count >= GET2(code, 1))
1727     { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1728     else
1729     { ADD_NEW_DATA(-state_offset, count, 0); }
1730     }
1731     }
1732     break;
1733    
1734 nigel 77 /* ========================================================================== */
1735     /* These opcodes are followed by a character that is usually compared
1736     to the current subject character; it is loaded into d. We still get
1737     here even if there is no subject character, because in some cases zero
1738     repetitions are permitted. */
1739    
1740     /*-----------------------------------------------------------------*/
1741     case OP_CHAR:
1742     if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
1743     break;
1744    
1745     /*-----------------------------------------------------------------*/
1746     case OP_CHARNC:
1747     if (clen == 0) break;
1748    
1749     #ifdef SUPPORT_UTF8
1750     if (utf8)
1751     {
1752     if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1753     {
1754 nigel 93 unsigned int othercase;
1755 nigel 77 if (c < 128) othercase = fcc[c]; else
1756    
1757     /* If we have Unicode property support, we can use it to test the
1758 nigel 87 other case of the character. */
1759 nigel 77
1760     #ifdef SUPPORT_UCP
1761 ph10 349 othercase = UCD_OTHERCASE(c);
1762 nigel 87 #else
1763 nigel 93 othercase = NOTACHAR;
1764 nigel 77 #endif
1765    
1766     if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1767     }
1768     }
1769     else
1770     #endif /* SUPPORT_UTF8 */
1771    
1772     /* Non-UTF-8 mode */
1773     {
1774     if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
1775     }
1776     break;
1777    
1778    
1779     #ifdef SUPPORT_UCP
1780     /*-----------------------------------------------------------------*/
1781     /* This is a tricky one because it can match more than one character.
1782     Find out how many characters to skip, and then set up a negative state
1783     to wait for them to pass before continuing. */
1784    
1785     case OP_EXTUNI:
1786 ph10 349 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1787 nigel 77 {
1788     const uschar *nptr = ptr + clen;
1789     int ncount = 0;
1790     while (nptr < end_subject)
1791     {
1792     int nclen = 1;
1793     GETCHARLEN(c, nptr, nclen);
1794 ph10 349 if (UCD_CATEGORY(c) != ucp_M) break;
1795 nigel 77 ncount++;
1796     nptr += nclen;
1797     }
1798     ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
1799     }
1800     break;
1801     #endif
1802    
1803     /*-----------------------------------------------------------------*/
1804 nigel 93 /* This is tricky like EXTUNI because it too can match more than one
1805     character (when CR is followed by LF). In this case, set up a negative
1806     state to wait for one character to pass before continuing. */
1807    
1808     case OP_ANYNL:
1809     if (clen > 0) switch(c)
1810     {
1811     case 0x000b:
1812     case 0x000c:
1813     case 0x0085:
1814     case 0x2028:
1815     case 0x2029:
1816 ph10 231 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1817    
1818     case 0x000a:
1819 nigel 93 ADD_NEW(state_offset + 1, 0);
1820     break;
1821 ph10 231
1822 nigel 93 case 0x000d:
1823     if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1824     {
1825     ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1826     }
1827     else
1828     {
1829     ADD_NEW(state_offset + 1, 0);
1830     }
1831     break;
1832     }
1833     break;
1834    
1835     /*-----------------------------------------------------------------*/
1836 ph10 178 case OP_NOT_VSPACE:
1837     if (clen > 0) switch(c)
1838     {
1839     case 0x000a:
1840     case 0x000b:
1841     case 0x000c:
1842     case 0x000d:
1843     case 0x0085:
1844     case 0x2028:
1845     case 0x2029:
1846     break;
1847 ph10 182
1848     default:
1849 ph10 178 ADD_NEW(state_offset + 1, 0);
1850     break;
1851     }
1852     break;
1853    
1854     /*-----------------------------------------------------------------*/
1855     case OP_VSPACE:
1856     if (clen > 0) switch(c)
1857     {
1858     case 0x000a:
1859     case 0x000b:
1860     case 0x000c:
1861     case 0x000d:
1862     case 0x0085:
1863     case 0x2028:
1864     case 0x2029:
1865     ADD_NEW(state_offset + 1, 0);
1866     break;
1867 ph10 182
1868 ph10 178 default: break;
1869     }
1870     break;
1871    
1872     /*-----------------------------------------------------------------*/
1873     case OP_NOT_HSPACE:
1874     if (clen > 0) switch(c)
1875     {
1876     case 0x09: /* HT */
1877     case 0x20: /* SPACE */
1878     case 0xa0: /* NBSP */
1879     case 0x1680: /* OGHAM SPACE MARK */
1880     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1881     case 0x2000: /* EN QUAD */
1882     case 0x2001: /* EM QUAD */
1883     case 0x2002: /* EN SPACE */
1884     case 0x2003: /* EM SPACE */
1885     case 0x2004: /* THREE-PER-EM SPACE */
1886     case 0x2005: /* FOUR-PER-EM SPACE */
1887     case 0x2006: /* SIX-PER-EM SPACE */
1888     case 0x2007: /* FIGURE SPACE */
1889     case 0x2008: /* PUNCTUATION SPACE */
1890     case 0x2009: /* THIN SPACE */
1891     case 0x200A: /* HAIR SPACE */
1892     case 0x202f: /* NARROW NO-BREAK SPACE */
1893     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1894     case 0x3000: /* IDEOGRAPHIC SPACE */
1895     break;
1896 ph10 182
1897     default:
1898 ph10 178 ADD_NEW(state_offset + 1, 0);
1899     break;
1900     }
1901     break;
1902    
1903     /*-----------------------------------------------------------------*/
1904     case OP_HSPACE:
1905     if (clen > 0) switch(c)
1906     {
1907     case 0x09: /* HT */
1908     case 0x20: /* SPACE */
1909     case 0xa0: /* NBSP */
1910     case 0x1680: /* OGHAM SPACE MARK */
1911     case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1912     case 0x2000: /* EN QUAD */
1913     case 0x2001: /* EM QUAD */
1914     case 0x2002: /* EN SPACE */
1915     case 0x2003: /* EM SPACE */
1916     case 0x2004: /* THREE-PER-EM SPACE */
1917     case 0x2005: /* FOUR-PER-EM SPACE */
1918     case 0x2006: /* SIX-PER-EM SPACE */
1919     case 0x2007: /* FIGURE SPACE */
1920     case 0x2008: /* PUNCTUATION SPACE */
1921     case 0x2009: /* THIN SPACE */
1922     case 0x200A: /* HAIR SPACE */
1923     case 0x202f: /* NARROW NO-BREAK SPACE */
1924     case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1925     case 0x3000: /* IDEOGRAPHIC SPACE */
1926     ADD_NEW(state_offset + 1, 0);
1927     break;
1928     }
1929     break;
1930    
1931     /*-----------------------------------------------------------------*/
1932 nigel 77 /* Match a negated single character. This is only used for one-byte
1933     characters, that is, we know that d < 256. The character we are
1934     checking (c) can be multibyte. */
1935    
1936     case OP_NOT:
1937     if (clen > 0)
1938     {
1939 nigel 93 unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1940 nigel 77 if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1941     }
1942     break;
1943    
1944     /*-----------------------------------------------------------------*/
1945     case OP_PLUS:
1946     case OP_MINPLUS:
1947 nigel 93 case OP_POSPLUS:
1948 nigel 77 case OP_NOTPLUS:
1949     case OP_NOTMINPLUS:
1950 nigel 93 case OP_NOTPOSPLUS:
1951 nigel 77 count = current_state->count; /* Already matched */
1952     if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1953     if (clen > 0)
1954     {
1955 nigel 93 unsigned int otherd = NOTACHAR;
1956 nigel 77 if ((ims & PCRE_CASELESS) != 0)
1957     {
1958     #ifdef SUPPORT_UTF8
1959 nigel 87 if (utf8 && d >= 128)
1960 nigel 77 {
1961     #ifdef SUPPORT_UCP
1962 ph10 349 otherd = UCD_OTHERCASE(d);
1963 nigel 77 #endif /* SUPPORT_UCP */
1964     }
1965     else
1966     #endif /* SUPPORT_UTF8 */
1967     otherd = fcc[d];
1968     }
1969     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1970 nigel 93 {
1971     if (count > 0 &&
1972     (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1973     {
1974     active_count--; /* Remove non-match possibility */
1975     next_active_state--;
1976     }
1977     count++;
1978     ADD_NEW(state_offset, count);
1979     }
1980 nigel 77 }
1981     break;
1982    
1983     /*-----------------------------------------------------------------*/
1984     case OP_QUERY:
1985     case OP_MINQUERY:
1986 nigel 93 case OP_POSQUERY:
1987 nigel 77 case OP_NOTQUERY:
1988     case OP_NOTMINQUERY:
1989 nigel 93 case OP_NOTPOSQUERY:
1990 nigel 77 ADD_ACTIVE(state_offset + dlen + 1, 0);
1991     if (clen > 0)
1992     {
1993 nigel 93 unsigned int otherd = NOTACHAR;
1994 nigel 91 if ((ims & PCRE_CASELESS) != 0)
1995 nigel 77 {
1996     #ifdef SUPPORT_UTF8
1997 nigel 87 if (utf8 && d >= 128)
1998 nigel 77 {
1999     #ifdef SUPPORT_UCP
2000 ph10 349 otherd = UCD_OTHERCASE(d);
2001 nigel 77 #endif /* SUPPORT_UCP */
2002     }
2003     else
2004     #endif /* SUPPORT_UTF8 */
2005     otherd = fcc[d];
2006     }
2007     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2008 nigel 93 {
2009     if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2010     {
2011     active_count--; /* Remove non-match possibility */
2012     next_active_state--;
2013     }
2014     ADD_NEW(state_offset + dlen + 1, 0);
2015     }
2016 nigel 77 }
2017     break;
2018    
2019     /*-----------------------------------------------------------------*/
2020     case OP_STAR:
2021     case OP_MINSTAR:
2022 nigel 93 case OP_POSSTAR:
2023 nigel 77 case OP_NOTSTAR:
2024     case OP_NOTMINSTAR:
2025 nigel 93 case OP_NOTPOSSTAR:
2026 nigel 77 ADD_ACTIVE(state_offset + dlen + 1, 0);
2027     if (clen > 0)
2028     {
2029 nigel 93 unsigned int otherd = NOTACHAR;
2030 nigel 91 if ((ims & PCRE_CASELESS) != 0)
2031 nigel 77 {
2032     #ifdef SUPPORT_UTF8
2033 nigel 87 if (utf8 && d >= 128)
2034 nigel 77 {
2035     #ifdef SUPPORT_UCP
2036 ph10 349 otherd = UCD_OTHERCASE(d);
2037 nigel 77 #endif /* SUPPORT_UCP */
2038     }
2039     else
2040     #endif /* SUPPORT_UTF8 */
2041     otherd = fcc[d];
2042     }
2043     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2044 nigel 93 {
2045     if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2046     {
2047     active_count--; /* Remove non-match possibility */
2048     next_active_state--;
2049     }
2050     ADD_NEW(state_offset, 0);
2051     }
2052 nigel 77 }
2053     break;
2054    
2055     /*-----------------------------------------------------------------*/
2056     case OP_EXACT:
2057 nigel 93 case OP_NOTEXACT:
2058     count = current_state->count; /* Number already matched */
2059     if (clen > 0)
2060     {
2061     unsigned int otherd = NOTACHAR;
2062     if ((ims & PCRE_CASELESS) != 0)
2063     {
2064     #ifdef SUPPORT_UTF8
2065     if (utf8 && d >= 128)
2066     {
2067     #ifdef SUPPORT_UCP
2068 ph10 349 otherd = UCD_OTHERCASE(d);
2069 nigel 93 #endif /* SUPPORT_UCP */
2070     }
2071     else
2072     #endif /* SUPPORT_UTF8 */
2073     otherd = fcc[d];
2074     }
2075     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2076     {
2077     if (++count >= GET2(code, 1))
2078     { ADD_NEW(state_offset + dlen + 3, 0); }
2079     else
2080     { ADD_NEW(state_offset, count); }
2081     }
2082     }
2083     break;
2084    
2085     /*-----------------------------------------------------------------*/
2086 nigel 77 case OP_UPTO:
2087     case OP_MINUPTO:
2088 nigel 93 case OP_POSUPTO:
2089 nigel 77 case OP_NOTUPTO:
2090     case OP_NOTMINUPTO:
2091 nigel 93 case OP_NOTPOSUPTO:
2092     ADD_ACTIVE(state_offset + dlen + 3, 0);
2093 nigel 77 count = current_state->count; /* Number already matched */
2094     if (clen > 0)
2095     {
2096 nigel 93 unsigned int otherd = NOTACHAR;
2097 nigel 77 if ((ims & PCRE_CASELESS) != 0)
2098     {
2099     #ifdef SUPPORT_UTF8
2100 nigel 87 if (utf8 && d >= 128)
2101 nigel 77 {
2102     #ifdef SUPPORT_UCP
2103 ph10 349 otherd = UCD_OTHERCASE(d);
2104 nigel 77 #endif /* SUPPORT_UCP */
2105     }
2106     else
2107     #endif /* SUPPORT_UTF8 */
2108     otherd = fcc[d];
2109     }
2110     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2111     {
2112 nigel 93 if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2113     {
2114     active_count--; /* Remove non-match possibility */
2115     next_active_state--;
2116     }
2117 nigel 77 if (++count >= GET2(code, 1))
2118     { ADD_NEW(state_offset + dlen + 3, 0); }
2119     else
2120     { ADD_NEW(state_offset, count); }
2121     }
2122     }
2123     break;
2124    
2125    
2126     /* ========================================================================== */
2127     /* These are the class-handling opcodes */
2128    
2129     case OP_CLASS:
2130     case OP_NCLASS:
2131     case OP_XCLASS:
2132     {
2133     BOOL isinclass = FALSE;
2134     int next_state_offset;
2135     const uschar *ecode;
2136    
2137     /* For a simple class, there is always just a 32-byte table, and we
2138     can set isinclass from it. */
2139    
2140     if (codevalue != OP_XCLASS)
2141     {
2142     ecode = code + 33;
2143     if (clen > 0)
2144     {
2145     isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2146     ((code[1 + c/8] & (1 << (c&7))) != 0);
2147     }
2148     }
2149    
2150     /* An extended class may have a table or a list of single characters,
2151     ranges, or both, and it may be positive or negative. There's a
2152     function that sorts all this out. */
2153    
2154     else
2155     {
2156     ecode = code + GET(code, 1);
2157     if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
2158     }
2159    
2160     /* At this point, isinclass is set for all kinds of class, and ecode
2161     points to the byte after the end of the class. If there is a
2162     quantifier, this is where it will be. */
2163    
2164     next_state_offset = ecode - start_code;
2165    
2166     switch (*ecode)
2167     {
2168     case OP_CRSTAR:
2169     case OP_CRMINSTAR:
2170     ADD_ACTIVE(next_state_offset + 1, 0);
2171     if (isinclass) { ADD_NEW(state_offset, 0); }
2172     break;
2173    
2174     case OP_CRPLUS:
2175     case OP_CRMINPLUS:
2176     count = current_state->count; /* Already matched */
2177     if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2178     if (isinclass) { count++; ADD_NEW(state_offset, count); }
2179     break;
2180    
2181     case OP_CRQUERY:
2182     case OP_CRMINQUERY:
2183     ADD_ACTIVE(next_state_offset + 1, 0);
2184     if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
2185     break;
2186    
2187     case OP_CRRANGE:
2188     case OP_CRMINRANGE:
2189     count = current_state->count; /* Already matched */
2190     if (count >= GET2(ecode, 1))
2191     { ADD_ACTIVE(next_state_offset + 5, 0); }
2192     if (isinclass)
2193     {
2194 nigel 91 int max = GET2(ecode, 3);
2195     if (++count >= max && max != 0) /* Max 0 => no limit */
2196 nigel 77 { ADD_NEW(next_state_offset + 5, 0); }
2197     else
2198     { ADD_NEW(state_offset, count); }
2199     }
2200     break;
2201    
2202     default:
2203     if (isinclass) { ADD_NEW(next_state_offset, 0); }
2204     break;
2205     }
2206     }
2207     break;
2208    
2209     /* ========================================================================== */
2210     /* These are the opcodes for fancy brackets of various kinds. We have
2211 ph10 426 to use recursion in order to handle them. The "always failing" assertion
2212     (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2213 ph10 341 though the other "backtracking verbs" are not supported. */
2214 ph10 345
2215 ph10 341 case OP_FAIL:
2216 ph10 428 forced_fail++; /* Count FAILs for multiple states */
2217 ph10 345 break;
2218 nigel 77
2219     case OP_ASSERT:
2220     case OP_ASSERT_NOT:
2221     case OP_ASSERTBACK:
2222     case OP_ASSERTBACK_NOT:
2223     {
2224     int rc;
2225     int local_offsets[2];
2226     int local_workspace[1000];
2227     const uschar *endasscode = code + GET(code, 1);
2228    
2229     while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2230    
2231     rc = internal_dfa_exec(
2232     md, /* static match data */
2233     code, /* this subexpression's code */
2234     ptr, /* where we currently are */
2235     ptr - start_subject, /* start offset */
2236     local_offsets, /* offset vector */
2237     sizeof(local_offsets)/sizeof(int), /* size of same */
2238     local_workspace, /* workspace vector */
2239     sizeof(local_workspace)/sizeof(int), /* size of same */
2240     ims, /* the current ims flags */
2241     rlevel, /* function recursion level */
2242     recursing); /* pass on regex recursion */
2243    
2244     if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2245     { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2246     }
2247     break;
2248    
2249     /*-----------------------------------------------------------------*/
2250     case OP_COND:
2251 nigel 93 case OP_SCOND:
2252 nigel 77 {
2253     int local_offsets[1000];
2254     int local_workspace[1000];
2255 ph10 406 int codelink = GET(code, 1);
2256 ph10 397 int condcode;
2257 ph10 406
2258 ph10 397 /* Because of the way auto-callout works during compile, a callout item
2259 ph10 406 is inserted between OP_COND and an assertion condition. This does not
2260 ph10 398 happen for the other conditions. */
2261 nigel 77
2262 ph10 397 if (code[LINK_SIZE+1] == OP_CALLOUT)
2263 ph10 406 {
2264     rrc = 0;
2265 ph10 397 if (pcre_callout != NULL)
2266     {
2267     pcre_callout_block cb;
2268     cb.version = 1; /* Version 1 of the callout block */
2269     cb.callout_number = code[LINK_SIZE+2];
2270     cb.offset_vector = offsets;
2271     cb.subject = (PCRE_SPTR)start_subject;
2272     cb.subject_length = end_subject - start_subject;
2273     cb.start_match = current_subject - start_subject;
2274     cb.current_position = ptr - start_subject;
2275     cb.pattern_position = GET(code, LINK_SIZE + 3);
2276     cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2277     cb.capture_top = 1;
2278     cb.capture_last = -1;
2279     cb.callout_data = md->callout_data;
2280     if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
2281     }
2282 ph10 398 if (rrc > 0) break; /* Fail this thread */
2283     code += _pcre_OP_lengths[OP_CALLOUT]; /* Skip callout data */
2284 ph10 406 }
2285 ph10 398
2286 ph10 397 condcode = code[LINK_SIZE+1];
2287 ph10 406
2288 nigel 93 /* Back reference conditions are not supported */
2289 nigel 77
2290 ph10 461 if (condcode == OP_CREF || condcode == OP_NCREF)
2291 ph10 459 return PCRE_ERROR_DFA_UCOND;
2292 nigel 93
2293     /* The DEFINE condition is always false */
2294    
2295     if (condcode == OP_DEF)
2296 ph10 398 { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2297 nigel 93
2298     /* The only supported version of OP_RREF is for the value RREF_ANY,
2299     which means "test if in any recursion". We can't test for specifically
2300     recursed groups. */
2301    
2302 ph10 459 else if (condcode == OP_RREF || condcode == OP_NRREF)
2303 nigel 93 {
2304 nigel 77 int value = GET2(code, LINK_SIZE+2);
2305 nigel 93 if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2306 ph10 406 if (recursing > 0)
2307 ph10 398 { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2308     else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2309 nigel 77 }
2310    
2311     /* Otherwise, the condition is an assertion */
2312    
2313     else
2314     {
2315     int rc;
2316     const uschar *asscode = code + LINK_SIZE + 1;
2317     const uschar *endasscode = asscode + GET(asscode, 1);
2318    
2319     while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2320    
2321     rc = internal_dfa_exec(
2322     md, /* fixed match data */
2323     asscode, /* this subexpression's code */
2324     ptr, /* where we currently are */
2325     ptr - start_subject, /* start offset */
2326     local_offsets, /* offset vector */
2327     sizeof(local_offsets)/sizeof(int), /* size of same */
2328     local_workspace, /* workspace vector */
2329     sizeof(local_workspace)/sizeof(int), /* size of same */
2330     ims, /* the current ims flags */
2331     rlevel, /* function recursion level */
2332     recursing); /* pass on regex recursion */
2333    
2334     if ((rc >= 0) ==
2335     (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2336 ph10 398 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
2337 nigel 77 else
2338 ph10 397 { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2339 nigel 77 }
2340     }
2341     break;
2342    
2343     /*-----------------------------------------------------------------*/
2344     case OP_RECURSE:
2345     {
2346     int local_offsets[1000];
2347     int local_workspace[1000];
2348     int rc;
2349    
2350     DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
2351     recursing + 1));
2352    
2353     rc = internal_dfa_exec(
2354     md, /* fixed match data */
2355     start_code + GET(code, 1), /* this subexpression's code */
2356     ptr, /* where we currently are */
2357     ptr - start_subject, /* start offset */
2358     local_offsets, /* offset vector */
2359     sizeof(local_offsets)/sizeof(int), /* size of same */
2360     local_workspace, /* workspace vector */
2361     sizeof(local_workspace)/sizeof(int), /* size of same */
2362     ims, /* the current ims flags */
2363     rlevel, /* function recursion level */
2364     recursing + 1); /* regex recurse level */
2365    
2366     DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
2367     recursing + 1, rc));
2368    
2369     /* Ran out of internal offsets */
2370    
2371     if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
2372    
2373     /* For each successful matched substring, set up the next state with a
2374     count of characters to skip before trying it. Note that the count is in
2375     characters, not bytes. */
2376    
2377     if (rc > 0)
2378     {
2379     for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2380     {
2381     const uschar *p = start_subject + local_offsets[rc];
2382     const uschar *pp = start_subject + local_offsets[rc+1];
2383     int charcount = local_offsets[rc+1] - local_offsets[rc];
2384     while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2385     if (charcount > 0)
2386     {
2387     ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
2388     }
2389     else
2390     {
2391     ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
2392     }
2393     }
2394     }
2395     else if (rc != PCRE_ERROR_NOMATCH) return rc;
2396     }
2397     break;
2398    
2399     /*-----------------------------------------------------------------*/
2400     case OP_ONCE:
2401     {
2402     int local_offsets[2];
2403     int local_workspace[1000];
2404    
2405     int rc = internal_dfa_exec(
2406     md, /* fixed match data */
2407     code, /* this subexpression's code */
2408     ptr, /* where we currently are */
2409     ptr - start_subject, /* start offset */
2410     local_offsets, /* offset vector */
2411     sizeof(local_offsets)/sizeof(int), /* size of same */
2412     local_workspace, /* workspace vector */
2413     sizeof(local_workspace)/sizeof(int), /* size of same */
2414     ims, /* the current ims flags */
2415     rlevel, /* function recursion level */
2416     recursing); /* pass on regex recursion */
2417    
2418     if (rc >= 0)
2419     {
2420     const uschar *end_subpattern = code;
2421     int charcount = local_offsets[1] - local_offsets[0];
2422     int next_state_offset, repeat_state_offset;
2423    
2424     do { end_subpattern += GET(end_subpattern, 1); }
2425     while (*end_subpattern == OP_ALT);
2426     next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
2427    
2428     /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2429     arrange for the repeat state also to be added to the relevant list.
2430     Calculate the offset, or set -1 for no repeat. */
2431    
2432     repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2433     *end_subpattern == OP_KETRMIN)?
2434     end_subpattern - start_code - GET(end_subpattern, 1) : -1;
2435    
2436     /* If we have matched an empty string, add the next state at the
2437     current character pointer. This is important so that the duplicate
2438     checking kicks in, which is what breaks infinite loops that match an
2439     empty string. */
2440    
2441     if (charcount == 0)
2442     {
2443     ADD_ACTIVE(next_state_offset, 0);
2444     }
2445    
2446     /* Optimization: if there are no more active states, and there
2447     are no new states yet set up, then skip over the subject string
2448     right here, to save looping. Otherwise, set up the new state to swing
2449     into action when the end of the substring is reached. */
2450    
2451     else if (i + 1 >= active_count && new_count == 0)
2452     {
2453     ptr += charcount;
2454     clen = 0;
2455     ADD_NEW(next_state_offset, 0);
2456    
2457     /* If we are adding a repeat state at the new character position,
2458     we must fudge things so that it is the only current state.
2459     Otherwise, it might be a duplicate of one we processed before, and
2460     that would cause it to be skipped. */
2461    
2462     if (repeat_state_offset >= 0)
2463     {
2464     next_active_state = active_states;
2465     active_count = 0;
2466     i = -1;
2467     ADD_ACTIVE(repeat_state_offset, 0);
2468     }
2469     }
2470     else
2471     {
2472     const uschar *p = start_subject + local_offsets[0];
2473     const uschar *pp = start_subject + local_offsets[1];
2474     while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2475     ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2476     if (repeat_state_offset >= 0)
2477     { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2478     }
2479    
2480     }
2481     else if (rc != PCRE_ERROR_NOMATCH) return rc;
2482     }
2483     break;
2484    
2485    
2486     /* ========================================================================== */
2487     /* Handle callouts */
2488    
2489     case OP_CALLOUT:
2490 ph10 406 rrc = 0;
2491 nigel 77 if (pcre_callout != NULL)
2492     {
2493     pcre_callout_block cb;
2494     cb.version = 1; /* Version 1 of the callout block */
2495     cb.callout_number = code[1];
2496     cb.offset_vector = offsets;
2497 nigel 87 cb.subject = (PCRE_SPTR)start_subject;
2498 nigel 77 cb.subject_length = end_subject - start_subject;
2499     cb.start_match = current_subject - start_subject;
2500     cb.current_position = ptr - start_subject;
2501     cb.pattern_position = GET(code, 2);
2502     cb.next_item_length = GET(code, 2 + LINK_SIZE);
2503     cb.capture_top = 1;
2504     cb.capture_last = -1;
2505     cb.callout_data = md->callout_data;
2506     if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
2507 ph10 406 }
2508     if (rrc == 0)
2509     { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2510 nigel 77 break;
2511    
2512    
2513     /* ========================================================================== */
2514     default: /* Unsupported opcode */
2515     return PCRE_ERROR_DFA_UITEM;
2516     }
2517    
2518     NEXT_ACTIVE_STATE: continue;
2519    
2520     } /* End of loop scanning active states */
2521    
2522     /* We have finished the processing at the current subject character. If no
2523     new states have been set for the next character, we have found all the
2524     matches that we are going to find. If we are at the top level and partial
2525 ph10 428 matching has been requested, check for appropriate conditions. The
2526     "forced_fail" variable counts the number of (*F) encountered for the character. If it
2527     is equal to the original active_count (saved in workspace[1]) it means that
2528     (*F) was found on every active state. In this case we don't want to give a
2529     partial match. */
2530 nigel 77
2531     if (new_count <= 0)
2532     {
2533 ph10 427 if (rlevel == 1 && /* Top level, and */
2534 ph10 461 reached_end != workspace[1] && /* Not all reached end */
2535 ph10 428 forced_fail != workspace[1] && /* Not all forced fail & */
2536 ph10 427 ( /* either... */
2537     (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
2538     || /* or... */
2539     ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
2540     match_count < 0) /* no matches */
2541     ) && /* And... */
2542     ptr >= end_subject && /* Reached end of subject */
2543     ptr > current_subject) /* Matched non-empty string */
2544 nigel 77 {
2545     if (offsetcount >= 2)
2546     {
2547 ph10 435 offsets[0] = md->start_used_ptr - start_subject;
2548 nigel 77 offsets[1] = end_subject - start_subject;
2549     }
2550     match_count = PCRE_ERROR_PARTIAL;
2551     }
2552    
2553     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2554     "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2555     rlevel*2-2, SP));
2556 nigel 91 break; /* In effect, "return", but see the comment below */
2557 nigel 77 }
2558    
2559     /* One or more states are active for the next character. */
2560    
2561     ptr += clen; /* Advance to next subject character */
2562     } /* Loop to move along the subject string */
2563    
2564 nigel 91 /* Control gets here from "break" a few lines above. We do it this way because
2565     if we use "return" above, we have compiler trouble. Some compilers warn if
2566     there's nothing here because they think the function doesn't return a value. On
2567     the other hand, if we put a dummy statement here, some more clever compilers
2568     complain that it can't be reached. Sigh. */
2569 nigel 77
2570 nigel 91 return match_count;
2571 nigel 77 }
2572    
2573    
2574    
2575    
2576     /*************************************************
2577     * Execute a Regular Expression - DFA engine *
2578     *************************************************/
2579    
2580     /* This external function applies a compiled re to a subject string using a DFA
2581     engine. This function calls the internal function multiple times if the pattern
2582     is not anchored.
2583    
2584     Arguments:
2585     argument_re points to the compiled expression
2586 ph10 97 extra_data points to extra data or is NULL
2587 nigel 77 subject points to the subject string
2588     length length of subject string (may contain binary zeros)
2589     start_offset where to start in the subject string
2590     options option bits
2591     offsets vector of match offsets
2592     offsetcount size of same
2593     workspace workspace vector
2594     wscount size of same
2595    
2596     Returns: > 0 => number of match offset pairs placed in offsets
2597     = 0 => offsets overflowed; longest matches are present
2598     -1 => failed to match
2599     < -1 => some kind of unexpected problem
2600     */
2601    
2602 ph10 359 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2603 nigel 77 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2604     const char *subject, int length, int start_offset, int options, int *offsets,
2605     int offsetcount, int *workspace, int wscount)
2606     {
2607     real_pcre *re = (real_pcre *)argument_re;
2608     dfa_match_data match_block;
2609 nigel 91 dfa_match_data *md = &match_block;
2610 nigel 77 BOOL utf8, anchored, startline, firstline;
2611     const uschar *current_subject, *end_subject, *lcc;
2612    
2613     pcre_study_data internal_study;
2614     const pcre_study_data *study = NULL;
2615     real_pcre internal_re;
2616    
2617     const uschar *req_byte_ptr;
2618     const uschar *start_bits = NULL;
2619     BOOL first_byte_caseless = FALSE;
2620     BOOL req_byte_caseless = FALSE;
2621     int first_byte = -1;
2622     int req_byte = -1;
2623     int req_byte2 = -1;
2624 nigel 91 int newline;
2625 nigel 77
2626     /* Plausibility checks */
2627    
2628     if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
2629     if (re == NULL || subject == NULL || workspace == NULL ||
2630     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
2631     if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
2632     if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
2633    
2634     /* We need to find the pointer to any study data before we test for byte
2635     flipping, so we scan the extra_data block first. This may set two fields in the
2636     match block, so we must initialize them beforehand. However, the other fields
2637     in the match block must not be set until after the byte flipping. */
2638    
2639 nigel 91 md->tables = re->tables;
2640     md->callout_data = NULL;
2641 nigel 77
2642     if (extra_data != NULL)
2643     {
2644     unsigned int flags = extra_data->flags;
2645     if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2646     study = (const pcre_study_data *)extra_data->study_data;
2647     if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2648 nigel 87 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2649     return PCRE_ERROR_DFA_UMLIMIT;
2650 nigel 77 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2651 nigel 91 md->callout_data = extra_data->callout_data;
2652 nigel 77 if ((flags & PCRE_EXTRA_TABLES) != 0)
2653 nigel 91 md->tables = extra_data->tables;
2654 nigel 77 }
2655 ph10 461
2656 nigel 77 /* Check that the first field in the block is the magic number. If it is not,
2657     test for a regex that was compiled on a host of opposite endianness. If this is
2658     the case, flipped values are put in internal_re and internal_study if there was
2659     study data too. */
2660    
2661     if (re->magic_number != MAGIC_NUMBER)
2662     {
2663     re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
2664     if (re == NULL) return PCRE_ERROR_BADMAGIC;
2665     if (study != NULL) study = &internal_study;
2666     }
2667    
2668     /* Set some local values */
2669    
2670     current_subject = (const unsigned char *)subject + start_offset;
2671     end_subject = (const unsigned char *)subject + length;
2672     req_byte_ptr = current_subject - 1;
2673    
2674 nigel 91 #ifdef SUPPORT_UTF8
2675 nigel 77 utf8 = (re->options & PCRE_UTF8) != 0;
2676 nigel 91 #else
2677     utf8 = FALSE;
2678     #endif
2679 nigel 77
2680 nigel 87 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2681     (re->options & PCRE_ANCHORED) != 0;
2682    
2683 nigel 77 /* The remaining fixed data for passing around. */
2684    
2685 nigel 91 md->start_code = (const uschar *)argument_re +
2686 nigel 77 re->name_table_offset + re->name_count * re->name_entry_size;
2687 nigel 91 md->start_subject = (const unsigned char *)subject;
2688     md->end_subject = end_subject;
2689 ph10 442 md->start_offset = start_offset;
2690 nigel 91 md->moptions = options;
2691     md->poptions = re->options;
2692 nigel 77
2693 ph10 231 /* If the BSR option is not set at match time, copy what was set
2694     at compile time. */
2695    
2696     if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2697     {
2698     if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2699     md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2700     #ifdef BSR_ANYCRLF
2701     else md->moptions |= PCRE_BSR_ANYCRLF;
2702 ph10 243 #endif
2703     }
2704 ph10 231
2705 ph10 97 /* Handle different types of newline. The three bits give eight cases. If
2706     nothing is set at run time, whatever was used at compile time applies. */
2707 nigel 91
2708 ph10 144 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2709 nigel 93 PCRE_NEWLINE_BITS)
2710 nigel 91 {
2711 nigel 93 case 0: newline = NEWLINE; break; /* Compile-time default */
2712 ph10 391 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2713     case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2714 nigel 91 case PCRE_NEWLINE_CR+
2715 ph10 391 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2716 nigel 93 case PCRE_NEWLINE_ANY: newline = -1; break;
2717 ph10 150 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2718 nigel 93 default: return PCRE_ERROR_BADNEWLINE;
2719 nigel 91 }
2720    
2721 ph10 149 if (newline == -2)
2722 nigel 91 {
2723 ph10 149 md->nltype = NLTYPE_ANYCRLF;
2724     }
2725     else if (newline < 0)
2726     {
2727 nigel 93 md->nltype = NLTYPE_ANY;
2728 nigel 91 }
2729     else
2730     {
2731 nigel 93 md->nltype = NLTYPE_FIXED;
2732     if (newline > 255)
2733     {
2734     md->nllen = 2;
2735     md->nl[0] = (newline >> 8) & 255;
2736     md->nl[1] = newline & 255;
2737     }
2738     else
2739     {
2740     md->nllen = 1;
2741     md->nl[0] = newline;
2742     }
2743 nigel 91 }
2744    
2745 nigel 77 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2746     back the character offset. */
2747    
2748     #ifdef SUPPORT_UTF8
2749     if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
2750     {
2751     if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
2752     return PCRE_ERROR_BADUTF8;
2753     if (start_offset > 0 && start_offset < length)
2754     {
2755     int tb = ((uschar *)subject)[start_offset];
2756     if (tb > 127)
2757     {
2758     tb &= 0xc0;
2759     if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
2760     }
2761     }
2762     }
2763     #endif
2764    
2765     /* If the exec call supplied NULL for tables, use the inbuilt ones. This
2766     is a feature that makes it possible to save compiled regex and re-use them
2767     in other programs later. */
2768    
2769 nigel 91 if (md->tables == NULL) md->tables = _pcre_default_tables;
2770 nigel 77
2771     /* The lower casing table and the "must be at the start of a line" flag are
2772     used in a loop when finding where to start. */
2773    
2774 nigel 91 lcc = md->tables + lcc_offset;
2775 ph10 230 startline = (re->flags & PCRE_STARTLINE) != 0;
2776 nigel 77 firstline = (re->options & PCRE_FIRSTLINE) != 0;
2777    
2778     /* Set up the first character to match, if available. The first_byte value is
2779     never set for an anchored regular expression, but the anchoring may be forced
2780     at run time, so we have to test for anchoring. The first char may be unset for
2781     an unanchored pattern, of course. If there's no first char and the pattern was
2782     studied, there may be a bitmap of possible first characters. */
2783    
2784     if (!anchored)
2785     {
2786 ph10 230 if ((re->flags & PCRE_FIRSTSET) != 0)
2787 nigel 77 {
2788     first_byte = re->first_byte & 255;
2789     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
2790     first_byte = lcc[first_byte];
2791     }
2792     else
2793     {
2794 ph10 455 if (!startline && study != NULL &&
2795     (study->flags & PCRE_STUDY_MAPPED) != 0)
2796 nigel 77 start_bits = study->start_bits;
2797     }
2798     }
2799    
2800     /* For anchored or unanchored matches, there may be a "last known required
2801     character" set. */
2802    
2803 ph10 230 if ((re->flags & PCRE_REQCHSET) != 0)
2804 nigel 77 {
2805     req_byte = re->req_byte & 255;
2806     req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2807 nigel 91 req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
2808 nigel 77 }
2809    
2810     /* Call the main matching function, looping for a non-anchored regex after a
2811 ph10 389 failed match. If not restarting, perform certain optimizations at the start of
2812     a match. */
2813 nigel 77
2814     for (;;)
2815     {
2816     int rc;
2817    
2818     if ((options & PCRE_DFA_RESTART) == 0)
2819     {
2820     const uschar *save_end_subject = end_subject;
2821    
2822 ph10 389 /* If firstline is TRUE, the start of the match is constrained to the first
2823     line of a multiline string. Implement this by temporarily adjusting
2824     end_subject so that we stop scanning at a newline. If the match fails at
2825     the newline, later code breaks this loop. */
2826 nigel 77
2827     if (firstline)
2828     {
2829 ph10 365 USPTR t = current_subject;
2830     #ifdef SUPPORT_UTF8
2831     if (utf8)
2832 ph10 371 {
2833     while (t < md->end_subject && !IS_NEWLINE(t))
2834 ph10 365 {
2835     t++;
2836     while (t < end_subject && (*t & 0xc0) == 0x80) t++;
2837 ph10 371 }
2838 ph10 365 }
2839     else
2840 ph10 371 #endif
2841 nigel 93 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2842 nigel 77 end_subject = t;
2843     }
2844 ph10 392
2845 ph10 389 /* There are some optimizations that avoid running the match if a known
2846 ph10 455 starting point is not found. However, there is an option that disables
2847     these, for testing and for ensuring that all callouts do actually occur. */
2848 nigel 77
2849 ph10 389 if ((options & PCRE_NO_START_OPTIMIZE) == 0)
2850 ph10 392 {
2851 ph10 389 /* Advance to a known first byte. */
2852 ph10 392
2853 ph10 389 if (first_byte >= 0)
2854 nigel 77 {
2855 ph10 389 if (first_byte_caseless)
2856     while (current_subject < end_subject &&
2857     lcc[*current_subject] != first_byte)
2858     current_subject++;
2859     else
2860 ph10 392 while (current_subject < end_subject &&
2861 ph10 389 *current_subject != first_byte)
2862     current_subject++;
2863     }
2864 ph10 392
2865 ph10 389 /* Or to just after a linebreak for a multiline match if possible */
2866 ph10 392
2867 ph10 389 else if (startline)
2868     {
2869     if (current_subject > md->start_subject + start_offset)
2870     {
2871 ph10 365 #ifdef SUPPORT_UTF8
2872 ph10 389 if (utf8)
2873 ph10 365 {
2874 ph10 392 while (current_subject < end_subject &&
2875 ph10 389 !WAS_NEWLINE(current_subject))
2876     {
2877 ph10 365 current_subject++;
2878 ph10 389 while(current_subject < end_subject &&
2879     (*current_subject & 0xc0) == 0x80)
2880     current_subject++;
2881     }
2882 ph10 371 }
2883 ph10 389 else
2884     #endif
2885     while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
2886     current_subject++;
2887 ph10 392
2888 ph10 389 /* If we have just passed a CR and the newline option is ANY or
2889     ANYCRLF, and we are now at a LF, advance the match position by one
2890     more character. */
2891 ph10 392
2892 ph10 391 if (current_subject[-1] == CHAR_CR &&
2893 ph10 389 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2894     current_subject < end_subject &&
2895 ph10 391 *current_subject == CHAR_NL)
2896 ph10 389 current_subject++;
2897 ph10 365 }
2898 nigel 77 }
2899 ph10 392
2900 ph10 389 /* Or to a non-unique first char after study */
2901 ph10 392
2902 ph10 389 else if (start_bits != NULL)
2903 nigel 77 {
2904 ph10 389 while (current_subject < end_subject)
2905     {
2906     register unsigned int c = *current_subject;
2907     if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
2908     else break;
2909     }
2910 nigel 77 }
2911 ph10 392 }
2912 nigel 77
2913     /* Restore fudged end_subject */
2914    
2915     end_subject = save_end_subject;
2916    
2917 ph10 461 /* The following two optimizations are disabled for partial matching or if
2918     disabling is explicitly requested (and of course, by the test above, this
2919 ph10 455 code is not obeyed when restarting after a partial match). */
2920 ph10 461
2921 ph10 455 if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
2922     (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
2923 ph10 461 {
2924 ph10 455 /* If the pattern was studied, a minimum subject length may be set. This
2925     is a lower bound; there may be no string of that length that matches the
2926     pattern. Although the value is, strictly, in characters, we treat it as
2927     bytes to avoid spending too much time in this optimization. */
2928 nigel 77
2929 ph10 455 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
2930     end_subject - current_subject < study->minlength)
2931     return PCRE_ERROR_NOMATCH;
2932 ph10 461
2933 ph10 455 /* If req_byte is set, we know that that character must appear in the
2934     subject for the match to succeed. If the first character is set, req_byte
2935     must be later in the subject; otherwise the test starts at the match
2936     point. This optimization can save a huge amount of work in patterns with
2937     nested unlimited repeats that aren't going to match. Writing separate
2938     code for cased/caseless versions makes it go faster, as does using an
2939     autoincrement and backing off on a match.
2940 ph10 461
2941 ph10 455 HOWEVER: when the subject string is very, very long, searching to its end
2942     can take a long time, and give bad performance on quite ordinary
2943     patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
2944     string... so we don't do this when the string is sufficiently long. */
2945 ph10 461
2946 ph10 455 if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
2947 nigel 77 {
2948 ph10 455 register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2949 ph10 461
2950 ph10 455 /* We don't need to repeat the search if we haven't yet reached the
2951     place we found it at last time. */
2952 ph10 461
2953 ph10 455 if (p > req_byte_ptr)
2954 nigel 77 {
2955 ph10 455 if (req_byte_caseless)
2956     {
2957     while (p < end_subject)
2958     {
2959     register int pp = *p++;
2960     if (pp == req_byte || pp == req_byte2) { p--; break; }
2961     }
2962     }
2963     else
2964     {
2965     while (p < end_subject)
2966     {
2967     if (*p++ == req_byte) { p--; break; }
2968     }
2969     }
2970 ph10 461
2971 ph10 455 /* If we can't find the required character, break the matching loop,
2972     which will cause a return of PCRE_ERROR_NOMATCH. */
2973 ph10 461
2974 ph10 455 if (p >= end_subject) break;
2975 ph10 461
2976 ph10 455 /* If we have found the required character, save the point where we
2977     found it, so that we don't search again next time round the loop if
2978     the start hasn't passed this character yet. */
2979 ph10 461
2980 ph10 455 req_byte_ptr = p;
2981 nigel 77 }
2982 ph10 461 }
2983 nigel 77 }
2984 ph10 455 } /* End of optimizations that are done when not restarting */
2985 nigel 77
2986     /* OK, now we can do the business */
2987    
2988 ph10 435 md->start_used_ptr = current_subject;
2989 ph10 461
2990 nigel 77 rc = internal_dfa_exec(
2991 nigel 91 md, /* fixed match data */
2992     md->start_code, /* this subexpression's code */
2993     current_subject, /* where we currently are */
2994     start_offset, /* start offset in subject */
2995     offsets, /* offset vector */
2996     offsetcount, /* size of same */
2997     workspace, /* workspace vector */
2998     wscount, /* size of same */
2999 nigel 77 re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
3000 nigel 91 0, /* function recurse level */
3001     0); /* regex recurse level */
3002 nigel 77
3003     /* Anything other than "no match" means we are done, always; otherwise, carry
3004     on only if not anchored. */
3005    
3006     if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
3007    
3008     /* Advance to the next subject character unless we are at the end of a line
3009     and firstline is set. */
3010    
3011 nigel 93 if (firstline && IS_NEWLINE(current_subject)) break;
3012 nigel 77 current_subject++;
3013     if (utf8)
3014     {
3015     while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
3016     current_subject++;
3017     }
3018     if (current_subject > end_subject) break;
3019    
3020 ph10 227 /* If we have just passed a CR and we are now at a LF, and the pattern does
3021 ph10 226 not contain any explicit matches for \r or \n, and the newline option is CRLF
3022     or ANY or ANYCRLF, advance the match position by one more character. */
3023 nigel 93
3024 ph10 391 if (current_subject[-1] == CHAR_CR &&
3025 ph10 226 current_subject < end_subject &&
3026 ph10 391 *current_subject == CHAR_NL &&
3027 ph10 230 (re->flags & PCRE_HASCRORLF) == 0 &&
3028 ph10 226 (md->nltype == NLTYPE_ANY ||
3029     md->nltype == NLTYPE_ANYCRLF ||
3030     md->nllen == 2))
3031 nigel 93 current_subject++;
3032    
3033     } /* "Bumpalong" loop */
3034    
3035 nigel 77 return PCRE_ERROR_NOMATCH;
3036     }
3037    
3038     /* End of pcre_dfa_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12