/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 545 - (show annotations) (download)
Wed Jun 16 10:51:15 2010 UTC (4 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 111930 byte(s)
Tidyup for 8.10-RC2 test release.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language (but see
7 below for why this module is different).
8
9 Written by Philip Hazel
10 Copyright (c) 1997-2010 University of Cambridge
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41
42 /* This module contains the external function pcre_dfa_exec(), which is an
43 alternative matching function that uses a sort of DFA algorithm (not a true
44 FSM). This is NOT Perl-compatible, but it has advantages in certain
45 applications. */
46
47
48 /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
49 the performance of his patterns greatly. I could not use it as it stood, as it
50 was not thread safe, and made assumptions about pattern sizes. Also, it caused
51 test 7 to loop, and test 9 to crash with a segfault.
52
53 The issue is the check for duplicate states, which is done by a simple linear
54 search up the state list. (Grep for "duplicate" below to find the code.) For
55 many patterns, there will never be many states active at one time, so a simple
56 linear search is fine. In patterns that have many active states, it might be a
57 bottleneck. The suggested code used an indexing scheme to remember which states
58 had previously been used for each character, and avoided the linear search when
59 it knew there was no chance of a duplicate. This was implemented when adding
60 states to the state lists.
61
62 I wrote some thread-safe, not-limited code to try something similar at the time
63 of checking for duplicates (instead of when adding states), using index vectors
64 on the stack. It did give a 13% improvement with one specially constructed
65 pattern for certain subject strings, but on other strings and on many of the
66 simpler patterns in the test suite it did worse. The major problem, I think,
67 was the extra time to initialize the index. This had to be done for each call
68 of internal_dfa_exec(). (The supplied patch used a static vector, initialized
69 only once - I suspect this was the cause of the problems with the tests.)
70
71 Overall, I concluded that the gains in some cases did not outweigh the losses
72 in others, so I abandoned this code. */
73
74
75
76 #ifdef HAVE_CONFIG_H
77 #include "config.h"
78 #endif
79
80 #define NLBLOCK md /* Block containing newline information */
81 #define PSSTART start_subject /* Field containing processed string start */
82 #define PSEND end_subject /* Field containing processed string end */
83
84 #include "pcre_internal.h"
85
86
87 /* For use to indent debugging output */
88
89 #define SP " "
90
91
92 /*************************************************
93 * Code parameters and static tables *
94 *************************************************/
95
/* Offsets added to OP_TYPESTAR and its relatives to produce distinct pseudo-
opcodes under special conditions. Each block starts 20 above the previous one,
which should be enough room. The resulting values are never stored, so they do
not have to fit below 256; they are pushed well clear of the normal opcodes. */

#define OP_PROP_EXTRA    300
#define OP_EXTUNI_EXTRA  (OP_PROP_EXTRA + 20)
#define OP_ANYNL_EXTRA   (OP_PROP_EXTRA + 40)
#define OP_HSPACE_EXTRA  (OP_PROP_EXTRA + 60)
#define OP_VSPACE_EXTRA  (OP_PROP_EXTRA + 80)
106
107
108 /* This table identifies those opcodes that are followed immediately by a
109 character that is to be tested in some way. This makes it possible to
110 centralize the loading of these characters. In the case of Type * etc, the
111 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
112 small value. Non-zero values in the table are the offsets from the opcode where
113 the character is to be found. ***NOTE*** If the start of this table is
114 modified, the three tables that follow must also be modified. */
115
116 static const uschar coptable[] = {
117 0, /* End */
118 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
119 0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */
120 0, 0, 0, /* Any, AllAny, Anybyte */
121 0, 0, /* \P, \p */
122 0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */
123 0, /* \X */
124 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
125 1, /* Char */
126 1, /* Charnc */
127 1, /* not */
128 /* Positive single-char repeats */
129 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
130 3, 3, 3, /* upto, minupto, exact */
131 1, 1, 1, 3, /* *+, ++, ?+, upto+ */
132 /* Negative single-char repeats - only for chars < 256 */
133 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
134 3, 3, 3, /* NOT upto, minupto, exact */
135 1, 1, 1, 3, /* NOT *+, ++, ?+, updo+ */
136 /* Positive type repeats */
137 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
138 3, 3, 3, /* Type upto, minupto, exact */
139 1, 1, 1, 3, /* Type *+, ++, ?+, upto+ */
140 /* Character class & ref repeats */
141 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
142 0, 0, /* CRRANGE, CRMINRANGE */
143 0, /* CLASS */
144 0, /* NCLASS */
145 0, /* XCLASS - variable length */
146 0, /* REF */
147 0, /* RECURSE */
148 0, /* CALLOUT */
149 0, /* Alt */
150 0, /* Ket */
151 0, /* KetRmax */
152 0, /* KetRmin */
153 0, /* Assert */
154 0, /* Assert not */
155 0, /* Assert behind */
156 0, /* Assert behind not */
157 0, /* Reverse */
158 0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */
159 0, 0, 0, /* SBRA, SCBRA, SCOND */
160 0, 0, /* CREF, NCREF */
161 0, 0, /* RREF, NRREF */
162 0, /* DEF */
163 0, 0, /* BRAZERO, BRAMINZERO */
164 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */
165 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */
166 0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */
167 };
168
169 /* This table identifies those opcodes that inspect a character. It is used to
170 remember the fact that a character could have been inspected when the end of
171 the subject is reached. ***NOTE*** If the start of this table is modified, the
172 two tables that follow must also be modified. */
173
174 static const uschar poptable[] = {
175 0, /* End */
176 0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */
177 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
178 1, 1, 1, /* Any, AllAny, Anybyte */
179 1, 1, /* \P, \p */
180 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
181 1, /* \X */
182 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
183 1, /* Char */
184 1, /* Charnc */
185 1, /* not */
186 /* Positive single-char repeats */
187 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
188 1, 1, 1, /* upto, minupto, exact */
189 1, 1, 1, 1, /* *+, ++, ?+, upto+ */
190 /* Negative single-char repeats - only for chars < 256 */
191 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
192 1, 1, 1, /* NOT upto, minupto, exact */
193 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
194 /* Positive type repeats */
195 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
196 1, 1, 1, /* Type upto, minupto, exact */
197 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
198 /* Character class & ref repeats */
199 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
200 1, 1, /* CRRANGE, CRMINRANGE */
201 1, /* CLASS */
202 1, /* NCLASS */
203 1, /* XCLASS - variable length */
204 0, /* REF */
205 0, /* RECURSE */
206 0, /* CALLOUT */
207 0, /* Alt */
208 0, /* Ket */
209 0, /* KetRmax */
210 0, /* KetRmin */
211 0, /* Assert */
212 0, /* Assert not */
213 0, /* Assert behind */
214 0, /* Assert behind not */
215 0, /* Reverse */
216 0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */
217 0, 0, 0, /* SBRA, SCBRA, SCOND */
218 0, 0, /* CREF, NCREF */
219 0, 0, /* RREF, NRREF */
220 0, /* DEF */
221 0, 0, /* BRAZERO, BRAMINZERO */
222 0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */
223 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */
224 0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */
225 };
226
227 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
228 and \w */
229
230 static const uschar toptable1[] = {
231 0, 0, 0, 0, 0, 0,
232 ctype_digit, ctype_digit,
233 ctype_space, ctype_space,
234 ctype_word, ctype_word,
235 0, 0 /* OP_ANY, OP_ALLANY */
236 };
237
238 static const uschar toptable2[] = {
239 0, 0, 0, 0, 0, 0,
240 ctype_digit, 0,
241 ctype_space, 0,
242 ctype_word, 0,
243 1, 1 /* OP_ANY, OP_ALLANY */
244 };
245
246
/* A stateblock holds the data for one particular state, that is, the current
data for one active path through the match tree. Every member has to be an int,
because these structures are placed in the working vector that is passed in,
and that vector is a vector of ints. */

typedef struct stateblock stateblock;

struct stateblock {
  int offset;                    /* Offset to opcode */
  int count;                     /* Count for repeats */
  int ims;                       /* ims flag bits */
  int data;                      /* Extra data, used by some states */
};

/* The number of ints that one stateblock occupies */

#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
260
261
262 #ifdef PCRE_DEBUG
/*************************************************
*        Print character string                  *
*************************************************/

/* Debugging aid that writes a byte string to a stream. A byte for which
isprint() is true is written as itself; any other byte is written as a
backslash, the letter x, and two lower-case hexadecimal digits. Nothing is
written when length is zero or negative.

Arguments:
  p           points to string
  length      number of bytes
  f           where to print

Returns:      nothing
*/

static void
pchars(unsigned char *p, int length, FILE *f)
{
int i;
for (i = 0; i < length; i++)
  {
  int ch = p[i];
  if (!isprint(ch))
    fprintf(f, "\\x%02x", ch);
  else
    fprintf(f, "%c", ch);
  }
}
289 #endif
290
291
292
293 /*************************************************
294 * Execute a Regular Expression - DFA engine *
295 *************************************************/
296
297 /* This internal function applies a compiled pattern to a subject string,
298 starting at a given point, using a DFA engine. This function is called from the
299 external one, possibly multiple times if the pattern is not anchored. The
300 function calls itself recursively for some kinds of subpattern.
301
302 Arguments:
303 md the match_data block with fixed information
304 this_start_code the opening bracket of this subexpression's code
305 current_subject where we currently are in the subject string
306 start_offset start offset in the subject string
307 offsets vector to contain the matching string offsets
308 offsetcount size of same
309 workspace vector of workspace
310 wscount size of same
311 ims the current ims flags
312 rlevel function call recursion level
313 recursing regex recursive call level
314
315 Returns: > 0 => number of match offset pairs placed in offsets
316 = 0 => offsets overflowed; longest matches are present
317 -1 => failed to match
318 < -1 => some kind of unexpected problem
319
320 The following macros are used for adding states to the two state vectors (one
321 for the current character, one for the following character). */
322
/* State-adding macros. ADD_ACTIVE and ADD_ACTIVE_DATA append an entry at
next_active_state; ADD_NEW and ADD_NEW_DATA append an entry at next_new_state.
Each entry receives the given offset (x) and count (y) plus the current value
of ims, and the _DATA forms also set the data field (z). The relevant counter
(active_count or new_count) is always incremented; if it had already reached
wscount, nothing is stored and the enclosing function returns
PCRE_ERROR_DFA_WSSIZE.

The macros refer implicitly to active_count, new_count, wscount,
next_active_state, next_new_state, ims and rlevel, all of which must be in
scope where they are used. Each expands to a single statement that needs a
terminating semicolon. The arguments appear again in the DPRINTF call, so they
should be free of side effects. */

#define ADD_ACTIVE(x,y) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count = (y); \
    next_active_state->ims = ims; \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  while (0)

#define ADD_ACTIVE_DATA(x,y,z) \
  do \
    { \
    if (active_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_active_state->offset = (x); \
    next_active_state->count = (y); \
    next_active_state->ims = ims; \
    next_active_state->data = (z); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  while (0)

#define ADD_NEW(x,y) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count = (y); \
    next_new_state->ims = ims; \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  while (0)

#define ADD_NEW_DATA(x,y,z) \
  do \
    { \
    if (new_count++ >= wscount) return PCRE_ERROR_DFA_WSSIZE; \
    next_new_state->offset = (x); \
    next_new_state->count = (y); \
    next_new_state->ims = ims; \
    next_new_state->data = (z); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  while (0)
368
369 /* And now, here is the code */
370
371 static int
372 internal_dfa_exec(
373 dfa_match_data *md,
374 const uschar *this_start_code,
375 const uschar *current_subject,
376 int start_offset,
377 int *offsets,
378 int offsetcount,
379 int *workspace,
380 int wscount,
381 int ims,
382 int rlevel,
383 int recursing)
384 {
385 stateblock *active_states, *new_states, *temp_states;
386 stateblock *next_active_state, *next_new_state;
387
388 const uschar *ctypes, *lcc, *fcc;
389 const uschar *ptr;
390 const uschar *end_code, *first_op;
391
392 int active_count, new_count, match_count;
393
394 /* Some fields in the md block are frequently referenced, so we load them into
395 independent variables in the hope that this will perform better. */
396
397 const uschar *start_subject = md->start_subject;
398 const uschar *end_subject = md->end_subject;
399 const uschar *start_code = md->start_code;
400
401 #ifdef SUPPORT_UTF8
402 BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
403 #else
404 BOOL utf8 = FALSE;
405 #endif
406
407 rlevel++;
408 offsetcount &= (-2);
409
410 wscount -= 2;
411 wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
412 (2 * INTS_PER_STATEBLOCK);
413
414 DPRINTF(("\n%.*s---------------------\n"
415 "%.*sCall to internal_dfa_exec f=%d r=%d\n",
416 rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
417
418 ctypes = md->tables + ctypes_offset;
419 lcc = md->tables + lcc_offset;
420 fcc = md->tables + fcc_offset;
421
422 match_count = PCRE_ERROR_NOMATCH; /* A negative number */
423
424 active_states = (stateblock *)(workspace + 2);
425 next_new_state = new_states = active_states + wscount;
426 new_count = 0;
427
428 first_op = this_start_code + 1 + LINK_SIZE +
429 ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
430
431 /* The first thing in any (sub) pattern is a bracket of some sort. Push all
432 the alternative states onto the list, and find out where the end is. This
433 makes is possible to use this function recursively, when we want to stop at a
434 matching internal ket rather than at the end.
435
436 If the first opcode in the first alternative is OP_REVERSE, we are dealing with
437 a backward assertion. In that case, we have to find out the maximum amount to
438 move back, and set up each alternative appropriately. */
439
440 if (*first_op == OP_REVERSE)
441 {
442 int max_back = 0;
443 int gone_back;
444
445 end_code = this_start_code;
446 do
447 {
448 int back = GET(end_code, 2+LINK_SIZE);
449 if (back > max_back) max_back = back;
450 end_code += GET(end_code, 1);
451 }
452 while (*end_code == OP_ALT);
453
454 /* If we can't go back the amount required for the longest lookbehind
455 pattern, go back as far as we can; some alternatives may still be viable. */
456
457 #ifdef SUPPORT_UTF8
458 /* In character mode we have to step back character by character */
459
460 if (utf8)
461 {
462 for (gone_back = 0; gone_back < max_back; gone_back++)
463 {
464 if (current_subject <= start_subject) break;
465 current_subject--;
466 while (current_subject > start_subject &&
467 (*current_subject & 0xc0) == 0x80)
468 current_subject--;
469 }
470 }
471 else
472 #endif
473
474 /* In byte-mode we can do this quickly. */
475
476 {
477 gone_back = (current_subject - max_back < start_subject)?
478 (int)(current_subject - start_subject) : max_back;
479 current_subject -= gone_back;
480 }
481
482 /* Save the earliest consulted character */
483
484 if (current_subject < md->start_used_ptr)
485 md->start_used_ptr = current_subject;
486
487 /* Now we can process the individual branches. */
488
489 end_code = this_start_code;
490 do
491 {
492 int back = GET(end_code, 2+LINK_SIZE);
493 if (back <= gone_back)
494 {
495 int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
496 ADD_NEW_DATA(-bstate, 0, gone_back - back);
497 }
498 end_code += GET(end_code, 1);
499 }
500 while (*end_code == OP_ALT);
501 }
502
503 /* This is the code for a "normal" subpattern (not a backward assertion). The
504 start of a whole pattern is always one of these. If we are at the top level,
505 we may be asked to restart matching from the same point that we reached for a
506 previous partial match. We still have to scan through the top-level branches to
507 find the end state. */
508
509 else
510 {
511 end_code = this_start_code;
512
513 /* Restarting */
514
515 if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
516 {
517 do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
518 new_count = workspace[1];
519 if (!workspace[0])
520 memcpy(new_states, active_states, new_count * sizeof(stateblock));
521 }
522
523 /* Not restarting */
524
525 else
526 {
527 int length = 1 + LINK_SIZE +
528 ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
529 do
530 {
531 ADD_NEW((int)(end_code - start_code + length), 0);
532 end_code += GET(end_code, 1);
533 length = 1 + LINK_SIZE;
534 }
535 while (*end_code == OP_ALT);
536 }
537 }
538
539 workspace[0] = 0; /* Bit indicating which vector is current */
540
541 DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
542
543 /* Loop for scanning the subject */
544
545 ptr = current_subject;
546 for (;;)
547 {
548 int i, j;
549 int clen, dlen;
550 unsigned int c, d;
551 int forced_fail = 0;
552 BOOL could_continue = FALSE;
553
554 /* Make the new state list into the active state list and empty the
555 new state list. */
556
557 temp_states = active_states;
558 active_states = new_states;
559 new_states = temp_states;
560 active_count = new_count;
561 new_count = 0;
562
563 workspace[0] ^= 1; /* Remember for the restarting feature */
564 workspace[1] = active_count;
565
566 #ifdef PCRE_DEBUG
567 printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
568 pchars((uschar *)ptr, strlen((char *)ptr), stdout);
569 printf("\"\n");
570
571 printf("%.*sActive states: ", rlevel*2-2, SP);
572 for (i = 0; i < active_count; i++)
573 printf("%d/%d ", active_states[i].offset, active_states[i].count);
574 printf("\n");
575 #endif
576
577 /* Set the pointers for adding new states */
578
579 next_active_state = active_states + active_count;
580 next_new_state = new_states;
581
582 /* Load the current character from the subject outside the loop, as many
583 different states may want to look at it, and we assume that at least one
584 will. */
585
586 if (ptr < end_subject)
587 {
588 clen = 1; /* Number of bytes in the character */
589 #ifdef SUPPORT_UTF8
590 if (utf8) { GETCHARLEN(c, ptr, clen); } else
591 #endif /* SUPPORT_UTF8 */
592 c = *ptr;
593 }
594 else
595 {
596 clen = 0; /* This indicates the end of the subject */
597 c = NOTACHAR; /* This value should never actually be used */
598 }
599
600 /* Scan up the active states and act on each one. The result of an action
601 may be to add more states to the currently active list (e.g. on hitting a
602 parenthesis) or it may be to put states on the new list, for considering
603 when we move the character pointer on. */
604
605 for (i = 0; i < active_count; i++)
606 {
607 stateblock *current_state = active_states + i;
608 const uschar *code;
609 int state_offset = current_state->offset;
610 int count, codevalue, rrc;
611
612 #ifdef PCRE_DEBUG
613 printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
614 if (clen == 0) printf("EOL\n");
615 else if (c > 32 && c < 127) printf("'%c'\n", c);
616 else printf("0x%02x\n", c);
617 #endif
618
619 /* This variable is referred to implicity in the ADD_xxx macros. */
620
621 ims = current_state->ims;
622
623 /* A negative offset is a special case meaning "hold off going to this
624 (negated) state until the number of characters in the data field have
625 been skipped". */
626
627 if (state_offset < 0)
628 {
629 if (current_state->data > 0)
630 {
631 DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
632 ADD_NEW_DATA(state_offset, current_state->count,
633 current_state->data - 1);
634 continue;
635 }
636 else
637 {
638 current_state->offset = state_offset = -state_offset;
639 }
640 }
641
642 /* Check for a duplicate state with the same count, and skip if found.
643 See the note at the head of this module about the possibility of improving
644 performance here. */
645
646 for (j = 0; j < i; j++)
647 {
648 if (active_states[j].offset == state_offset &&
649 active_states[j].count == current_state->count)
650 {
651 DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
652 goto NEXT_ACTIVE_STATE;
653 }
654 }
655
656 /* The state offset is the offset to the opcode */
657
658 code = start_code + state_offset;
659 codevalue = *code;
660
661 /* If this opcode inspects a character, but we are at the end of the
662 subject, remember the fact for use when testing for a partial match. */
663
664 if (clen == 0 && poptable[codevalue] != 0)
665 could_continue = TRUE;
666
667 /* If this opcode is followed by an inline character, load it. It is
668 tempting to test for the presence of a subject character here, but that
669 is wrong, because sometimes zero repetitions of the subject are
670 permitted.
671
672 We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
673 argument that is not a data character - but is always one byte long. We
674 have to take special action to deal with \P, \p, \H, \h, \V, \v and \X in
675 this case. To keep the other cases fast, convert these ones to new opcodes.
676 */
677
678 if (coptable[codevalue] > 0)
679 {
680 dlen = 1;
681 #ifdef SUPPORT_UTF8
682 if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
683 #endif /* SUPPORT_UTF8 */
684 d = code[coptable[codevalue]];
685 if (codevalue >= OP_TYPESTAR)
686 {
687 switch(d)
688 {
689 case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
690 case OP_NOTPROP:
691 case OP_PROP: codevalue += OP_PROP_EXTRA; break;
692 case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
693 case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
694 case OP_NOT_HSPACE:
695 case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
696 case OP_NOT_VSPACE:
697 case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
698 default: break;
699 }
700 }
701 }
702 else
703 {
704 dlen = 0; /* Not strictly necessary, but compilers moan */
705 d = NOTACHAR; /* if these variables are not set. */
706 }
707
708
709 /* Now process the individual opcodes */
710
711 switch (codevalue)
712 {
713 /* ========================================================================== */
714 /* These cases are never obeyed. This is a fudge that causes a compile-
715 time error if the vectors coptable or poptable, which are indexed by
716 opcode, are not the correct length. It seems to be the only way to do
717 such a check at compile time, as the sizeof() operator does not work
718 in the C preprocessor. */
719
720 case OP_TABLE_LENGTH:
721 case OP_TABLE_LENGTH +
722 ((sizeof(coptable) == OP_TABLE_LENGTH) &&
723 (sizeof(poptable) == OP_TABLE_LENGTH)):
724 break;
725
726 /* ========================================================================== */
727 /* Reached a closing bracket. If not at the end of the pattern, carry
728 on with the next opcode. Otherwise, unless we have an empty string and
729 PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
730 start of the subject, save the match data, shifting up all previous
731 matches so we always have the longest first. */
732
733 case OP_KET:
734 case OP_KETRMIN:
735 case OP_KETRMAX:
736 if (code != end_code)
737 {
738 ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
739 if (codevalue != OP_KET)
740 {
741 ADD_ACTIVE(state_offset - GET(code, 1), 0);
742 }
743 }
744 else
745 {
746 if (ptr > current_subject ||
747 ((md->moptions & PCRE_NOTEMPTY) == 0 &&
748 ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
749 current_subject > start_subject + md->start_offset)))
750 {
751 if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
752 else if (match_count > 0 && ++match_count * 2 >= offsetcount)
753 match_count = 0;
754 count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
755 if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
756 if (offsetcount >= 2)
757 {
758 offsets[0] = (int)(current_subject - start_subject);
759 offsets[1] = (int)(ptr - start_subject);
760 DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
761 offsets[1] - offsets[0], current_subject));
762 }
763 if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
764 {
765 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
766 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
767 match_count, rlevel*2-2, SP));
768 return match_count;
769 }
770 }
771 }
772 break;
773
774 /* ========================================================================== */
775 /* These opcodes add to the current list of states without looking
776 at the current character. */
777
778 /*-----------------------------------------------------------------*/
779 case OP_ALT:
780 do { code += GET(code, 1); } while (*code == OP_ALT);
781 ADD_ACTIVE((int)(code - start_code), 0);
782 break;
783
784 /*-----------------------------------------------------------------*/
785 case OP_BRA:
786 case OP_SBRA:
787 do
788 {
789 ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
790 code += GET(code, 1);
791 }
792 while (*code == OP_ALT);
793 break;
794
795 /*-----------------------------------------------------------------*/
796 case OP_CBRA:
797 case OP_SCBRA:
798 ADD_ACTIVE((int)(code - start_code + 3 + LINK_SIZE), 0);
799 code += GET(code, 1);
800 while (*code == OP_ALT)
801 {
802 ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
803 code += GET(code, 1);
804 }
805 break;
806
807 /*-----------------------------------------------------------------*/
808 case OP_BRAZERO:
809 case OP_BRAMINZERO:
810 ADD_ACTIVE(state_offset + 1, 0);
811 code += 1 + GET(code, 2);
812 while (*code == OP_ALT) code += GET(code, 1);
813 ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
814 break;
815
816 /*-----------------------------------------------------------------*/
817 case OP_SKIPZERO:
818 code += 1 + GET(code, 2);
819 while (*code == OP_ALT) code += GET(code, 1);
820 ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
821 break;
822
823 /*-----------------------------------------------------------------*/
824 case OP_CIRC:
825 if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
826 ((ims & PCRE_MULTILINE) != 0 &&
827 ptr != end_subject &&
828 WAS_NEWLINE(ptr)))
829 { ADD_ACTIVE(state_offset + 1, 0); }
830 break;
831
832 /*-----------------------------------------------------------------*/
833 case OP_EOD:
834 if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
835 break;
836
837 /*-----------------------------------------------------------------*/
838 case OP_OPT:
839 ims = code[1];
840 ADD_ACTIVE(state_offset + 2, 0);
841 break;
842
843 /*-----------------------------------------------------------------*/
844 case OP_SOD:
845 if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
846 break;
847
848 /*-----------------------------------------------------------------*/
849 case OP_SOM:
850 if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
851 break;
852
853
854 /* ========================================================================== */
855 /* These opcodes inspect the next subject character, and sometimes
856 the previous one as well, but do not have an argument. The variable
857 clen contains the length of the current character and is zero if we are
858 at the end of the subject. */
859
860 /*-----------------------------------------------------------------*/
861 case OP_ANY:
862 if (clen > 0 && !IS_NEWLINE(ptr))
863 { ADD_NEW(state_offset + 1, 0); }
864 break;
865
866 /*-----------------------------------------------------------------*/
867 case OP_ALLANY:
868 if (clen > 0)
869 { ADD_NEW(state_offset + 1, 0); }
870 break;
871
872 /*-----------------------------------------------------------------*/
873 case OP_EODN:
874 if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
875 { ADD_ACTIVE(state_offset + 1, 0); }
876 break;
877
878 /*-----------------------------------------------------------------*/
879 case OP_DOLL:
880 if ((md->moptions & PCRE_NOTEOL) == 0)
881 {
882 if (clen == 0 ||
883 ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
884 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
885 ))
886 { ADD_ACTIVE(state_offset + 1, 0); }
887 }
888 else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
889 { ADD_ACTIVE(state_offset + 1, 0); }
890 break;
891
892 /*-----------------------------------------------------------------*/
893
894 case OP_DIGIT:
895 case OP_WHITESPACE:
896 case OP_WORDCHAR:
897 if (clen > 0 && c < 256 &&
898 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
899 { ADD_NEW(state_offset + 1, 0); }
900 break;
901
902 /*-----------------------------------------------------------------*/
903 case OP_NOT_DIGIT:
904 case OP_NOT_WHITESPACE:
905 case OP_NOT_WORDCHAR:
906 if (clen > 0 && (c >= 256 ||
907 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
908 { ADD_NEW(state_offset + 1, 0); }
909 break;
910
911 /*-----------------------------------------------------------------*/
912 case OP_WORD_BOUNDARY:
913 case OP_NOT_WORD_BOUNDARY:
914 {
915 int left_word, right_word;
916
917 if (ptr > start_subject)
918 {
919 const uschar *temp = ptr - 1;
920 if (temp < md->start_used_ptr) md->start_used_ptr = temp;
921 #ifdef SUPPORT_UTF8
922 if (utf8) BACKCHAR(temp);
923 #endif
924 GETCHARTEST(d, temp);
925 #ifdef SUPPORT_UCP
926 if ((md->poptions & PCRE_UCP) != 0)
927 {
928 if (d == '_') left_word = TRUE; else
929 {
930 int cat = UCD_CATEGORY(d);
931 left_word = (cat == ucp_L || cat == ucp_N);
932 }
933 }
934 else
935 #endif
936 left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
937 }
938 else left_word = FALSE;
939
940 if (clen > 0)
941 {
942 #ifdef SUPPORT_UCP
943 if ((md->poptions & PCRE_UCP) != 0)
944 {
945 if (c == '_') right_word = TRUE; else
946 {
947 int cat = UCD_CATEGORY(c);
948 right_word = (cat == ucp_L || cat == ucp_N);
949 }
950 }
951 else
952 #endif
953 right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
954 }
955 else right_word = FALSE;
956
957 if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
958 { ADD_ACTIVE(state_offset + 1, 0); }
959 }
960 break;
961
962
963 /*-----------------------------------------------------------------*/
964 /* Check the next character by Unicode property. We will get here only
965 if the support is in the binary; otherwise a compile-time error occurs.
966 */
967
968 #ifdef SUPPORT_UCP
969 case OP_PROP:
970 case OP_NOTPROP:
971 if (clen > 0)
972 {
973 BOOL OK;
974 const ucd_record * prop = GET_UCD(c);
975 switch(code[1])
976 {
977 case PT_ANY:
978 OK = TRUE;
979 break;
980
981 case PT_LAMP:
982 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
983 prop->chartype == ucp_Lt;
984 break;
985
986 case PT_GC:
987 OK = _pcre_ucp_gentype[prop->chartype] == code[2];
988 break;
989
990 case PT_PC:
991 OK = prop->chartype == code[2];
992 break;
993
994 case PT_SC:
995 OK = prop->script == code[2];
996 break;
997
998 /* These are specials for combination cases. */
999
1000 case PT_ALNUM:
1001 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1002 _pcre_ucp_gentype[prop->chartype] == ucp_N;
1003 break;
1004
1005 case PT_SPACE: /* Perl space */
1006 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1007 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1008 break;
1009
1010 case PT_PXSPACE: /* POSIX space */
1011 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1012 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1013 c == CHAR_FF || c == CHAR_CR;
1014 break;
1015
1016 case PT_WORD:
1017 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1018 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
1019 c == CHAR_UNDERSCORE;
1020 break;
1021
1022 /* Should never occur, but keep compilers from grumbling. */
1023
1024 default:
1025 OK = codevalue != OP_PROP;
1026 break;
1027 }
1028
1029 if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
1030 }
1031 break;
1032 #endif
1033
1034
1035
1036 /* ========================================================================== */
1037 /* These opcodes likewise inspect the subject character, but have an
1038 argument that is not a data character. It is one of these opcodes:
1039 OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE,
1040 OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
1041
1042 case OP_TYPEPLUS:
1043 case OP_TYPEMINPLUS:
1044 case OP_TYPEPOSPLUS:
1045 count = current_state->count; /* Already matched */
1046 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1047 if (clen > 0)
1048 {
1049 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1050 (c < 256 &&
1051 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1052 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1053 {
1054 if (count > 0 && codevalue == OP_TYPEPOSPLUS)
1055 {
1056 active_count--; /* Remove non-match possibility */
1057 next_active_state--;
1058 }
1059 count++;
1060 ADD_NEW(state_offset, count);
1061 }
1062 }
1063 break;
1064
1065 /*-----------------------------------------------------------------*/
1066 case OP_TYPEQUERY:
1067 case OP_TYPEMINQUERY:
1068 case OP_TYPEPOSQUERY:
1069 ADD_ACTIVE(state_offset + 2, 0);
1070 if (clen > 0)
1071 {
1072 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1073 (c < 256 &&
1074 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1075 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1076 {
1077 if (codevalue == OP_TYPEPOSQUERY)
1078 {
1079 active_count--; /* Remove non-match possibility */
1080 next_active_state--;
1081 }
1082 ADD_NEW(state_offset + 2, 0);
1083 }
1084 }
1085 break;
1086
1087 /*-----------------------------------------------------------------*/
1088 case OP_TYPESTAR:
1089 case OP_TYPEMINSTAR:
1090 case OP_TYPEPOSSTAR:
1091 ADD_ACTIVE(state_offset + 2, 0);
1092 if (clen > 0)
1093 {
1094 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1095 (c < 256 &&
1096 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1097 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1098 {
1099 if (codevalue == OP_TYPEPOSSTAR)
1100 {
1101 active_count--; /* Remove non-match possibility */
1102 next_active_state--;
1103 }
1104 ADD_NEW(state_offset, 0);
1105 }
1106 }
1107 break;
1108
1109 /*-----------------------------------------------------------------*/
1110 case OP_TYPEEXACT:
1111 count = current_state->count; /* Number already matched */
1112 if (clen > 0)
1113 {
1114 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1115 (c < 256 &&
1116 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1117 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1118 {
1119 if (++count >= GET2(code, 1))
1120 { ADD_NEW(state_offset + 4, 0); }
1121 else
1122 { ADD_NEW(state_offset, count); }
1123 }
1124 }
1125 break;
1126
1127 /*-----------------------------------------------------------------*/
1128 case OP_TYPEUPTO:
1129 case OP_TYPEMINUPTO:
1130 case OP_TYPEPOSUPTO:
1131 ADD_ACTIVE(state_offset + 4, 0);
1132 count = current_state->count; /* Number already matched */
1133 if (clen > 0)
1134 {
1135 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1136 (c < 256 &&
1137 (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1138 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1139 {
1140 if (codevalue == OP_TYPEPOSUPTO)
1141 {
1142 active_count--; /* Remove non-match possibility */
1143 next_active_state--;
1144 }
1145 if (++count >= GET2(code, 1))
1146 { ADD_NEW(state_offset + 4, 0); }
1147 else
1148 { ADD_NEW(state_offset, count); }
1149 }
1150 }
1151 break;
1152
1153 /* ========================================================================== */
1154 /* These are virtual opcodes that are used when something like
1155 OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, OP_EXTUNI, or a vertical- or
1156 horizontal-space opcode (such as OP_VSPACE or OP_HSPACE) as its argument. It
1157 keeps the code above fast for the other cases. The argument is in the d variable. */
1158
1159 #ifdef SUPPORT_UCP
1160 case OP_PROP_EXTRA + OP_TYPEPLUS:
1161 case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1162 case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1163 count = current_state->count; /* Already matched */
1164 if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1165 if (clen > 0)
1166 {
1167 BOOL OK;
1168 const ucd_record * prop = GET_UCD(c);
1169 switch(code[2])
1170 {
1171 case PT_ANY:
1172 OK = TRUE;
1173 break;
1174
1175 case PT_LAMP:
1176 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1177 prop->chartype == ucp_Lt;
1178 break;
1179
1180 case PT_GC:
1181 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1182 break;
1183
1184 case PT_PC:
1185 OK = prop->chartype == code[3];
1186 break;
1187
1188 case PT_SC:
1189 OK = prop->script == code[3];
1190 break;
1191
1192 /* These are specials for combination cases. */
1193
1194 case PT_ALNUM:
1195 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1196 _pcre_ucp_gentype[prop->chartype] == ucp_N;
1197 break;
1198
1199 case PT_SPACE: /* Perl space */
1200 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1201 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1202 break;
1203
1204 case PT_PXSPACE: /* POSIX space */
1205 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1206 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1207 c == CHAR_FF || c == CHAR_CR;
1208 break;
1209
1210 case PT_WORD:
1211 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1212 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
1213 c == CHAR_UNDERSCORE;
1214 break;
1215
1216 /* Should never occur, but keep compilers from grumbling. */
1217
1218 default:
1219 OK = codevalue != OP_PROP;
1220 break;
1221 }
1222
1223 if (OK == (d == OP_PROP))
1224 {
1225 if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1226 {
1227 active_count--; /* Remove non-match possibility */
1228 next_active_state--;
1229 }
1230 count++;
1231 ADD_NEW(state_offset, count);
1232 }
1233 }
1234 break;
1235
1236 /*-----------------------------------------------------------------*/
1237 case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1238 case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1239 case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1240 count = current_state->count; /* Already matched */
1241 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1242 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1243 {
1244 const uschar *nptr = ptr + clen;
1245 int ncount = 0;
1246 if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1247 {
1248 active_count--; /* Remove non-match possibility */
1249 next_active_state--;
1250 }
1251 while (nptr < end_subject)
1252 {
1253 int nd;
1254 int ndlen = 1;
1255 GETCHARLEN(nd, nptr, ndlen);
1256 if (UCD_CATEGORY(nd) != ucp_M) break;
1257 ncount++;
1258 nptr += ndlen;
1259 }
1260 count++;
1261 ADD_NEW_DATA(-state_offset, count, ncount);
1262 }
1263 break;
1264 #endif
1265
1266 /*-----------------------------------------------------------------*/
1267 case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1268 case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1269 case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1270 count = current_state->count; /* Already matched */
1271 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1272 if (clen > 0)
1273 {
1274 int ncount = 0;
1275 switch (c)
1276 {
1277 case 0x000b:
1278 case 0x000c:
1279 case 0x0085:
1280 case 0x2028:
1281 case 0x2029:
1282 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1283 goto ANYNL01;
1284
1285 case 0x000d:
1286 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1287 /* Fall through */
1288
1289 ANYNL01:
1290 case 0x000a:
1291 if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1292 {
1293 active_count--; /* Remove non-match possibility */
1294 next_active_state--;
1295 }
1296 count++;
1297 ADD_NEW_DATA(-state_offset, count, ncount);
1298 break;
1299
1300 default:
1301 break;
1302 }
1303 }
1304 break;
1305
1306 /*-----------------------------------------------------------------*/
1307 case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1308 case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1309 case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1310 count = current_state->count; /* Already matched */
1311 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1312 if (clen > 0)
1313 {
1314 BOOL OK;
1315 switch (c)
1316 {
1317 case 0x000a:
1318 case 0x000b:
1319 case 0x000c:
1320 case 0x000d:
1321 case 0x0085:
1322 case 0x2028:
1323 case 0x2029:
1324 OK = TRUE;
1325 break;
1326
1327 default:
1328 OK = FALSE;
1329 break;
1330 }
1331
1332 if (OK == (d == OP_VSPACE))
1333 {
1334 if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1335 {
1336 active_count--; /* Remove non-match possibility */
1337 next_active_state--;
1338 }
1339 count++;
1340 ADD_NEW_DATA(-state_offset, count, 0);
1341 }
1342 }
1343 break;
1344
1345 /*-----------------------------------------------------------------*/
1346 case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1347 case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1348 case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1349 count = current_state->count; /* Already matched */
1350 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1351 if (clen > 0)
1352 {
1353 BOOL OK;
1354 switch (c)
1355 {
1356 case 0x09: /* HT */
1357 case 0x20: /* SPACE */
1358 case 0xa0: /* NBSP */
1359 case 0x1680: /* OGHAM SPACE MARK */
1360 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1361 case 0x2000: /* EN QUAD */
1362 case 0x2001: /* EM QUAD */
1363 case 0x2002: /* EN SPACE */
1364 case 0x2003: /* EM SPACE */
1365 case 0x2004: /* THREE-PER-EM SPACE */
1366 case 0x2005: /* FOUR-PER-EM SPACE */
1367 case 0x2006: /* SIX-PER-EM SPACE */
1368 case 0x2007: /* FIGURE SPACE */
1369 case 0x2008: /* PUNCTUATION SPACE */
1370 case 0x2009: /* THIN SPACE */
1371 case 0x200A: /* HAIR SPACE */
1372 case 0x202f: /* NARROW NO-BREAK SPACE */
1373 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1374 case 0x3000: /* IDEOGRAPHIC SPACE */
1375 OK = TRUE;
1376 break;
1377
1378 default:
1379 OK = FALSE;
1380 break;
1381 }
1382
1383 if (OK == (d == OP_HSPACE))
1384 {
1385 if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1386 {
1387 active_count--; /* Remove non-match possibility */
1388 next_active_state--;
1389 }
1390 count++;
1391 ADD_NEW_DATA(-state_offset, count, 0);
1392 }
1393 }
1394 break;
1395
1396 /*-----------------------------------------------------------------*/
1397 #ifdef SUPPORT_UCP
1398 case OP_PROP_EXTRA + OP_TYPEQUERY:
1399 case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1400 case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1401 count = 4;
1402 goto QS1;
1403
1404 case OP_PROP_EXTRA + OP_TYPESTAR:
1405 case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1406 case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1407 count = 0;
1408
1409 QS1:
1410
1411 ADD_ACTIVE(state_offset + 4, 0);
1412 if (clen > 0)
1413 {
1414 BOOL OK;
1415 const ucd_record * prop = GET_UCD(c);
1416 switch(code[2])
1417 {
1418 case PT_ANY:
1419 OK = TRUE;
1420 break;
1421
1422 case PT_LAMP:
1423 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1424 prop->chartype == ucp_Lt;
1425 break;
1426
1427 case PT_GC:
1428 OK = _pcre_ucp_gentype[prop->chartype] == code[3];
1429 break;
1430
1431 case PT_PC:
1432 OK = prop->chartype == code[3];
1433 break;
1434
1435 case PT_SC:
1436 OK = prop->script == code[3];
1437 break;
1438
1439 /* These are specials for combination cases. */
1440
1441 case PT_ALNUM:
1442 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1443 _pcre_ucp_gentype[prop->chartype] == ucp_N;
1444 break;
1445
1446 case PT_SPACE: /* Perl space */
1447 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1448 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1449 break;
1450
1451 case PT_PXSPACE: /* POSIX space */
1452 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1453 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1454 c == CHAR_FF || c == CHAR_CR;
1455 break;
1456
1457 case PT_WORD:
1458 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1459 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
1460 c == CHAR_UNDERSCORE;
1461 break;
1462
1463 /* Should never occur, but keep compilers from grumbling. */
1464
1465 default:
1466 OK = codevalue != OP_PROP;
1467 break;
1468 }
1469
1470 if (OK == (d == OP_PROP))
1471 {
1472 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1473 codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1474 {
1475 active_count--; /* Remove non-match possibility */
1476 next_active_state--;
1477 }
1478 ADD_NEW(state_offset + count, 0);
1479 }
1480 }
1481 break;
1482
1483 /*-----------------------------------------------------------------*/
1484 case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1485 case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1486 case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1487 count = 2;
1488 goto QS2;
1489
1490 case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1491 case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1492 case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1493 count = 0;
1494
1495 QS2:
1496
1497 ADD_ACTIVE(state_offset + 2, 0);
1498 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1499 {
1500 const uschar *nptr = ptr + clen;
1501 int ncount = 0;
1502 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1503 codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1504 {
1505 active_count--; /* Remove non-match possibility */
1506 next_active_state--;
1507 }
1508 while (nptr < end_subject)
1509 {
1510 int nd;
1511 int ndlen = 1;
1512 GETCHARLEN(nd, nptr, ndlen);
1513 if (UCD_CATEGORY(nd) != ucp_M) break;
1514 ncount++;
1515 nptr += ndlen;
1516 }
1517 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1518 }
1519 break;
1520 #endif
1521
1522 /*-----------------------------------------------------------------*/
1523 case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1524 case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1525 case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1526 count = 2;
1527 goto QS3;
1528
1529 case OP_ANYNL_EXTRA + OP_TYPESTAR:
1530 case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1531 case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1532 count = 0;
1533
1534 QS3:
1535 ADD_ACTIVE(state_offset + 2, 0);
1536 if (clen > 0)
1537 {
1538 int ncount = 0;
1539 switch (c)
1540 {
1541 case 0x000b:
1542 case 0x000c:
1543 case 0x0085:
1544 case 0x2028:
1545 case 0x2029:
1546 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1547 goto ANYNL02;
1548
1549 case 0x000d:
1550 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1551 /* Fall through */
1552
1553 ANYNL02:
1554 case 0x000a:
1555 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1556 codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1557 {
1558 active_count--; /* Remove non-match possibility */
1559 next_active_state--;
1560 }
1561 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1562 break;
1563
1564 default:
1565 break;
1566 }
1567 }
1568 break;
1569
1570 /*-----------------------------------------------------------------*/
1571 case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1572 case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1573 case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1574 count = 2;
1575 goto QS4;
1576
1577 case OP_VSPACE_EXTRA + OP_TYPESTAR:
1578 case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1579 case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1580 count = 0;
1581
1582 QS4:
1583 ADD_ACTIVE(state_offset + 2, 0);
1584 if (clen > 0)
1585 {
1586 BOOL OK;
1587 switch (c)
1588 {
1589 case 0x000a:
1590 case 0x000b:
1591 case 0x000c:
1592 case 0x000d:
1593 case 0x0085:
1594 case 0x2028:
1595 case 0x2029:
1596 OK = TRUE;
1597 break;
1598
1599 default:
1600 OK = FALSE;
1601 break;
1602 }
1603 if (OK == (d == OP_VSPACE))
1604 {
1605 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1606 codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1607 {
1608 active_count--; /* Remove non-match possibility */
1609 next_active_state--;
1610 }
1611 ADD_NEW_DATA(-(state_offset + count), 0, 0);
1612 }
1613 }
1614 break;
1615
1616 /*-----------------------------------------------------------------*/
1617 case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1618 case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1619 case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1620 count = 2;
1621 goto QS5;
1622
1623 case OP_HSPACE_EXTRA + OP_TYPESTAR:
1624 case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1625 case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1626 count = 0;
1627
1628 QS5:
1629 ADD_ACTIVE(state_offset + 2, 0);
1630 if (clen > 0)
1631 {
1632 BOOL OK;
1633 switch (c)
1634 {
1635 case 0x09: /* HT */
1636 case 0x20: /* SPACE */
1637 case 0xa0: /* NBSP */
1638 case 0x1680: /* OGHAM SPACE MARK */
1639 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1640 case 0x2000: /* EN QUAD */
1641 case 0x2001: /* EM QUAD */
1642 case 0x2002: /* EN SPACE */
1643 case 0x2003: /* EM SPACE */
1644 case 0x2004: /* THREE-PER-EM SPACE */
1645 case 0x2005: /* FOUR-PER-EM SPACE */
1646 case 0x2006: /* SIX-PER-EM SPACE */
1647 case 0x2007: /* FIGURE SPACE */
1648 case 0x2008: /* PUNCTUATION SPACE */
1649 case 0x2009: /* THIN SPACE */
1650 case 0x200A: /* HAIR SPACE */
1651 case 0x202f: /* NARROW NO-BREAK SPACE */
1652 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1653 case 0x3000: /* IDEOGRAPHIC SPACE */
1654 OK = TRUE;
1655 break;
1656
1657 default:
1658 OK = FALSE;
1659 break;
1660 }
1661
1662 if (OK == (d == OP_HSPACE))
1663 {
1664 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1665 codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1666 {
1667 active_count--; /* Remove non-match possibility */
1668 next_active_state--;
1669 }
1670 ADD_NEW_DATA(-(state_offset + count), 0, 0);
1671 }
1672 }
1673 break;
1674
1675 /*-----------------------------------------------------------------*/
1676 #ifdef SUPPORT_UCP
1677 case OP_PROP_EXTRA + OP_TYPEEXACT:
1678 case OP_PROP_EXTRA + OP_TYPEUPTO:
1679 case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1680 case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1681 if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1682 { ADD_ACTIVE(state_offset + 6, 0); }
1683 count = current_state->count; /* Number already matched */
1684 if (clen > 0)
1685 {
1686 BOOL OK;
1687 const ucd_record * prop = GET_UCD(c);
1688 switch(code[4])
1689 {
1690 case PT_ANY:
1691 OK = TRUE;
1692 break;
1693
1694 case PT_LAMP:
1695 OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1696 prop->chartype == ucp_Lt;
1697 break;
1698
1699 case PT_GC:
1700 OK = _pcre_ucp_gentype[prop->chartype] == code[5];
1701 break;
1702
1703 case PT_PC:
1704 OK = prop->chartype == code[5];
1705 break;
1706
1707 case PT_SC:
1708 OK = prop->script == code[5];
1709 break;
1710
1711 /* These are specials for combination cases. */
1712
1713 case PT_ALNUM:
1714 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1715 _pcre_ucp_gentype[prop->chartype] == ucp_N;
1716 break;
1717
1718 case PT_SPACE: /* Perl space */
1719 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1720 c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1721 break;
1722
1723 case PT_PXSPACE: /* POSIX space */
1724 OK = _pcre_ucp_gentype[prop->chartype] == ucp_Z ||
1725 c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1726 c == CHAR_FF || c == CHAR_CR;
1727 break;
1728
1729 case PT_WORD:
1730 OK = _pcre_ucp_gentype[prop->chartype] == ucp_L ||
1731 _pcre_ucp_gentype[prop->chartype] == ucp_N ||
1732 c == CHAR_UNDERSCORE;
1733 break;
1734
1735 /* Should never occur, but keep compilers from grumbling. */
1736
1737 default:
1738 OK = codevalue != OP_PROP;
1739 break;
1740 }
1741
1742 if (OK == (d == OP_PROP))
1743 {
1744 if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1745 {
1746 active_count--; /* Remove non-match possibility */
1747 next_active_state--;
1748 }
1749 if (++count >= GET2(code, 1))
1750 { ADD_NEW(state_offset + 6, 0); }
1751 else
1752 { ADD_NEW(state_offset, count); }
1753 }
1754 }
1755 break;
1756
1757 /*-----------------------------------------------------------------*/
1758 case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1759 case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1760 case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1761 case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1762 if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1763 { ADD_ACTIVE(state_offset + 4, 0); }
1764 count = current_state->count; /* Number already matched */
1765 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1766 {
1767 const uschar *nptr = ptr + clen;
1768 int ncount = 0;
1769 if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1770 {
1771 active_count--; /* Remove non-match possibility */
1772 next_active_state--;
1773 }
1774 while (nptr < end_subject)
1775 {
1776 int nd;
1777 int ndlen = 1;
1778 GETCHARLEN(nd, nptr, ndlen);
1779 if (UCD_CATEGORY(nd) != ucp_M) break;
1780 ncount++;
1781 nptr += ndlen;
1782 }
1783 if (++count >= GET2(code, 1))
1784 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1785 else
1786 { ADD_NEW_DATA(-state_offset, count, ncount); }
1787 }
1788 break;
1789 #endif
1790
1791 /*-----------------------------------------------------------------*/
1792 case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1793 case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1794 case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1795 case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1796 if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1797 { ADD_ACTIVE(state_offset + 4, 0); }
1798 count = current_state->count; /* Number already matched */
1799 if (clen > 0)
1800 {
1801 int ncount = 0;
1802 switch (c)
1803 {
1804 case 0x000b:
1805 case 0x000c:
1806 case 0x0085:
1807 case 0x2028:
1808 case 0x2029:
1809 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1810 goto ANYNL03;
1811
1812 case 0x000d:
1813 if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1814 /* Fall through */
1815
1816 ANYNL03:
1817 case 0x000a:
1818 if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1819 {
1820 active_count--; /* Remove non-match possibility */
1821 next_active_state--;
1822 }
1823 if (++count >= GET2(code, 1))
1824 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1825 else
1826 { ADD_NEW_DATA(-state_offset, count, ncount); }
1827 break;
1828
1829 default:
1830 break;
1831 }
1832 }
1833 break;
1834
1835 /*-----------------------------------------------------------------*/
1836 case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1837 case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1838 case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1839 case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1840 if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1841 { ADD_ACTIVE(state_offset + 4, 0); }
1842 count = current_state->count; /* Number already matched */
1843 if (clen > 0)
1844 {
1845 BOOL OK;
1846 switch (c)
1847 {
1848 case 0x000a:
1849 case 0x000b:
1850 case 0x000c:
1851 case 0x000d:
1852 case 0x0085:
1853 case 0x2028:
1854 case 0x2029:
1855 OK = TRUE;
1856 break;
1857
1858 default:
1859 OK = FALSE;
1860 }
1861
1862 if (OK == (d == OP_VSPACE))
1863 {
1864 if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1865 {
1866 active_count--; /* Remove non-match possibility */
1867 next_active_state--;
1868 }
1869 if (++count >= GET2(code, 1))
1870 { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1871 else
1872 { ADD_NEW_DATA(-state_offset, count, 0); }
1873 }
1874 }
1875 break;
1876
1877 /*-----------------------------------------------------------------*/
1878 case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1879 case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1880 case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1881 case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1882 if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1883 { ADD_ACTIVE(state_offset + 4, 0); }
1884 count = current_state->count; /* Number already matched */
1885 if (clen > 0)
1886 {
1887 BOOL OK;
1888 switch (c)
1889 {
1890 case 0x09: /* HT */
1891 case 0x20: /* SPACE */
1892 case 0xa0: /* NBSP */
1893 case 0x1680: /* OGHAM SPACE MARK */
1894 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
1895 case 0x2000: /* EN QUAD */
1896 case 0x2001: /* EM QUAD */
1897 case 0x2002: /* EN SPACE */
1898 case 0x2003: /* EM SPACE */
1899 case 0x2004: /* THREE-PER-EM SPACE */
1900 case 0x2005: /* FOUR-PER-EM SPACE */
1901 case 0x2006: /* SIX-PER-EM SPACE */
1902 case 0x2007: /* FIGURE SPACE */
1903 case 0x2008: /* PUNCTUATION SPACE */
1904 case 0x2009: /* THIN SPACE */
1905 case 0x200A: /* HAIR SPACE */
1906 case 0x202f: /* NARROW NO-BREAK SPACE */
1907 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
1908 case 0x3000: /* IDEOGRAPHIC SPACE */
1909 OK = TRUE;
1910 break;
1911
1912 default:
1913 OK = FALSE;
1914 break;
1915 }
1916
1917 if (OK == (d == OP_HSPACE))
1918 {
1919 if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1920 {
1921 active_count--; /* Remove non-match possibility */
1922 next_active_state--;
1923 }
1924 if (++count >= GET2(code, 1))
1925 { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1926 else
1927 { ADD_NEW_DATA(-state_offset, count, 0); }
1928 }
1929 }
1930 break;
1931
1932 /* ========================================================================== */
1933 /* These opcodes are followed by a character that is usually compared
1934 to the current subject character; it is loaded into d. We still get
1935 here even if there is no subject character, because in some cases zero
1936 repetitions are permitted. */
1937
1938 /*-----------------------------------------------------------------*/
1939 case OP_CHAR:
1940 if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
1941 break;
1942
1943 /*-----------------------------------------------------------------*/
1944 case OP_CHARNC:
1945 if (clen == 0) break;
1946
1947 #ifdef SUPPORT_UTF8
1948 if (utf8)
1949 {
1950 if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1951 {
1952 unsigned int othercase;
1953 if (c < 128) othercase = fcc[c]; else
1954
1955 /* If we have Unicode property support, we can use it to test the
1956 other case of the character. */
1957
1958 #ifdef SUPPORT_UCP
1959 othercase = UCD_OTHERCASE(c);
1960 #else
1961 othercase = NOTACHAR;
1962 #endif
1963
1964 if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1965 }
1966 }
1967 else
1968 #endif /* SUPPORT_UTF8 */
1969
1970 /* Non-UTF-8 mode */
1971 {
1972 if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
1973 }
1974 break;
1975
1976
1977 #ifdef SUPPORT_UCP
1978 /*-----------------------------------------------------------------*/
1979 /* This is a tricky one because it can match more than one character.
1980 Find out how many characters to skip, and then set up a negative state
1981 to wait for them to pass before continuing. */
1982
1983 case OP_EXTUNI:
1984 if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1985 {
1986 const uschar *nptr = ptr + clen;
1987 int ncount = 0;
1988 while (nptr < end_subject)
1989 {
1990 int nclen = 1;
1991 GETCHARLEN(c, nptr, nclen);
1992 if (UCD_CATEGORY(c) != ucp_M) break;
1993 ncount++;
1994 nptr += nclen;
1995 }
1996 ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
1997 }
1998 break;
1999 #endif
2000
2001 /*-----------------------------------------------------------------*/
2002 /* This is tricky like EXTUNI because it too can match more than one
2003 character (when CR is followed by LF). In this case, set up a negative
2004 state to wait for one character to pass before continuing. */
2005
2006 case OP_ANYNL:
2007 if (clen > 0) switch(c)
2008 {
2009 case 0x000b:
2010 case 0x000c:
2011 case 0x0085:
2012 case 0x2028:
2013 case 0x2029:
2014 if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2015 /* Fall through */
2016 case 0x000a:
2017 ADD_NEW(state_offset + 1, 0);
2018 break;
2019
2020 case 0x000d:
2021 if (ptr + 1 < end_subject && ptr[1] == 0x0a)
2022 {
2023 ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2024 }
2025 else
2026 {
2027 ADD_NEW(state_offset + 1, 0);
2028 }
2029 break;
2030 }
2031 break;
2032
2033 /*-----------------------------------------------------------------*/
2034 case OP_NOT_VSPACE:
2035 if (clen > 0) switch(c)
2036 {
2037 case 0x000a:
2038 case 0x000b:
2039 case 0x000c:
2040 case 0x000d:
2041 case 0x0085:
2042 case 0x2028:
2043 case 0x2029:
2044 break;
2045
2046 default:
2047 ADD_NEW(state_offset + 1, 0);
2048 break;
2049 }
2050 break;
2051
2052 /*-----------------------------------------------------------------*/
2053 case OP_VSPACE:
2054 if (clen > 0) switch(c)
2055 {
2056 case 0x000a:
2057 case 0x000b:
2058 case 0x000c:
2059 case 0x000d:
2060 case 0x0085:
2061 case 0x2028:
2062 case 0x2029:
2063 ADD_NEW(state_offset + 1, 0);
2064 break;
2065
2066 default: break;
2067 }
2068 break;
2069
2070 /*-----------------------------------------------------------------*/
2071 case OP_NOT_HSPACE:
2072 if (clen > 0) switch(c)
2073 {
2074 case 0x09: /* HT */
2075 case 0x20: /* SPACE */
2076 case 0xa0: /* NBSP */
2077 case 0x1680: /* OGHAM SPACE MARK */
2078 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2079 case 0x2000: /* EN QUAD */
2080 case 0x2001: /* EM QUAD */
2081 case 0x2002: /* EN SPACE */
2082 case 0x2003: /* EM SPACE */
2083 case 0x2004: /* THREE-PER-EM SPACE */
2084 case 0x2005: /* FOUR-PER-EM SPACE */
2085 case 0x2006: /* SIX-PER-EM SPACE */
2086 case 0x2007: /* FIGURE SPACE */
2087 case 0x2008: /* PUNCTUATION SPACE */
2088 case 0x2009: /* THIN SPACE */
2089 case 0x200A: /* HAIR SPACE */
2090 case 0x202f: /* NARROW NO-BREAK SPACE */
2091 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2092 case 0x3000: /* IDEOGRAPHIC SPACE */
2093 break;
2094
2095 default:
2096 ADD_NEW(state_offset + 1, 0);
2097 break;
2098 }
2099 break;
2100
2101 /*-----------------------------------------------------------------*/
2102 case OP_HSPACE:
2103 if (clen > 0) switch(c)
2104 {
2105 case 0x09: /* HT */
2106 case 0x20: /* SPACE */
2107 case 0xa0: /* NBSP */
2108 case 0x1680: /* OGHAM SPACE MARK */
2109 case 0x180e: /* MONGOLIAN VOWEL SEPARATOR */
2110 case 0x2000: /* EN QUAD */
2111 case 0x2001: /* EM QUAD */
2112 case 0x2002: /* EN SPACE */
2113 case 0x2003: /* EM SPACE */
2114 case 0x2004: /* THREE-PER-EM SPACE */
2115 case 0x2005: /* FOUR-PER-EM SPACE */
2116 case 0x2006: /* SIX-PER-EM SPACE */
2117 case 0x2007: /* FIGURE SPACE */
2118 case 0x2008: /* PUNCTUATION SPACE */
2119 case 0x2009: /* THIN SPACE */
2120 case 0x200A: /* HAIR SPACE */
2121 case 0x202f: /* NARROW NO-BREAK SPACE */
2122 case 0x205f: /* MEDIUM MATHEMATICAL SPACE */
2123 case 0x3000: /* IDEOGRAPHIC SPACE */
2124 ADD_NEW(state_offset + 1, 0);
2125 break;
2126 }
2127 break;
2128
2129 /*-----------------------------------------------------------------*/
2130 /* Match a negated single character. This is only used for one-byte
2131 characters, that is, we know that d < 256. The character we are
2132 checking (c) can be multibyte. */
2133
2134 case OP_NOT:
2135 if (clen > 0)
2136 {
2137 unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
2138 if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
2139 }
2140 break;
2141
2142 /*-----------------------------------------------------------------*/
2143 case OP_PLUS:
2144 case OP_MINPLUS:
2145 case OP_POSPLUS:
2146 case OP_NOTPLUS:
2147 case OP_NOTMINPLUS:
2148 case OP_NOTPOSPLUS:
2149 count = current_state->count; /* Already matched */
2150 if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2151 if (clen > 0)
2152 {
2153 unsigned int otherd = NOTACHAR;
2154 if ((ims & PCRE_CASELESS) != 0)
2155 {
2156 #ifdef SUPPORT_UTF8
2157 if (utf8 && d >= 128)
2158 {
2159 #ifdef SUPPORT_UCP
2160 otherd = UCD_OTHERCASE(d);
2161 #endif /* SUPPORT_UCP */
2162 }
2163 else
2164 #endif /* SUPPORT_UTF8 */
2165 otherd = fcc[d];
2166 }
2167 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2168 {
2169 if (count > 0 &&
2170 (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
2171 {
2172 active_count--; /* Remove non-match possibility */
2173 next_active_state--;
2174 }
2175 count++;
2176 ADD_NEW(state_offset, count);
2177 }
2178 }
2179 break;
2180
2181 /*-----------------------------------------------------------------*/
2182 case OP_QUERY:
2183 case OP_MINQUERY:
2184 case OP_POSQUERY:
2185 case OP_NOTQUERY:
2186 case OP_NOTMINQUERY:
2187 case OP_NOTPOSQUERY:
2188 ADD_ACTIVE(state_offset + dlen + 1, 0);
2189 if (clen > 0)
2190 {
2191 unsigned int otherd = NOTACHAR;
2192 if ((ims & PCRE_CASELESS) != 0)
2193 {
2194 #ifdef SUPPORT_UTF8
2195 if (utf8 && d >= 128)
2196 {
2197 #ifdef SUPPORT_UCP
2198 otherd = UCD_OTHERCASE(d);
2199 #endif /* SUPPORT_UCP */
2200 }
2201 else
2202 #endif /* SUPPORT_UTF8 */
2203 otherd = fcc[d];
2204 }
2205 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2206 {
2207 if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
2208 {
2209 active_count--; /* Remove non-match possibility */
2210 next_active_state--;
2211 }
2212 ADD_NEW(state_offset + dlen + 1, 0);
2213 }
2214 }
2215 break;
2216
2217 /*-----------------------------------------------------------------*/
2218 case OP_STAR:
2219 case OP_MINSTAR:
2220 case OP_POSSTAR:
2221 case OP_NOTSTAR:
2222 case OP_NOTMINSTAR:
2223 case OP_NOTPOSSTAR:
2224 ADD_ACTIVE(state_offset + dlen + 1, 0);
2225 if (clen > 0)
2226 {
2227 unsigned int otherd = NOTACHAR;
2228 if ((ims & PCRE_CASELESS) != 0)
2229 {
2230 #ifdef SUPPORT_UTF8
2231 if (utf8 && d >= 128)
2232 {
2233 #ifdef SUPPORT_UCP
2234 otherd = UCD_OTHERCASE(d);
2235 #endif /* SUPPORT_UCP */
2236 }
2237 else
2238 #endif /* SUPPORT_UTF8 */
2239 otherd = fcc[d];
2240 }
2241 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2242 {
2243 if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2244 {
2245 active_count--; /* Remove non-match possibility */
2246 next_active_state--;
2247 }
2248 ADD_NEW(state_offset, 0);
2249 }
2250 }
2251 break;
2252
2253 /*-----------------------------------------------------------------*/
2254 case OP_EXACT:
2255 case OP_NOTEXACT:
2256 count = current_state->count; /* Number already matched */
2257 if (clen > 0)
2258 {
2259 unsigned int otherd = NOTACHAR;
2260 if ((ims & PCRE_CASELESS) != 0)
2261 {
2262 #ifdef SUPPORT_UTF8
2263 if (utf8 && d >= 128)
2264 {
2265 #ifdef SUPPORT_UCP
2266 otherd = UCD_OTHERCASE(d);
2267 #endif /* SUPPORT_UCP */
2268 }
2269 else
2270 #endif /* SUPPORT_UTF8 */
2271 otherd = fcc[d];
2272 }
2273 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2274 {
2275 if (++count >= GET2(code, 1))
2276 { ADD_NEW(state_offset + dlen + 3, 0); }
2277 else
2278 { ADD_NEW(state_offset, count); }
2279 }
2280 }
2281 break;
2282
2283 /*-----------------------------------------------------------------*/
2284 case OP_UPTO:
2285 case OP_MINUPTO:
2286 case OP_POSUPTO:
2287 case OP_NOTUPTO:
2288 case OP_NOTMINUPTO:
2289 case OP_NOTPOSUPTO:
2290 ADD_ACTIVE(state_offset + dlen + 3, 0);
2291 count = current_state->count; /* Number already matched */
2292 if (clen > 0)
2293 {
2294 unsigned int otherd = NOTACHAR;
2295 if ((ims & PCRE_CASELESS) != 0)
2296 {
2297 #ifdef SUPPORT_UTF8
2298 if (utf8 && d >= 128)
2299 {
2300 #ifdef SUPPORT_UCP
2301 otherd = UCD_OTHERCASE(d);
2302 #endif /* SUPPORT_UCP */
2303 }
2304 else
2305 #endif /* SUPPORT_UTF8 */
2306 otherd = fcc[d];
2307 }
2308 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2309 {
2310 if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2311 {
2312 active_count--; /* Remove non-match possibility */
2313 next_active_state--;
2314 }
2315 if (++count >= GET2(code, 1))
2316 { ADD_NEW(state_offset + dlen + 3, 0); }
2317 else
2318 { ADD_NEW(state_offset, count); }
2319 }
2320 }
2321 break;
2322
2323
2324 /* ========================================================================== */
2325 /* These are the class-handling opcodes */
2326
2327 case OP_CLASS:
2328 case OP_NCLASS:
2329 case OP_XCLASS:
2330 {
2331 BOOL isinclass = FALSE;
2332 int next_state_offset;
2333 const uschar *ecode;
2334
2335 /* For a simple class, there is always just a 32-byte table, and we
2336 can set isinclass from it. */
2337
2338 if (codevalue != OP_XCLASS)
2339 {
2340 ecode = code + 33;
2341 if (clen > 0)
2342 {
2343 isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2344 ((code[1 + c/8] & (1 << (c&7))) != 0);
2345 }
2346 }
2347
2348 /* An extended class may have a table or a list of single characters,
2349 ranges, or both, and it may be positive or negative. There's a
2350 function that sorts all this out. */
2351
2352 else
2353 {
2354 ecode = code + GET(code, 1);
2355 if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
2356 }
2357
2358 /* At this point, isinclass is set for all kinds of class, and ecode
2359 points to the byte after the end of the class. If there is a
2360 quantifier, this is where it will be. */
2361
2362 next_state_offset = (int)(ecode - start_code);
2363
2364 switch (*ecode)
2365 {
2366 case OP_CRSTAR:
2367 case OP_CRMINSTAR:
2368 ADD_ACTIVE(next_state_offset + 1, 0);
2369 if (isinclass) { ADD_NEW(state_offset, 0); }
2370 break;
2371
2372 case OP_CRPLUS:
2373 case OP_CRMINPLUS:
2374 count = current_state->count; /* Already matched */
2375 if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
2376 if (isinclass) { count++; ADD_NEW(state_offset, count); }
2377 break;
2378
2379 case OP_CRQUERY:
2380 case OP_CRMINQUERY:
2381 ADD_ACTIVE(next_state_offset + 1, 0);
2382 if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
2383 break;
2384
2385 case OP_CRRANGE:
2386 case OP_CRMINRANGE:
2387 count = current_state->count; /* Already matched */
2388 if (count >= GET2(ecode, 1))
2389 { ADD_ACTIVE(next_state_offset + 5, 0); }
2390 if (isinclass)
2391 {
2392 int max = GET2(ecode, 3);
2393 if (++count >= max && max != 0) /* Max 0 => no limit */
2394 { ADD_NEW(next_state_offset + 5, 0); }
2395 else
2396 { ADD_NEW(state_offset, count); }
2397 }
2398 break;
2399
2400 default:
2401 if (isinclass) { ADD_NEW(next_state_offset, 0); }
2402 break;
2403 }
2404 }
2405 break;
2406
2407 /* ========================================================================== */
2408 /* These are the opcodes for fancy brackets of various kinds. We have
2409 to use recursion in order to handle them. The "always failing" assertion
2410 (?!) is optimised to OP_FAIL when compiling, so we have to support that,
2411 though the other "backtracking verbs" are not supported. */
2412
2413 case OP_FAIL:
2414 forced_fail++; /* Count FAILs for multiple states */
2415 break;
2416
2417 case OP_ASSERT:
2418 case OP_ASSERT_NOT:
2419 case OP_ASSERTBACK:
2420 case OP_ASSERTBACK_NOT:
2421 {
2422 int rc;
2423 int local_offsets[2];
2424 int local_workspace[1000];
2425 const uschar *endasscode = code + GET(code, 1);
2426
2427 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2428
2429 rc = internal_dfa_exec(
2430 md, /* static match data */
2431 code, /* this subexpression's code */
2432 ptr, /* where we currently are */
2433 (int)(ptr - start_subject), /* start offset */
2434 local_offsets, /* offset vector */
2435 sizeof(local_offsets)/sizeof(int), /* size of same */
2436 local_workspace, /* workspace vector */
2437 sizeof(local_workspace)/sizeof(int), /* size of same */
2438 ims, /* the current ims flags */
2439 rlevel, /* function recursion level */
2440 recursing); /* pass on regex recursion */
2441
2442 if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2443 if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2444 { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2445 }
2446 break;
2447
2448 /*-----------------------------------------------------------------*/
2449 case OP_COND:
2450 case OP_SCOND:
2451 {
2452 int local_offsets[1000];
2453 int local_workspace[1000];
2454 int codelink = GET(code, 1);
2455 int condcode;
2456
2457 /* Because of the way auto-callout works during compile, a callout item
2458 is inserted between OP_COND and an assertion condition. This does not
2459 happen for the other conditions. */
2460
2461 if (code[LINK_SIZE+1] == OP_CALLOUT)
2462 {
2463 rrc = 0;
2464 if (pcre_callout != NULL)
2465 {
2466 pcre_callout_block cb;
2467 cb.version = 1; /* Version 1 of the callout block */
2468 cb.callout_number = code[LINK_SIZE+2];
2469 cb.offset_vector = offsets;
2470 cb.subject = (PCRE_SPTR)start_subject;
2471 cb.subject_length = (int)(end_subject - start_subject);
2472 cb.start_match = (int)(current_subject - start_subject);
2473 cb.current_position = (int)(ptr - start_subject);
2474 cb.pattern_position = GET(code, LINK_SIZE + 3);
2475 cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2476 cb.capture_top = 1;
2477 cb.capture_last = -1;
2478 cb.callout_data = md->callout_data;
2479 if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
2480 }
2481 if (rrc > 0) break; /* Fail this thread */
2482 code += _pcre_OP_lengths[OP_CALLOUT]; /* Skip callout data */
2483 }
2484
2485 condcode = code[LINK_SIZE+1];
2486
2487 /* Back reference conditions are not supported */
2488
2489 if (condcode == OP_CREF || condcode == OP_NCREF)
2490 return PCRE_ERROR_DFA_UCOND;
2491
2492 /* The DEFINE condition is always false */
2493
2494 if (condcode == OP_DEF)
2495 { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2496
2497 /* The only supported version of OP_RREF is for the value RREF_ANY,
2498 which means "test if in any recursion". We can't test for specifically
2499 recursed groups. */
2500
2501 else if (condcode == OP_RREF || condcode == OP_NRREF)
2502 {
2503 int value = GET2(code, LINK_SIZE+2);
2504 if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2505 if (recursing > 0)
2506 { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2507 else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2508 }
2509
2510 /* Otherwise, the condition is an assertion */
2511
2512 else
2513 {
2514 int rc;
2515 const uschar *asscode = code + LINK_SIZE + 1;
2516 const uschar *endasscode = asscode + GET(asscode, 1);
2517
2518 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2519
2520 rc = internal_dfa_exec(
2521 md, /* fixed match data */
2522 asscode, /* this subexpression's code */
2523 ptr, /* where we currently are */
2524 (int)(ptr - start_subject), /* start offset */
2525 local_offsets, /* offset vector */
2526 sizeof(local_offsets)/sizeof(int), /* size of same */
2527 local_workspace, /* workspace vector */
2528 sizeof(local_workspace)/sizeof(int), /* size of same */
2529 ims, /* the current ims flags */
2530 rlevel, /* function recursion level */
2531 recursing); /* pass on regex recursion */
2532
2533 if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2534 if ((rc >= 0) ==
2535 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2536 { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2537 else
2538 { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2539 }
2540 }
2541 break;
2542
2543 /*-----------------------------------------------------------------*/
2544 case OP_RECURSE:
2545 {
2546 int local_offsets[1000];
2547 int local_workspace[1000];
2548 int rc;
2549
2550 DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
2551 recursing + 1));
2552
2553 rc = internal_dfa_exec(
2554 md, /* fixed match data */
2555 start_code + GET(code, 1), /* this subexpression's code */
2556 ptr, /* where we currently are */
2557 (int)(ptr - start_subject), /* start offset */
2558 local_offsets, /* offset vector */
2559 sizeof(local_offsets)/sizeof(int), /* size of same */
2560 local_workspace, /* workspace vector */
2561 sizeof(local_workspace)/sizeof(int), /* size of same */
2562 ims, /* the current ims flags */
2563 rlevel, /* function recursion level */
2564 recursing + 1); /* regex recurse level */
2565
2566 DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
2567 recursing + 1, rc));
2568
2569 /* Ran out of internal offsets */
2570
2571 if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
2572
2573 /* For each successful matched substring, set up the next state with a
2574 count of characters to skip before trying it. Note that the count is in
2575 characters, not bytes. */
2576
2577 if (rc > 0)
2578 {
2579 for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2580 {
2581 const uschar *p = start_subject + local_offsets[rc];
2582 const uschar *pp = start_subject + local_offsets[rc+1];
2583 int charcount = local_offsets[rc+1] - local_offsets[rc];
2584 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2585 if (charcount > 0)
2586 {
2587 ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
2588 }
2589 else
2590 {
2591 ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
2592 }
2593 }
2594 }
2595 else if (rc != PCRE_ERROR_NOMATCH) return rc;
2596 }
2597 break;
2598
2599 /*-----------------------------------------------------------------*/
2600 case OP_ONCE:
2601 {
2602 int local_offsets[2];
2603 int local_workspace[1000];
2604
2605 int rc = internal_dfa_exec(
2606 md, /* fixed match data */
2607 code, /* this subexpression's code */
2608 ptr, /* where we currently are */
2609 (int)(ptr - start_subject), /* start offset */
2610 local_offsets, /* offset vector */
2611 sizeof(local_offsets)/sizeof(int), /* size of same */
2612 local_workspace, /* workspace vector */
2613 sizeof(local_workspace)/sizeof(int), /* size of same */
2614 ims, /* the current ims flags */
2615 rlevel, /* function recursion level */
2616 recursing); /* pass on regex recursion */
2617
2618 if (rc >= 0)
2619 {
2620 const uschar *end_subpattern = code;
2621 int charcount = local_offsets[1] - local_offsets[0];
2622 int next_state_offset, repeat_state_offset;
2623
2624 do { end_subpattern += GET(end_subpattern, 1); }
2625 while (*end_subpattern == OP_ALT);
2626 next_state_offset =
2627 (int)(end_subpattern - start_code + LINK_SIZE + 1);
2628
2629 /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2630 arrange for the repeat state also to be added to the relevant list.
2631 Calculate the offset, or set -1 for no repeat. */
2632
2633 repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2634 *end_subpattern == OP_KETRMIN)?
2635 (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2636
2637 /* If we have matched an empty string, add the next state at the
2638 current character pointer. This is important so that the duplicate
2639 checking kicks in, which is what breaks infinite loops that match an
2640 empty string. */
2641
2642 if (charcount == 0)
2643 {
2644 ADD_ACTIVE(next_state_offset, 0);
2645 }
2646
2647 /* Optimization: if there are no more active states, and there
2648 are no new states yet set up, then skip over the subject string
2649 right here, to save looping. Otherwise, set up the new state to swing
2650 into action when the end of the substring is reached. */
2651
2652 else if (i + 1 >= active_count && new_count == 0)
2653 {
2654 ptr += charcount;
2655 clen = 0;
2656 ADD_NEW(next_state_offset, 0);
2657
2658 /* If we are adding a repeat state at the new character position,
2659 we must fudge things so that it is the only current state.
2660 Otherwise, it might be a duplicate of one we processed before, and
2661 that would cause it to be skipped. */
2662
2663 if (repeat_state_offset >= 0)
2664 {
2665 next_active_state = active_states;
2666 active_count = 0;
2667 i = -1;
2668 ADD_ACTIVE(repeat_state_offset, 0);
2669 }
2670 }
2671 else
2672 {
2673 const uschar *p = start_subject + local_offsets[0];
2674 const uschar *pp = start_subject + local_offsets[1];
2675 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2676 ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2677 if (repeat_state_offset >= 0)
2678 { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2679 }
2680
2681 }
2682 else if (rc != PCRE_ERROR_NOMATCH) return rc;
2683 }
2684 break;
2685
2686
2687 /* ========================================================================== */
2688 /* Handle callouts */
2689
2690 case OP_CALLOUT:
2691 rrc = 0;
2692 if (pcre_callout != NULL)
2693 {
2694 pcre_callout_block cb;
2695 cb.version = 1; /* Version 1 of the callout block */
2696 cb.callout_number = code[1];
2697 cb.offset_vector = offsets;
2698 cb.subject = (PCRE_SPTR)start_subject;
2699 cb.subject_length = (int)(end_subject - start_subject);
2700 cb.start_match = (int)(current_subject - start_subject);
2701 cb.current_position = (int)(ptr - start_subject);
2702 cb.pattern_position = GET(code, 2);
2703 cb.next_item_length = GET(code, 2 + LINK_SIZE);
2704 cb.capture_top = 1;
2705 cb.capture_last = -1;
2706 cb.callout_data = md->callout_data;
2707 if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
2708 }
2709 if (rrc == 0)
2710 { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
2711 break;
2712
2713
2714 /* ========================================================================== */
2715 default: /* Unsupported opcode */
2716 return PCRE_ERROR_DFA_UITEM;
2717 }
2718
2719 NEXT_ACTIVE_STATE: continue;
2720
2721 } /* End of loop scanning active states */
2722
2723 /* We have finished the processing at the current subject character. If no
2724 new states have been set for the next character, we have found all the
2725 matches that we are going to find. If we are at the top level and partial
2726 matching has been requested, check for appropriate conditions.
2727
2728 The "forced_fail" variable counts the number of (*F) encountered for the
2729 character. If it is equal to the original active_count (saved in
2730 workspace[1]) it means that (*F) was found on every active state. In this
2731 case we don't want to give a partial match.
2732
2733 The "could_continue" variable is true if a state could have continued but
2734 for the fact that the end of the subject was reached. */
2735
2736 if (new_count <= 0)
2737 {
2738 if (rlevel == 1 && /* Top level, and */
2739 could_continue && /* Some could go on */
2740 forced_fail != workspace[1] && /* Not all forced fail & */
2741 ( /* either... */
2742 (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
2743 || /* or... */
2744 ((md->moptions & PCRE_PARTIAL_SOFT) != 0 && /* Soft partial and */
2745 match_count < 0) /* no matches */
2746 ) && /* And... */
2747 ptr >= end_subject && /* Reached end of subject */
2748 ptr > current_subject) /* Matched non-empty string */
2749 {
2750 if (offsetcount >= 2)
2751 {
2752 offsets[0] = (int)(md->start_used_ptr - start_subject);
2753 offsets[1] = (int)(end_subject - start_subject);
2754 }
2755 match_count = PCRE_ERROR_PARTIAL;
2756 }
2757
2758 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2759 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2760 rlevel*2-2, SP));
2761 break; /* In effect, "return", but see the comment below */
2762 }
2763
2764 /* One or more states are active for the next character. */
2765
2766 ptr += clen; /* Advance to next subject character */
2767 } /* Loop to move along the subject string */
2768
2769 /* Control gets here from "break" a few lines above. We do it this way because
2770 if we use "return" above, we have compiler trouble. Some compilers warn if
2771 there's nothing here because they think the function doesn't return a value. On
2772 the other hand, if we put a dummy statement here, some more clever compilers
2773 complain that it can't be reached. Sigh. */
2774
2775 return match_count;
2776 }
2777
2778
2779
2780
2781 /*************************************************
2782 * Execute a Regular Expression - DFA engine *
2783 *************************************************/
2784
2785 /* This external function applies a compiled re to a subject string using a DFA
2786 engine. This function calls the internal function multiple times if the pattern
2787 is not anchored.
2788
2789 Arguments:
2790 argument_re points to the compiled expression
2791 extra_data points to extra data or is NULL
2792 subject points to the subject string
2793 length length of subject string (may contain binary zeros)
2794 start_offset where to start in the subject string
2795 options option bits
2796 offsets vector of match offsets
2797 offsetcount size of same
2798 workspace workspace vector
2799 wscount size of same
2800
2801 Returns: > 0 => number of match offset pairs placed in offsets
2802 = 0 => offsets overflowed; longest matches are present
2803 -1 => failed to match
2804 < -1 => some kind of unexpected problem
2805 */
2806
2807 PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
2808 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2809 const char *subject, int length, int start_offset, int options, int *offsets,
2810 int offsetcount, int *workspace, int wscount)
2811 {
2812 real_pcre *re = (real_pcre *)argument_re;
2813 dfa_match_data match_block;
2814 dfa_match_data *md = &match_block;
2815 BOOL utf8, anchored, startline, firstline;
2816 const uschar *current_subject, *end_subject, *lcc;
2817
2818 pcre_study_data internal_study;
2819 const pcre_study_data *study = NULL;
2820 real_pcre internal_re;
2821
2822 const uschar *req_byte_ptr;
2823 const uschar *start_bits = NULL;
2824 BOOL first_byte_caseless = FALSE;
2825 BOOL req_byte_caseless = FALSE;
2826 int first_byte = -1;
2827 int req_byte = -1;
2828 int req_byte2 = -1;
2829 int newline;
2830
2831 /* Plausibility checks */
2832
2833 if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
2834 if (re == NULL || subject == NULL || workspace == NULL ||
2835 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
2836 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
2837 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
2838
2839 /* We need to find the pointer to any study data before we test for byte
2840 flipping, so we scan the extra_data block first. This may set two fields in the
2841 match block, so we must initialize them beforehand. However, the other fields
2842 in the match block must not be set until after the byte flipping. */
2843
2844 md->tables = re->tables;
2845 md->callout_data = NULL;
2846
2847 if (extra_data != NULL)
2848 {
2849 unsigned int flags = extra_data->flags;
2850 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2851 study = (const pcre_study_data *)extra_data->study_data;
2852 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2853 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2854 return PCRE_ERROR_DFA_UMLIMIT;
2855 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2856 md->callout_data = extra_data->callout_data;
2857 if ((flags & PCRE_EXTRA_TABLES) != 0)
2858 md->tables = extra_data->tables;
2859 }
2860
2861 /* Check that the first field in the block is the magic number. If it is not,
2862 test for a regex that was compiled on a host of opposite endianness. If this is
2863 the case, flipped values are put in internal_re and internal_study if there was
2864 study data too. */
2865
2866 if (re->magic_number != MAGIC_NUMBER)
2867 {
2868 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
2869 if (re == NULL) return PCRE_ERROR_BADMAGIC;
2870 if (study != NULL) study = &internal_study;
2871 }
2872
2873 /* Set some local values */
2874
2875 current_subject = (const unsigned char *)subject + start_offset;
2876 end_subject = (const unsigned char *)subject + length;
2877 req_byte_ptr = current_subject - 1;
2878
2879 #ifdef SUPPORT_UTF8
2880 utf8 = (re->options & PCRE_UTF8) != 0;
2881 #else
2882 utf8 = FALSE;
2883 #endif
2884
2885 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2886 (re->options & PCRE_ANCHORED) != 0;
2887
2888 /* The remaining fixed data for passing around. */
2889
2890 md->start_code = (const uschar *)argument_re +
2891 re->name_table_offset + re->name_count * re->name_entry_size;
2892 md->start_subject = (const unsigned char *)subject;
2893 md->end_subject = end_subject;
2894 md->start_offset = start_offset;
2895 md->moptions = options;
2896 md->poptions = re->options;
2897
2898 /* If the BSR option is not set at match time, copy what was set
2899 at compile time. */
2900
2901 if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2902 {
2903 if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2904 md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2905 #ifdef BSR_ANYCRLF
2906 else md->moptions |= PCRE_BSR_ANYCRLF;
2907 #endif
2908 }
2909
2910 /* Handle different types of newline. The three bits give eight cases. If
2911 nothing is set at run time, whatever was used at compile time applies. */
2912
2913 switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2914 PCRE_NEWLINE_BITS)
2915 {
2916 case 0: newline = NEWLINE; break; /* Compile-time default */
2917 case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
2918 case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
2919 case PCRE_NEWLINE_CR+
2920 PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
2921 case PCRE_NEWLINE_ANY: newline = -1; break;
2922 case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2923 default: return PCRE_ERROR_BADNEWLINE;
2924 }
2925
2926 if (newline == -2)
2927 {
2928 md->nltype = NLTYPE_ANYCRLF;
2929 }
2930 else if (newline < 0)
2931 {
2932 md->nltype = NLTYPE_ANY;
2933 }
2934 else
2935 {
2936 md->nltype = NLTYPE_FIXED;
2937 if (newline > 255)
2938 {
2939 md->nllen = 2;
2940 md->nl[0] = (newline >> 8) & 255;
2941 md->nl[1] = newline & 255;
2942 }
2943 else
2944 {
2945 md->nllen = 1;
2946 md->nl[0] = newline;
2947 }
2948 }
2949
2950 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2951 back the character offset. */
2952
2953 #ifdef SUPPORT_UTF8
2954 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
2955 {
2956 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
2957 return PCRE_ERROR_BADUTF8;
2958 if (start_offset > 0 && start_offset < length)
2959 {
2960 int tb = ((uschar *)subject)[start_offset];
2961 if (tb > 127)
2962 {
2963 tb &= 0xc0;
2964 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
2965 }
2966 }
2967 }
2968 #endif
2969
2970 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
2971 is a feature that makes it possible to save compiled regex and re-use them
2972 in other programs later. */
2973
2974 if (md->tables == NULL) md->tables = _pcre_default_tables;
2975
2976 /* The lower casing table and the "must be at the start of a line" flag are
2977 used in a loop when finding where to start. */
2978
2979 lcc = md->tables + lcc_offset;
2980 startline = (re->flags & PCRE_STARTLINE) != 0;
2981 firstline = (re->options & PCRE_FIRSTLINE) != 0;
2982
2983 /* Set up the first character to match, if available. The first_byte value is
2984 never set for an anchored regular expression, but the anchoring may be forced
2985 at run time, so we have to test for anchoring. The first char may be unset for
2986 an unanchored pattern, of course. If there's no first char and the pattern was
2987 studied, there may be a bitmap of possible first characters. */
2988
2989 if (!anchored)
2990 {
2991 if ((re->flags & PCRE_FIRSTSET) != 0)
2992 {
2993 first_byte = re->first_byte & 255;
2994 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
2995 first_byte = lcc[first_byte];
2996 }
2997 else
2998 {
2999 if (!startline && study != NULL &&
3000 (study->flags & PCRE_STUDY_MAPPED) != 0)
3001 start_bits = study->start_bits;
3002 }
3003 }
3004
3005 /* For anchored or unanchored matches, there may be a "last known required
3006 character" set. */
3007
3008 if ((re->flags & PCRE_REQCHSET) != 0)
3009 {
3010 req_byte = re->req_byte & 255;
3011 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
3012 req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
3013 }
3014
3015 /* Call the main matching function, looping for a non-anchored regex after a
3016 failed match. If not restarting, perform certain optimizations at the start of
3017 a match. */
3018
3019 for (;;)
3020 {
3021 int rc;
3022
3023 if ((options & PCRE_DFA_RESTART) == 0)
3024 {
3025 const uschar *save_end_subject = end_subject;
3026
3027 /* If firstline is TRUE, the start of the match is constrained to the first
3028 line of a multiline string. Implement this by temporarily adjusting
3029 end_subject so that we stop scanning at a newline. If the match fails at
3030 the newline, later code breaks this loop. */
3031
3032 if (firstline)
3033 {
3034 USPTR t = current_subject;
3035 #ifdef SUPPORT_UTF8
3036 if (utf8)
3037 {
3038 while (t < md->end_subject && !IS_NEWLINE(t))
3039 {
3040 t++;
3041 while (t < end_subject && (*t & 0xc0) == 0x80) t++;
3042 }
3043 }
3044 else
3045 #endif
3046 while (t < md->end_subject && !IS_NEWLINE(t)) t++;
3047 end_subject = t;
3048 }
3049
3050 /* There are some optimizations that avoid running the match if a known
3051 starting point is not found. However, there is an option that disables
3052 these, for testing and for ensuring that all callouts do actually occur. */
3053
3054 if ((options & PCRE_NO_START_OPTIMIZE) == 0)
3055 {
3056 /* Advance to a known first byte. */
3057
3058 if (first_byte >= 0)
3059 {
3060 if (first_byte_caseless)
3061 while (current_subject < end_subject &&
3062 lcc[*current_subject] != first_byte)
3063 current_subject++;
3064 else
3065 while (current_subject < end_subject &&
3066 *current_subject != first_byte)
3067 current_subject++;
3068 }
3069
3070 /* Or to just after a linebreak for a multiline match if possible */
3071
3072 else if (startline)
3073 {
3074 if (current_subject > md->start_subject + start_offset)
3075 {
3076 #ifdef SUPPORT_UTF8
3077 if (utf8)
3078 {
3079 while (current_subject < end_subject &&
3080 !WAS_NEWLINE(current_subject))
3081 {
3082 current_subject++;
3083 while(current_subject < end_subject &&
3084 (*current_subject & 0xc0) == 0x80)
3085 current_subject++;
3086 }
3087 }
3088 else
3089 #endif
3090 while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
3091 current_subject++;
3092
3093 /* If we have just passed a CR and the newline option is ANY or
3094 ANYCRLF, and we are now at a LF, advance the match position by one
3095 more character. */
3096
3097 if (current_subject[-1] == CHAR_CR &&
3098 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3099 current_subject < end_subject &&
3100 *current_subject == CHAR_NL)
3101 current_subject++;
3102 }
3103 }
3104
3105 /* Or to a non-unique first char after study */
3106
3107 else if (start_bits != NULL)
3108 {
3109 while (current_subject < end_subject)
3110 {
3111 register unsigned int c = *current_subject;
3112 if ((start_bits[c/8] & (1 << (c&7))) == 0)
3113 {
3114 current_subject++;
3115 #ifdef SUPPORT_UTF8
3116 if (utf8)
3117 while(current_subject < end_subject &&
3118 (*current_subject & 0xc0) == 0x80) current_subject++;
3119 #endif
3120 }
3121 else break;
3122 }
3123 }
3124 }
3125
3126 /* Restore fudged end_subject */
3127
3128 end_subject = save_end_subject;
3129
3130 /* The following two optimizations are disabled for partial matching or if
3131 disabling is explicitly requested (and of course, by the test above, this
3132 code is not obeyed when restarting after a partial match). */
3133
3134 if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
3135 (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3136 {
3137 /* If the pattern was studied, a minimum subject length may be set. This
3138 is only a lower bound; there may be no string of that length that actually
3139 matches the pattern. Although the value is, strictly, in characters, we treat
3140 it as bytes to avoid spending too much time in this optimization. */
3141
3142 if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3143 (pcre_uint32)(end_subject - current_subject) < study->minlength)
3144 return PCRE_ERROR_NOMATCH;
3145
3146 /* If req_byte is set, we know that that character must appear in the
3147 subject for the match to succeed. If the first character is set, req_byte
3148 must be later in the subject; otherwise the test starts at the match
3149 point. This optimization can save a huge amount of work in patterns with
3150 nested unlimited repeats that aren't going to match. Writing separate
3151 code for cased/caseless versions makes it go faster, as does using an
3152 autoincrement and backing off on a match.
3153
3154 HOWEVER: when the subject string is very, very long, searching to its end
3155 can take a long time, and give bad performance on quite ordinary
3156 patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3157 string... so we don't do this when the string is sufficiently long. */
3158
3159 if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
3160 {
3161 register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
3162
3163 /* We don't need to repeat the search if we haven't yet reached the
3164 place we found it at last time. */
3165
3166 if (p > req_byte_ptr)
3167 {
3168 if (req_byte_caseless)
3169 {
3170 while (p < end_subject)
3171 {
3172 register int pp = *p++;
3173 if (pp == req_byte || pp == req_byte2) { p--; break; }
3174 }
3175 }
3176 else
3177 {
3178 while (p < end_subject)
3179 {
3180 if (*p++ == req_byte) { p--; break; }
3181 }
3182 }
3183
3184 /* If we can't find the required character, break the matching loop,
3185 which will cause a return of PCRE_ERROR_NOMATCH. */
3186
3187 if (p >= end_subject) break;
3188
3189 /* If we have found the required character, save the point where we
3190 found it, so that we don't search again next time round the loop if
3191 the start hasn't passed this character yet. */
3192
3193 req_byte_ptr = p;
3194 }
3195 }
3196 }
3197 } /* End of optimizations that are done when not restarting */
3198
3199 /* OK, now we can do the business */
3200
3201 md->start_used_ptr = current_subject;
3202
3203 rc = internal_dfa_exec(
3204 md, /* fixed match data */
3205 md->start_code, /* this subexpression's code */
3206 current_subject, /* where we currently are */
3207 start_offset, /* start offset in subject */
3208 offsets, /* offset vector */
3209 offsetcount, /* size of same */
3210 workspace, /* workspace vector */
3211 wscount, /* size of same */
3212 re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
3213 0, /* function recurse level */
3214 0); /* regex recurse level */
3215
3216 /* Anything other than "no match" means we are done, always; otherwise, carry
3217 on only if not anchored. */
3218
3219 if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
3220
3221 /* Advance to the next subject character unless we are at the end of a line
3222 and firstline is set. */
3223
3224 if (firstline && IS_NEWLINE(current_subject)) break;
3225 current_subject++;
3226 if (utf8)
3227 {
3228 while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
3229 current_subject++;
3230 }
3231 if (current_subject > end_subject) break;
3232
3233 /* If we have just passed a CR and we are now at a LF, and the pattern does
3234 not contain any explicit matches for \r or \n, and the newline option is CRLF
3235 or ANY or ANYCRLF, advance the match position by one more character. */
3236
3237 if (current_subject[-1] == CHAR_CR &&
3238 current_subject < end_subject &&
3239 *current_subject == CHAR_NL &&
3240 (re->flags & PCRE_HASCRORLF) == 0 &&
3241 (md->nltype == NLTYPE_ANY ||
3242 md->nltype == NLTYPE_ANYCRLF ||
3243 md->nllen == 2))
3244 current_subject++;
3245
3246 } /* "Bumpalong" loop */
3247
3248 return PCRE_ERROR_NOMATCH;
3249 }
3250
3251 /* End of pcre_dfa_exec.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12