/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (show annotations) (download)
Sat Feb 24 21:41:34 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: text/plain
File size: 70023 byte(s)
Load pcre-6.7 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2006 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains the external function pcre_dfa_exec(), which is an
42 alternative matching function that uses a DFA algorithm. This is NOT Perl-
43 compatible, but it has advantages in certain applications. */
44
45
46 #define NLBLOCK md /* The block containing newline information */
47 #include "pcre_internal.h"
48
49
/* Source of padding for indented debugging output. The debugging calls print
it with "%.*s" and a precision of rlevel*2-2, so the precision selects how many
characters of this string are shown.
NOTE(review): with a one-character string the indent can never exceed one
column; presumably the literal was a longer run of spaces that was collapsed in
transit - confirm against the upstream source before relying on it. */

#define SP " "
53
54
55
56 /*************************************************
57 * Code parameters and static tables *
58 *************************************************/
59
/* These are offsets that are added to the OP_TYPESTAR and friends opcodes to
turn them into other ("virtual") opcodes when their argument is OP_PROP,
OP_NOTPROP, or OP_EXTUNI. Both are expressed relative to EXTRACT_BASIC_MAX. A
gap of 10 between the two blocks should be enough. */

#define OP_PROP_EXTRA (EXTRACT_BASIC_MAX+1)
#define OP_EXTUNI_EXTRA (EXTRACT_BASIC_MAX+11)
66
67
68 /* This table identifies those opcodes that are followed immediately by a
69 character that is to be tested in some way. This makes is possible to
70 centralize the loading of these characters. In the case of Type * etc, the
71 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
72 small value. */
73
74 static uschar coptable[] = {
75 0, /* End */
76 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
77 0, 0, /* Any, Anybyte */
78 0, 0, 0, /* NOTPROP, PROP, EXTUNI */
79 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
80 1, /* Char */
81 1, /* Charnc */
82 1, /* not */
83 /* Positive single-char repeats */
84 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
85 3, 3, 3, /* upto, minupto, exact */
86 /* Negative single-char repeats - only for chars < 256 */
87 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
88 3, 3, 3, /* NOT upto, minupto, exact */
89 /* Positive type repeats */
90 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
91 3, 3, 3, /* Type upto, minupto, exact */
92 /* Character class & ref repeats */
93 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
94 0, 0, /* CRRANGE, CRMINRANGE */
95 0, /* CLASS */
96 0, /* NCLASS */
97 0, /* XCLASS - variable length */
98 0, /* REF */
99 0, /* RECURSE */
100 0, /* CALLOUT */
101 0, /* Alt */
102 0, /* Ket */
103 0, /* KetRmax */
104 0, /* KetRmin */
105 0, /* Assert */
106 0, /* Assert not */
107 0, /* Assert behind */
108 0, /* Assert behind not */
109 0, /* Reverse */
110 0, /* Once */
111 0, /* COND */
112 0, /* CREF */
113 0, 0, /* BRAZERO, BRAMINZERO */
114 0, /* BRANUMBER */
115 0 /* BRA */
116 };
117
118 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
119 and \w */
120
121 static uschar toptable1[] = {
122 0, 0, 0, 0, 0,
123 ctype_digit, ctype_digit,
124 ctype_space, ctype_space,
125 ctype_word, ctype_word,
126 0 /* OP_ANY */
127 };
128
129 static uschar toptable2[] = {
130 0, 0, 0, 0, 0,
131 ctype_digit, 0,
132 ctype_space, 0,
133 ctype_word, 0,
134 1 /* OP_ANY */
135 };
136
137
/* Structure for holding data about a particular state, which is in effect the
current data for an active path through the match tree. It must consist
entirely of ints because the working vector we are passed, and which we put
these structures in, is a vector of ints. */

typedef struct stateblock {
  int offset;                     /* Offset to opcode */
  int count;                      /* Count for repeats */
  int ims;                        /* ims flag bits */
  int data;                       /* Some use extra data */
} stateblock;

/* Number of ints of workspace that one stateblock occupies. */

#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
151
152
153 #ifdef DEBUG
/*************************************************
*             Print character string             *
*************************************************/

/* Character string printing function for debugging. Each byte for which
isprint() is true is written as itself; every other byte is written as a
backslash-x escape with two lower-case hex digits. Nothing is written when
length is zero or negative.

Arguments:
  p           points to string (not modified)
  length      number of bytes
  f           where to print

Returns:      nothing
*/

static void
pchars(const unsigned char *p, int length, FILE *f)
{
while (length-- > 0)
  {
  int c = *p++;                   /* unsigned char, so always in 0..255 */
  if (isprint(c))
    fprintf(f, "%c", c);
  else
    fprintf(f, "\\x%02x", c);
  }
}
180 #endif
181
182
183
184 /*************************************************
185 * Execute a Regular Expression - DFA engine *
186 *************************************************/
187
188 /* This internal function applies a compiled pattern to a subject string,
189 starting at a given point, using a DFA engine. This function is called from the
190 external one, possibly multiple times if the pattern is not anchored. The
191 function calls itself recursively for some kinds of subpattern.
192
193 Arguments:
194 md the match_data block with fixed information
195 this_start_code the opening bracket of this subexpression's code
196 current_subject where we currently are in the subject string
197 start_offset start offset in the subject string
198 offsets vector to contain the matching string offsets
199 offsetcount size of same
200 workspace vector of workspace
201 wscount size of same
202 ims the current ims flags
203 rlevel function call recursion level
204 recursing regex recursive call level
205
206 Returns: > 0 => number of match offset pairs placed in offsets
207 = 0 => offsets overflowed; longest matches are present
208 -1 => failed to match
209 < -1 => some kind of unexpected problem
210
211 The following macros are used for adding states to the two state vectors (one
212 for the current character, one for the following character). */
213
/* Append a state to the list of states for the current character.
  x   offset of the opcode for the new state
  y   repeat count for the new state
The enclosing function's active_count, wscount, next_active_state, ims, and
(for debugging output) rlevel are used implicitly. The data field of the new
entry is not written. If the list is already full, the enclosing function
returns PCRE_ERROR_DFA_WSSIZE. Because of the if/else form, an invocation must
be followed by a semicolon, and it then behaves as a single statement. */

#define ADD_ACTIVE(x,y) \
  if (active_count++ < wscount) \
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->ims    = ims; \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
224
/* Append a state, with an extra data value, to the list of states for the
current character.
  x   offset of the opcode for the new state
  y   repeat count for the new state
  z   value for the state's data field
The enclosing function's active_count, wscount, next_active_state, ims, and
(for debugging output) rlevel are used implicitly. If the list is already
full, the enclosing function returns PCRE_ERROR_DFA_WSSIZE. Because of the
if/else form, an invocation must be followed by a semicolon, and it then
behaves as a single statement. */

#define ADD_ACTIVE_DATA(x,y,z) \
  if (active_count++ < wscount) \
    { \
    next_active_state->offset = (x); \
    next_active_state->count  = (y); \
    next_active_state->ims    = ims; \
    next_active_state->data   = (z); \
    next_active_state++; \
    DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
236
/* Append a state to the list of states for the following character.
  x   offset of the opcode for the new state
  y   repeat count for the new state
The enclosing function's new_count, wscount, next_new_state, ims, and (for
debugging output) rlevel are used implicitly. The data field of the new entry
is not written. If the list is already full, the enclosing function returns
PCRE_ERROR_DFA_WSSIZE. Because of the if/else form, an invocation must be
followed by a semicolon, and it then behaves as a single statement. */

#define ADD_NEW(x,y) \
  if (new_count++ < wscount) \
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->ims    = ims; \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
247
/* Append a state, with an extra data value, to the list of states for the
following character.
  x   offset of the opcode for the new state
  y   repeat count for the new state
  z   value for the state's data field
The enclosing function's new_count, wscount, next_new_state, ims, and (for
debugging output) rlevel are used implicitly. If the list is already full, the
enclosing function returns PCRE_ERROR_DFA_WSSIZE. Because of the if/else form,
an invocation must be followed by a semicolon, and it then behaves as a single
statement. */

#define ADD_NEW_DATA(x,y,z) \
  if (new_count++ < wscount) \
    { \
    next_new_state->offset = (x); \
    next_new_state->count  = (y); \
    next_new_state->ims    = ims; \
    next_new_state->data   = (z); \
    next_new_state++; \
    DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
    } \
  else return PCRE_ERROR_DFA_WSSIZE
259
260 /* And now, here is the code */
261
262 static int
263 internal_dfa_exec(
264 dfa_match_data *md,
265 const uschar *this_start_code,
266 const uschar *current_subject,
267 int start_offset,
268 int *offsets,
269 int offsetcount,
270 int *workspace,
271 int wscount,
272 int ims,
273 int rlevel,
274 int recursing)
275 {
276 stateblock *active_states, *new_states, *temp_states;
277 stateblock *next_active_state, *next_new_state;
278
279 const uschar *ctypes, *lcc, *fcc;
280 const uschar *ptr;
281 const uschar *end_code;
282
283 int active_count, new_count, match_count;
284
285 /* Some fields in the md block are frequently referenced, so we load them into
286 independent variables in the hope that this will perform better. */
287
288 const uschar *start_subject = md->start_subject;
289 const uschar *end_subject = md->end_subject;
290 const uschar *start_code = md->start_code;
291
292 #ifdef SUPPORT_UTF8
293 BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
294 #endif
295
296 rlevel++;
297 offsetcount &= (-2);
298
299 wscount -= 2;
300 wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
301 (2 * INTS_PER_STATEBLOCK);
302
303 DPRINTF(("\n%.*s---------------------\n"
304 "%.*sCall to internal_dfa_exec f=%d r=%d\n",
305 rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
306
307 ctypes = md->tables + ctypes_offset;
308 lcc = md->tables + lcc_offset;
309 fcc = md->tables + fcc_offset;
310
311 match_count = PCRE_ERROR_NOMATCH; /* A negative number */
312
313 active_states = (stateblock *)(workspace + 2);
314 next_new_state = new_states = active_states + wscount;
315 new_count = 0;
316
317 /* The first thing in any (sub) pattern is a bracket of some sort. Push all
318 the alternative states onto the list, and find out where the end is. This
319 makes it possible to use this function recursively, when we want to stop at a
320 matching internal ket rather than at the end.
321
322 If the first opcode in the first alternative is OP_REVERSE, we are dealing with
323 a backward assertion. In that case, we have to find out the maximum amount to
324 move back, and set up each alternative appropriately. */
325
326 if (this_start_code[1+LINK_SIZE] == OP_REVERSE)
327 {
328 int max_back = 0;
329 int gone_back;
330
331 end_code = this_start_code;
332 do
333 {
334 int back = GET(end_code, 2+LINK_SIZE);
335 if (back > max_back) max_back = back;
336 end_code += GET(end_code, 1);
337 }
338 while (*end_code == OP_ALT);
339
340 /* If we can't go back the amount required for the longest lookbehind
341 pattern, go back as far as we can; some alternatives may still be viable. */
342
343 #ifdef SUPPORT_UTF8
344 /* In character mode we have to step back character by character */
345
346 if (utf8)
347 {
348 for (gone_back = 0; gone_back < max_back; gone_back++)
349 {
350 if (current_subject <= start_subject) break;
351 current_subject--;
352 while (current_subject > start_subject &&
353 (*current_subject & 0xc0) == 0x80)
354 current_subject--;
355 }
356 }
357 else
358 #endif
359
360 /* In byte-mode we can do this quickly. */
361
362 {
363 gone_back = (current_subject - max_back < start_subject)?
364 current_subject - start_subject : max_back;
365 current_subject -= gone_back;
366 }
367
368 /* Now we can process the individual branches. */
369
370 end_code = this_start_code;
371 do
372 {
373 int back = GET(end_code, 2+LINK_SIZE);
374 if (back <= gone_back)
375 {
376 int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
377 ADD_NEW_DATA(-bstate, 0, gone_back - back);
378 }
379 end_code += GET(end_code, 1);
380 }
381 while (*end_code == OP_ALT);
382 }
383
384 /* This is the code for a "normal" subpattern (not a backward assertion). The
385 start of a whole pattern is always one of these. If we are at the top level,
386 we may be asked to restart matching from the same point that we reached for a
387 previous partial match. We still have to scan through the top-level branches to
388 find the end state. */
389
390 else
391 {
392 end_code = this_start_code;
393
394 /* Restarting */
395
396 if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
397 {
398 do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
399 new_count = workspace[1];
400 if (!workspace[0])
401 memcpy(new_states, active_states, new_count * sizeof(stateblock));
402 }
403
404 /* Not restarting */
405
406 else
407 {
408 do
409 {
410 ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);
411 end_code += GET(end_code, 1);
412 }
413 while (*end_code == OP_ALT);
414 }
415 }
416
417 workspace[0] = 0; /* Bit indicating which vector is current */
418
419 DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
420
421 /* Loop for scanning the subject */
422
423 ptr = current_subject;
424 for (;;)
425 {
426 int i, j;
427 int clen, dlen;
428 unsigned int c, d;
429
430 /* Make the new state list into the active state list and empty the
431 new state list. */
432
433 temp_states = active_states;
434 active_states = new_states;
435 new_states = temp_states;
436 active_count = new_count;
437 new_count = 0;
438
439 workspace[0] ^= 1; /* Remember for the restarting feature */
440 workspace[1] = active_count;
441
442 #ifdef DEBUG
443 printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
444 pchars((uschar *)ptr, strlen((char *)ptr), stdout);
445 printf("\"\n");
446
447 printf("%.*sActive states: ", rlevel*2-2, SP);
448 for (i = 0; i < active_count; i++)
449 printf("%d/%d ", active_states[i].offset, active_states[i].count);
450 printf("\n");
451 #endif
452
453 /* Set the pointers for adding new states */
454
455 next_active_state = active_states + active_count;
456 next_new_state = new_states;
457
458 /* Load the current character from the subject outside the loop, as many
459 different states may want to look at it, and we assume that at least one
460 will. */
461
462 if (ptr < end_subject)
463 {
464 clen = 1;
465 #ifdef SUPPORT_UTF8
466 if (utf8) { GETCHARLEN(c, ptr, clen); } else
467 #endif /* SUPPORT_UTF8 */
468 c = *ptr;
469 }
470 else
471 {
472 clen = 0; /* At end subject */
473 c = -1;
474 }
475
476 /* Scan up the active states and act on each one. The result of an action
477 may be to add more states to the currently active list (e.g. on hitting a
478 parenthesis) or it may be to put states on the new list, for considering
479 when we move the character pointer on. */
480
481 for (i = 0; i < active_count; i++)
482 {
483 stateblock *current_state = active_states + i;
484 const uschar *code;
485 int state_offset = current_state->offset;
486 int count, codevalue;
487 int chartype, script;
488
489 #ifdef DEBUG
490 printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
491 if (c < 0) printf("-1\n");
492 else if (c > 32 && c < 127) printf("'%c'\n", c);
493 else printf("0x%02x\n", c);
494 #endif
495
496 /* This variable is referred to implicitly in the ADD_xxx macros. */
497
498 ims = current_state->ims;
499
500 /* A negative offset is a special case meaning "hold off going to this
501 (negated) state until the number of characters in the data field have
502 been skipped". */
503
504 if (state_offset < 0)
505 {
506 if (current_state->data > 0)
507 {
508 DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
509 ADD_NEW_DATA(state_offset, current_state->count,
510 current_state->data - 1);
511 continue;
512 }
513 else
514 {
515 current_state->offset = state_offset = -state_offset;
516 }
517 }
518
519 /* Check for a duplicate state with the same count, and skip if found. */
520
521 for (j = 0; j < i; j++)
522 {
523 if (active_states[j].offset == state_offset &&
524 active_states[j].count == current_state->count)
525 {
526 DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
527 goto NEXT_ACTIVE_STATE;
528 }
529 }
530
531 /* The state offset is the offset to the opcode */
532
533 code = start_code + state_offset;
534 codevalue = *code;
535 if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */
536
537 /* If this opcode is followed by an inline character, load it. It is
538 tempting to test for the presence of a subject character here, but that
539 is wrong, because sometimes zero repetitions of the subject are
540 permitted.
541
542 We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
543 argument that is not a data character - but is always one byte long.
544 Unfortunately, we have to take special action to deal with \P, \p, and
545 \X in this case. To keep the other cases fast, convert these ones to new
546 opcodes. */
547
548 if (coptable[codevalue] > 0)
549 {
550 dlen = 1;
551 #ifdef SUPPORT_UTF8
552 if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
553 #endif /* SUPPORT_UTF8 */
554 d = code[coptable[codevalue]];
555 if (codevalue >= OP_TYPESTAR)
556 {
557 if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;
558 if (d >= OP_NOTPROP)
559 codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;
560 }
561 }
562 else
563 {
564 dlen = 0; /* Not strictly necessary, but compilers moan */
565 d = -1; /* if these variables are not set. */
566 }
567
568
569 /* Now process the individual opcodes */
570
571 switch (codevalue)
572 {
573
574 /* ========================================================================== */
575 /* Reached a closing bracket. If not at the end of the pattern, carry
576 on with the next opcode. Otherwise, unless we have an empty string and
577 PCRE_NOTEMPTY is set, save the match data, shifting up all previous
578 matches so we always have the longest first. */
579
580 case OP_KET:
581 case OP_KETRMIN:
582 case OP_KETRMAX:
583 if (code != end_code)
584 {
585 ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
586 if (codevalue != OP_KET)
587 {
588 ADD_ACTIVE(state_offset - GET(code, 1), 0);
589 }
590 }
591 else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
592 {
593 if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
594 else if (match_count > 0 && ++match_count * 2 >= offsetcount)
595 match_count = 0;
596 count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
597 if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
598 if (offsetcount >= 2)
599 {
600 offsets[0] = current_subject - start_subject;
601 offsets[1] = ptr - start_subject;
602 DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
603 offsets[1] - offsets[0], current_subject));
604 }
605 if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
606 {
607 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
608 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
609 match_count, rlevel*2-2, SP));
610 return match_count;
611 }
612 }
613 break;
614
615 /* ========================================================================== */
616 /* These opcodes add to the current list of states without looking
617 at the current character. */
618
619 /*-----------------------------------------------------------------*/
620 case OP_ALT:
621 do { code += GET(code, 1); } while (*code == OP_ALT);
622 ADD_ACTIVE(code - start_code, 0);
623 break;
624
625 /*-----------------------------------------------------------------*/
626 case OP_BRA:
627 do
628 {
629 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
630 code += GET(code, 1);
631 }
632 while (*code == OP_ALT);
633 break;
634
635 /*-----------------------------------------------------------------*/
636 case OP_BRAZERO:
637 case OP_BRAMINZERO:
638 ADD_ACTIVE(state_offset + 1, 0);
639 code += 1 + GET(code, 2);
640 while (*code == OP_ALT) code += GET(code, 1);
641 ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
642 break;
643
644 /*-----------------------------------------------------------------*/
645 case OP_BRANUMBER:
646 ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
647 break;
648
649 /*-----------------------------------------------------------------*/
650 case OP_CIRC:
651 if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
652 ((ims & PCRE_MULTILINE) != 0 &&
653 ptr >= start_subject + md->nllen &&
654 ptr != end_subject &&
655 IS_NEWLINE(ptr - md->nllen)))
656 { ADD_ACTIVE(state_offset + 1, 0); }
657 break;
658
659 /*-----------------------------------------------------------------*/
660 case OP_EOD:
661 if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
662 break;
663
664 /*-----------------------------------------------------------------*/
665 case OP_OPT:
666 ims = code[1];
667 ADD_ACTIVE(state_offset + 2, 0);
668 break;
669
670 /*-----------------------------------------------------------------*/
671 case OP_SOD:
672 if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
673 break;
674
675 /*-----------------------------------------------------------------*/
676 case OP_SOM:
677 if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
678 break;
679
680
681 /* ========================================================================== */
682 /* These opcodes inspect the next subject character, and sometimes
683 the previous one as well, but do not have an argument. The variable
684 clen contains the length of the current character and is zero if we are
685 at the end of the subject. */
686
687 /*-----------------------------------------------------------------*/
688 case OP_ANY:
689 if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||
690 ptr > end_subject - md->nllen ||
691 !IS_NEWLINE(ptr)))
692 { ADD_NEW(state_offset + 1, 0); }
693 break;
694
695 /*-----------------------------------------------------------------*/
696 case OP_EODN:
697 if (clen == 0 ||
698 (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))
699 { ADD_ACTIVE(state_offset + 1, 0); }
700 break;
701
702 /*-----------------------------------------------------------------*/
703 case OP_DOLL:
704 if ((md->moptions & PCRE_NOTEOL) == 0)
705 {
706 if (clen == 0 ||
707 (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&
708 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
709 ))
710 { ADD_ACTIVE(state_offset + 1, 0); }
711 }
712 else if ((ims & PCRE_MULTILINE) != 0 &&
713 ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))
714 { ADD_ACTIVE(state_offset + 1, 0); }
715 break;
716
717 /*-----------------------------------------------------------------*/
718
719 case OP_DIGIT:
720 case OP_WHITESPACE:
721 case OP_WORDCHAR:
722 if (clen > 0 && c < 256 &&
723 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
724 { ADD_NEW(state_offset + 1, 0); }
725 break;
726
727 /*-----------------------------------------------------------------*/
728 case OP_NOT_DIGIT:
729 case OP_NOT_WHITESPACE:
730 case OP_NOT_WORDCHAR:
731 if (clen > 0 && (c >= 256 ||
732 ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
733 { ADD_NEW(state_offset + 1, 0); }
734 break;
735
736 /*-----------------------------------------------------------------*/
737 case OP_WORD_BOUNDARY:
738 case OP_NOT_WORD_BOUNDARY:
739 {
740 int left_word, right_word;
741
742 if (ptr > start_subject)
743 {
744 const uschar *temp = ptr - 1;
745 #ifdef SUPPORT_UTF8
746 if (utf8) BACKCHAR(temp);
747 #endif
748 GETCHARTEST(d, temp);
749 left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
750 }
751 else left_word = 0;
752
753 if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
754 else right_word = 0;
755
756 if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
757 { ADD_ACTIVE(state_offset + 1, 0); }
758 }
759 break;
760
761
762 #ifdef SUPPORT_UCP
763
764 /*-----------------------------------------------------------------*/
765 /* Check the next character by Unicode property. We will get here only
766 if the support is in the binary; otherwise a compile-time error occurs.
767 */
768
769 case OP_PROP:
770 case OP_NOTPROP:
771 if (clen > 0)
772 {
773 BOOL OK;
774 int category = _pcre_ucp_findprop(c, &chartype, &script);
775 switch(code[1])
776 {
777 case PT_ANY:
778 OK = TRUE;
779 break;
780
781 case PT_LAMP:
782 OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
783 break;
784
785 case PT_GC:
786 OK = category == code[2];
787 break;
788
789 case PT_PC:
790 OK = chartype == code[2];
791 break;
792
793 case PT_SC:
794 OK = script == code[2];
795 break;
796
797 /* Should never occur, but keep compilers from grumbling. */
798
799 default:
800 OK = codevalue != OP_PROP;
801 break;
802 }
803
804 if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
805 }
806 break;
807 #endif
808
809
810
811 /* ========================================================================== */
812 /* These opcodes likewise inspect the subject character, but have an
813 argument that is not a data character. It is one of these opcodes:
814 OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_WHITESPACE, OP_WORDCHAR,
815 OP_NOT_WORDCHAR. The value is loaded into d. */
816
817 case OP_TYPEPLUS:
818 case OP_TYPEMINPLUS:
819 count = current_state->count; /* Already matched */
820 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
821 if (clen > 0)
822 {
823 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
824 (c < 256 &&
825 (d != OP_ANY ||
826 (ims & PCRE_DOTALL) != 0 ||
827 ptr > end_subject - md->nllen ||
828 !IS_NEWLINE(ptr)
829 ) &&
830 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
831 {
832 count++;
833 ADD_NEW(state_offset, count);
834 }
835 }
836 break;
837
838 /*-----------------------------------------------------------------*/
839 case OP_TYPEQUERY:
840 case OP_TYPEMINQUERY:
841 ADD_ACTIVE(state_offset + 2, 0);
842 if (clen > 0)
843 {
844 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
845 (c < 256 &&
846 (d != OP_ANY ||
847 (ims & PCRE_DOTALL) != 0 ||
848 ptr > end_subject - md->nllen ||
849 !IS_NEWLINE(ptr)
850 ) &&
851 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
852 {
853 ADD_NEW(state_offset + 2, 0);
854 }
855 }
856 break;
857
858 /*-----------------------------------------------------------------*/
859 case OP_TYPESTAR:
860 case OP_TYPEMINSTAR:
861 ADD_ACTIVE(state_offset + 2, 0);
862 if (clen > 0)
863 {
864 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
865 (c < 256 &&
866 (d != OP_ANY ||
867 (ims & PCRE_DOTALL) != 0 ||
868 ptr > end_subject - md->nllen ||
869 !IS_NEWLINE(ptr)
870 ) &&
871 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
872 {
873 ADD_NEW(state_offset, 0);
874 }
875 }
876 break;
877
878 /*-----------------------------------------------------------------*/
879 case OP_TYPEEXACT:
880 case OP_TYPEUPTO:
881 case OP_TYPEMINUPTO:
882 if (codevalue != OP_TYPEEXACT)
883 { ADD_ACTIVE(state_offset + 4, 0); }
884 count = current_state->count; /* Number already matched */
885 if (clen > 0)
886 {
887 if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
888 (c < 256 &&
889 (d != OP_ANY ||
890 (ims & PCRE_DOTALL) != 0 ||
891 ptr > end_subject - md->nllen ||
892 !IS_NEWLINE(ptr)
893 ) &&
894 ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
895 {
896 if (++count >= GET2(code, 1))
897 { ADD_NEW(state_offset + 4, 0); }
898 else
899 { ADD_NEW(state_offset, count); }
900 }
901 }
902 break;
903
904 /* ========================================================================== */
905 /* These are virtual opcodes that are used when something like
906 OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It
907 keeps the code above fast for the other cases. The argument is in the
908 d variable. */
909
910 case OP_PROP_EXTRA + OP_TYPEPLUS:
911 case OP_PROP_EXTRA + OP_TYPEMINPLUS:
912 count = current_state->count; /* Already matched */
913 if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
914 if (clen > 0)
915 {
916 BOOL OK;
917 int category = _pcre_ucp_findprop(c, &chartype, &script);
918 switch(code[2])
919 {
920 case PT_ANY:
921 OK = TRUE;
922 break;
923
924 case PT_LAMP:
925 OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
926 break;
927
928 case PT_GC:
929 OK = category == code[3];
930 break;
931
932 case PT_PC:
933 OK = chartype == code[3];
934 break;
935
936 case PT_SC:
937 OK = script == code[3];
938 break;
939
940 /* Should never occur, but keep compilers from grumbling. */
941
942 default:
943 OK = codevalue != OP_PROP;
944 break;
945 }
946
947 if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }
948 }
949 break;
950
951 /*-----------------------------------------------------------------*/
952 case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
953 case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
954 count = current_state->count; /* Already matched */
955 if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
956 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
957 {
958 const uschar *nptr = ptr + clen;
959 int ncount = 0;
960 while (nptr < end_subject)
961 {
962 int nd;
963 int ndlen = 1;
964 GETCHARLEN(nd, nptr, ndlen);
965 if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
966 ncount++;
967 nptr += ndlen;
968 }
969 count++;
970 ADD_NEW_DATA(-state_offset, count, ncount);
971 }
972 break;
973
974 /*-----------------------------------------------------------------*/
975 case OP_PROP_EXTRA + OP_TYPEQUERY:
976 case OP_PROP_EXTRA + OP_TYPEMINQUERY:
977 count = 4;
978 goto QS1;
979
980 case OP_PROP_EXTRA + OP_TYPESTAR:
981 case OP_PROP_EXTRA + OP_TYPEMINSTAR:
982 count = 0;
983
984 QS1:
985
986 ADD_ACTIVE(state_offset + 4, 0);
987 if (clen > 0)
988 {
989 BOOL OK;
990 int category = _pcre_ucp_findprop(c, &chartype, &script);
991 switch(code[2])
992 {
993 case PT_ANY:
994 OK = TRUE;
995 break;
996
997 case PT_LAMP:
998 OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
999 break;
1000
1001 case PT_GC:
1002 OK = category == code[3];
1003 break;
1004
1005 case PT_PC:
1006 OK = chartype == code[3];
1007 break;
1008
1009 case PT_SC:
1010 OK = script == code[3];
1011 break;
1012
1013 /* Should never occur, but keep compilers from grumbling. */
1014
1015 default:
1016 OK = codevalue != OP_PROP;
1017 break;
1018 }
1019
1020 if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }
1021 }
1022 break;
1023
1024 /*-----------------------------------------------------------------*/
1025 case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1026 case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1027 count = 2;
1028 goto QS2;
1029
1030 case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1031 case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1032 count = 0;
1033
1034 QS2:
1035
1036 ADD_ACTIVE(state_offset + 2, 0);
1037 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1038 {
1039 const uschar *nptr = ptr + clen;
1040 int ncount = 0;
1041 while (nptr < end_subject)
1042 {
1043 int nd;
1044 int ndlen = 1;
1045 GETCHARLEN(nd, nptr, ndlen);
1046 if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1047 ncount++;
1048 nptr += ndlen;
1049 }
1050 ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1051 }
1052 break;
1053
1054 /*-----------------------------------------------------------------*/
1055 case OP_PROP_EXTRA + OP_TYPEEXACT:
1056 case OP_PROP_EXTRA + OP_TYPEUPTO:
1057 case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1058 if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1059 { ADD_ACTIVE(state_offset + 6, 0); }
1060 count = current_state->count; /* Number already matched */
1061 if (clen > 0)
1062 {
1063 BOOL OK;
1064 int category = _pcre_ucp_findprop(c, &chartype, &script);
1065 switch(code[4])
1066 {
1067 case PT_ANY:
1068 OK = TRUE;
1069 break;
1070
1071 case PT_LAMP:
1072 OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1073 break;
1074
1075 case PT_GC:
1076 OK = category == code[5];
1077 break;
1078
1079 case PT_PC:
1080 OK = chartype == code[5];
1081 break;
1082
1083 case PT_SC:
1084 OK = script == code[5];
1085 break;
1086
1087 /* Should never occur, but keep compilers from grumbling. */
1088
1089 default:
1090 OK = codevalue != OP_PROP;
1091 break;
1092 }
1093
1094 if (OK == (d == OP_PROP))
1095 {
1096 if (++count >= GET2(code, 1))
1097 { ADD_NEW(state_offset + 6, 0); }
1098 else
1099 { ADD_NEW(state_offset, count); }
1100 }
1101 }
1102 break;
1103
1104 /*-----------------------------------------------------------------*/
1105 case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1106 case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1107 case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1108 if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1109 { ADD_ACTIVE(state_offset + 4, 0); }
1110 count = current_state->count; /* Number already matched */
1111 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1112 {
1113 const uschar *nptr = ptr + clen;
1114 int ncount = 0;
1115 while (nptr < end_subject)
1116 {
1117 int nd;
1118 int ndlen = 1;
1119 GETCHARLEN(nd, nptr, ndlen);
1120 if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1121 ncount++;
1122 nptr += ndlen;
1123 }
1124 if (++count >= GET2(code, 1))
1125 { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1126 else
1127 { ADD_NEW_DATA(-state_offset, count, ncount); }
1128 }
1129 break;
1130
1131 /* ========================================================================== */
1132 /* These opcodes are followed by a character that is usually compared
1133 to the current subject character; it is loaded into d. We still get
1134 here even if there is no subject character, because in some cases zero
1135 repetitions are permitted. */
1136
1137 /*-----------------------------------------------------------------*/
1138 case OP_CHAR:
1139 if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
1140 break;
1141
1142 /*-----------------------------------------------------------------*/
1143 case OP_CHARNC:
1144 if (clen == 0) break;
1145
1146 #ifdef SUPPORT_UTF8
1147 if (utf8)
1148 {
1149 if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1150 {
1151 int othercase;
1152 if (c < 128) othercase = fcc[c]; else
1153
1154 /* If we have Unicode property support, we can use it to test the
1155 other case of the character. */
1156
1157 #ifdef SUPPORT_UCP
1158 othercase = _pcre_ucp_othercase(c);
1159 #else
1160 othercase = -1;
1161 #endif
1162
1163 if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1164 }
1165 }
1166 else
1167 #endif /* SUPPORT_UTF8 */
1168
1169 /* Non-UTF-8 mode */
1170 {
1171 if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
1172 }
1173 break;
1174
1175
1176 #ifdef SUPPORT_UCP
1177 /*-----------------------------------------------------------------*/
1178 /* This is a tricky one because it can match more than one character.
1179 Find out how many characters to skip, and then set up a negative state
1180 to wait for them to pass before continuing. */
1181
1182 case OP_EXTUNI:
1183 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1184 {
1185 const uschar *nptr = ptr + clen;
1186 int ncount = 0;
1187 while (nptr < end_subject)
1188 {
1189 int nclen = 1;
1190 GETCHARLEN(c, nptr, nclen);
1191 if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1192 ncount++;
1193 nptr += nclen;
1194 }
1195 ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
1196 }
1197 break;
1198 #endif
1199
1200 /*-----------------------------------------------------------------*/
1201 /* Match a negated single character. This is only used for one-byte
1202 characters, that is, we know that d < 256. The character we are
1203 checking (c) can be multibyte. */
1204
1205 case OP_NOT:
1206 if (clen > 0)
1207 {
1208 int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1209 if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1210 }
1211 break;
1212
1213 /*-----------------------------------------------------------------*/
1214 case OP_PLUS:
1215 case OP_MINPLUS:
1216 case OP_NOTPLUS:
1217 case OP_NOTMINPLUS:
1218 count = current_state->count; /* Already matched */
1219 if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1220 if (clen > 0)
1221 {
1222 int otherd = -1;
1223 if ((ims & PCRE_CASELESS) != 0)
1224 {
1225 #ifdef SUPPORT_UTF8
1226 if (utf8 && d >= 128)
1227 {
1228 #ifdef SUPPORT_UCP
1229 otherd = _pcre_ucp_othercase(d);
1230 #endif /* SUPPORT_UCP */
1231 }
1232 else
1233 #endif /* SUPPORT_UTF8 */
1234 otherd = fcc[d];
1235 }
1236 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1237 { count++; ADD_NEW(state_offset, count); }
1238 }
1239 break;
1240
1241 /*-----------------------------------------------------------------*/
1242 case OP_QUERY:
1243 case OP_MINQUERY:
1244 case OP_NOTQUERY:
1245 case OP_NOTMINQUERY:
1246 ADD_ACTIVE(state_offset + dlen + 1, 0);
1247 if (clen > 0)
1248 {
1249 int otherd = -1;
1250 if ((ims & PCRE_CASELESS) != 0)
1251 {
1252 #ifdef SUPPORT_UTF8
1253 if (utf8 && d >= 128)
1254 {
1255 #ifdef SUPPORT_UCP
1256 otherd = _pcre_ucp_othercase(d);
1257 #endif /* SUPPORT_UCP */
1258 }
1259 else
1260 #endif /* SUPPORT_UTF8 */
1261 otherd = fcc[d];
1262 }
1263 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1264 { ADD_NEW(state_offset + dlen + 1, 0); }
1265 }
1266 break;
1267
1268 /*-----------------------------------------------------------------*/
1269 case OP_STAR:
1270 case OP_MINSTAR:
1271 case OP_NOTSTAR:
1272 case OP_NOTMINSTAR:
1273 ADD_ACTIVE(state_offset + dlen + 1, 0);
1274 if (clen > 0)
1275 {
1276 int otherd = -1;
1277 if ((ims & PCRE_CASELESS) != 0)
1278 {
1279 #ifdef SUPPORT_UTF8
1280 if (utf8 && d >= 128)
1281 {
1282 #ifdef SUPPORT_UCP
1283 otherd = _pcre_ucp_othercase(d);
1284 #endif /* SUPPORT_UCP */
1285 }
1286 else
1287 #endif /* SUPPORT_UTF8 */
1288 otherd = fcc[d];
1289 }
1290 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1291 { ADD_NEW(state_offset, 0); }
1292 }
1293 break;
1294
1295 /*-----------------------------------------------------------------*/
1296 case OP_EXACT:
1297 case OP_UPTO:
1298 case OP_MINUPTO:
1299 case OP_NOTEXACT:
1300 case OP_NOTUPTO:
1301 case OP_NOTMINUPTO:
1302 if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)
1303 { ADD_ACTIVE(state_offset + dlen + 3, 0); }
1304 count = current_state->count; /* Number already matched */
1305 if (clen > 0)
1306 {
1307 int otherd = -1;
1308 if ((ims & PCRE_CASELESS) != 0)
1309 {
1310 #ifdef SUPPORT_UTF8
1311 if (utf8 && d >= 128)
1312 {
1313 #ifdef SUPPORT_UCP
1314 otherd = _pcre_ucp_othercase(d);
1315 #endif /* SUPPORT_UCP */
1316 }
1317 else
1318 #endif /* SUPPORT_UTF8 */
1319 otherd = fcc[d];
1320 }
1321 if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1322 {
1323 if (++count >= GET2(code, 1))
1324 { ADD_NEW(state_offset + dlen + 3, 0); }
1325 else
1326 { ADD_NEW(state_offset, count); }
1327 }
1328 }
1329 break;
1330
1331
1332 /* ========================================================================== */
1333 /* These are the class-handling opcodes */
1334
1335 case OP_CLASS:
1336 case OP_NCLASS:
1337 case OP_XCLASS:
1338 {
1339 BOOL isinclass = FALSE;
1340 int next_state_offset;
1341 const uschar *ecode;
1342
1343 /* For a simple class, there is always just a 32-byte table, and we
1344 can set isinclass from it. */
1345
1346 if (codevalue != OP_XCLASS)
1347 {
1348 ecode = code + 33;
1349 if (clen > 0)
1350 {
1351 isinclass = (c > 255)? (codevalue == OP_NCLASS) :
1352 ((code[1 + c/8] & (1 << (c&7))) != 0);
1353 }
1354 }
1355
1356 /* An extended class may have a table or a list of single characters,
1357 ranges, or both, and it may be positive or negative. There's a
1358 function that sorts all this out. */
1359
1360 else
1361 {
1362 ecode = code + GET(code, 1);
1363 if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
1364 }
1365
1366 /* At this point, isinclass is set for all kinds of class, and ecode
1367 points to the byte after the end of the class. If there is a
1368 quantifier, this is where it will be. */
1369
1370 next_state_offset = ecode - start_code;
1371
1372 switch (*ecode)
1373 {
1374 case OP_CRSTAR:
1375 case OP_CRMINSTAR:
1376 ADD_ACTIVE(next_state_offset + 1, 0);
1377 if (isinclass) { ADD_NEW(state_offset, 0); }
1378 break;
1379
1380 case OP_CRPLUS:
1381 case OP_CRMINPLUS:
1382 count = current_state->count; /* Already matched */
1383 if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
1384 if (isinclass) { count++; ADD_NEW(state_offset, count); }
1385 break;
1386
1387 case OP_CRQUERY:
1388 case OP_CRMINQUERY:
1389 ADD_ACTIVE(next_state_offset + 1, 0);
1390 if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
1391 break;
1392
1393 case OP_CRRANGE:
1394 case OP_CRMINRANGE:
1395 count = current_state->count; /* Already matched */
1396 if (count >= GET2(ecode, 1))
1397 { ADD_ACTIVE(next_state_offset + 5, 0); }
1398 if (isinclass)
1399 {
1400 int max = GET2(ecode, 3);
1401 if (++count >= max && max != 0) /* Max 0 => no limit */
1402 { ADD_NEW(next_state_offset + 5, 0); }
1403 else
1404 { ADD_NEW(state_offset, count); }
1405 }
1406 break;
1407
1408 default:
1409 if (isinclass) { ADD_NEW(next_state_offset, 0); }
1410 break;
1411 }
1412 }
1413 break;
1414
1415 /* ========================================================================== */
1416 /* These are the opcodes for fancy brackets of various kinds. We have
1417 to use recursion in order to handle them. */
1418
1419 case OP_ASSERT:
1420 case OP_ASSERT_NOT:
1421 case OP_ASSERTBACK:
1422 case OP_ASSERTBACK_NOT:
1423 {
1424 int rc;
1425 int local_offsets[2];
1426 int local_workspace[1000];
1427 const uschar *endasscode = code + GET(code, 1);
1428
1429 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
1430
1431 rc = internal_dfa_exec(
1432 md, /* static match data */
1433 code, /* this subexpression's code */
1434 ptr, /* where we currently are */
1435 ptr - start_subject, /* start offset */
1436 local_offsets, /* offset vector */
1437 sizeof(local_offsets)/sizeof(int), /* size of same */
1438 local_workspace, /* workspace vector */
1439 sizeof(local_workspace)/sizeof(int), /* size of same */
1440 ims, /* the current ims flags */
1441 rlevel, /* function recursion level */
1442 recursing); /* pass on regex recursion */
1443
1444 if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
1445 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
1446 }
1447 break;
1448
1449 /*-----------------------------------------------------------------*/
1450 case OP_COND:
1451 {
1452 int local_offsets[1000];
1453 int local_workspace[1000];
1454 int condcode = code[LINK_SIZE+1];
1455
1456 /* The only supported version of OP_CREF is for the value 0xffff, which
1457 means "test if in a recursion". */
1458
1459 if (condcode == OP_CREF)
1460 {
1461 int value = GET2(code, LINK_SIZE+2);
1462 if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;
1463 if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
1464 else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1465 }
1466
1467 /* Otherwise, the condition is an assertion */
1468
1469 else
1470 {
1471 int rc;
1472 const uschar *asscode = code + LINK_SIZE + 1;
1473 const uschar *endasscode = asscode + GET(asscode, 1);
1474
1475 while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
1476
1477 rc = internal_dfa_exec(
1478 md, /* fixed match data */
1479 asscode, /* this subexpression's code */
1480 ptr, /* where we currently are */
1481 ptr - start_subject, /* start offset */
1482 local_offsets, /* offset vector */
1483 sizeof(local_offsets)/sizeof(int), /* size of same */
1484 local_workspace, /* workspace vector */
1485 sizeof(local_workspace)/sizeof(int), /* size of same */
1486 ims, /* the current ims flags */
1487 rlevel, /* function recursion level */
1488 recursing); /* pass on regex recursion */
1489
1490 if ((rc >= 0) ==
1491 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
1492 { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
1493 else
1494 { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1495 }
1496 }
1497 break;
1498
1499 /*-----------------------------------------------------------------*/
1500 case OP_RECURSE:
1501 {
1502 int local_offsets[1000];
1503 int local_workspace[1000];
1504 int rc;
1505
1506 DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
1507 recursing + 1));
1508
1509 rc = internal_dfa_exec(
1510 md, /* fixed match data */
1511 start_code + GET(code, 1), /* this subexpression's code */
1512 ptr, /* where we currently are */
1513 ptr - start_subject, /* start offset */
1514 local_offsets, /* offset vector */
1515 sizeof(local_offsets)/sizeof(int), /* size of same */
1516 local_workspace, /* workspace vector */
1517 sizeof(local_workspace)/sizeof(int), /* size of same */
1518 ims, /* the current ims flags */
1519 rlevel, /* function recursion level */
1520 recursing + 1); /* regex recurse level */
1521
1522 DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
1523 recursing + 1, rc));
1524
1525 /* Ran out of internal offsets */
1526
1527 if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
1528
1529 /* For each successful matched substring, set up the next state with a
1530 count of characters to skip before trying it. Note that the count is in
1531 characters, not bytes. */
1532
1533 if (rc > 0)
1534 {
1535 for (rc = rc*2 - 2; rc >= 0; rc -= 2)
1536 {
1537 const uschar *p = start_subject + local_offsets[rc];
1538 const uschar *pp = start_subject + local_offsets[rc+1];
1539 int charcount = local_offsets[rc+1] - local_offsets[rc];
1540 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
1541 if (charcount > 0)
1542 {
1543 ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
1544 }
1545 else
1546 {
1547 ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
1548 }
1549 }
1550 }
1551 else if (rc != PCRE_ERROR_NOMATCH) return rc;
1552 }
1553 break;
1554
1555 /*-----------------------------------------------------------------*/
1556 case OP_ONCE:
1557 {
1558 int local_offsets[2];
1559 int local_workspace[1000];
1560
1561 int rc = internal_dfa_exec(
1562 md, /* fixed match data */
1563 code, /* this subexpression's code */
1564 ptr, /* where we currently are */
1565 ptr - start_subject, /* start offset */
1566 local_offsets, /* offset vector */
1567 sizeof(local_offsets)/sizeof(int), /* size of same */
1568 local_workspace, /* workspace vector */
1569 sizeof(local_workspace)/sizeof(int), /* size of same */
1570 ims, /* the current ims flags */
1571 rlevel, /* function recursion level */
1572 recursing); /* pass on regex recursion */
1573
1574 if (rc >= 0)
1575 {
1576 const uschar *end_subpattern = code;
1577 int charcount = local_offsets[1] - local_offsets[0];
1578 int next_state_offset, repeat_state_offset;
1579
1580 do { end_subpattern += GET(end_subpattern, 1); }
1581 while (*end_subpattern == OP_ALT);
1582 next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
1583
1584 /* If the end of this subpattern is KETRMAX or KETRMIN, we must
1585 arrange for the repeat state also to be added to the relevant list.
1586 Calculate the offset, or set -1 for no repeat. */
1587
1588 repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
1589 *end_subpattern == OP_KETRMIN)?
1590 end_subpattern - start_code - GET(end_subpattern, 1) : -1;
1591
1592 /* If we have matched an empty string, add the next state at the
1593 current character pointer. This is important so that the duplicate
1594 checking kicks in, which is what breaks infinite loops that match an
1595 empty string. */
1596
1597 if (charcount == 0)
1598 {
1599 ADD_ACTIVE(next_state_offset, 0);
1600 }
1601
1602 /* Optimization: if there are no more active states, and there
1603 are no new states yet set up, then skip over the subject string
1604 right here, to save looping. Otherwise, set up the new state to swing
1605 into action when the end of the substring is reached. */
1606
1607 else if (i + 1 >= active_count && new_count == 0)
1608 {
1609 ptr += charcount;
1610 clen = 0;
1611 ADD_NEW(next_state_offset, 0);
1612
1613 /* If we are adding a repeat state at the new character position,
1614 we must fudge things so that it is the only current state.
1615 Otherwise, it might be a duplicate of one we processed before, and
1616 that would cause it to be skipped. */
1617
1618 if (repeat_state_offset >= 0)
1619 {
1620 next_active_state = active_states;
1621 active_count = 0;
1622 i = -1;
1623 ADD_ACTIVE(repeat_state_offset, 0);
1624 }
1625 }
1626 else
1627 {
1628 const uschar *p = start_subject + local_offsets[0];
1629 const uschar *pp = start_subject + local_offsets[1];
1630 while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
1631 ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
1632 if (repeat_state_offset >= 0)
1633 { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
1634 }
1635
1636 }
1637 else if (rc != PCRE_ERROR_NOMATCH) return rc;
1638 }
1639 break;
1640
1641
1642 /* ========================================================================== */
1643 /* Handle callouts */
1644
1645 case OP_CALLOUT:
1646 if (pcre_callout != NULL)
1647 {
1648 int rrc;
1649 pcre_callout_block cb;
1650 cb.version = 1; /* Version 1 of the callout block */
1651 cb.callout_number = code[1];
1652 cb.offset_vector = offsets;
1653 cb.subject = (PCRE_SPTR)start_subject;
1654 cb.subject_length = end_subject - start_subject;
1655 cb.start_match = current_subject - start_subject;
1656 cb.current_position = ptr - start_subject;
1657 cb.pattern_position = GET(code, 2);
1658 cb.next_item_length = GET(code, 2 + LINK_SIZE);
1659 cb.capture_top = 1;
1660 cb.capture_last = -1;
1661 cb.callout_data = md->callout_data;
1662 if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
1663 if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }
1664 }
1665 break;
1666
1667
1668 /* ========================================================================== */
1669 default: /* Unsupported opcode */
1670 return PCRE_ERROR_DFA_UITEM;
1671 }
1672
1673 NEXT_ACTIVE_STATE: continue;
1674
1675 } /* End of loop scanning active states */
1676
1677 /* We have finished the processing at the current subject character. If no
1678 new states have been set for the next character, we have found all the
1679 matches that we are going to find. If we are at the top level and partial
1680 matching has been requested, check for appropriate conditions. */
1681
1682 if (new_count <= 0)
1683 {
1684 if (match_count < 0 && /* No matches found */
1685 rlevel == 1 && /* Top level match function */
1686 (md->moptions & PCRE_PARTIAL) != 0 && /* Want partial matching */
1687 ptr >= end_subject && /* Reached end of subject */
1688 ptr > current_subject) /* Matched non-empty string */
1689 {
1690 if (offsetcount >= 2)
1691 {
1692 offsets[0] = current_subject - start_subject;
1693 offsets[1] = end_subject - start_subject;
1694 }
1695 match_count = PCRE_ERROR_PARTIAL;
1696 }
1697
1698 DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
1699 "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
1700 rlevel*2-2, SP));
1701 break; /* In effect, "return", but see the comment below */
1702 }
1703
1704 /* One or more states are active for the next character. */
1705
1706 ptr += clen; /* Advance to next subject character */
1707 } /* Loop to move along the subject string */
1708
1709 /* Control gets here from "break" a few lines above. We do it this way because
1710 if we use "return" above, we have compiler trouble. Some compilers warn if
1711 there's nothing here because they think the function doesn't return a value. On
1712 the other hand, if we put a dummy statement here, some more clever compilers
1713 complain that it can't be reached. Sigh. */
1714
1715 return match_count;
1716 }
1717
1718
1719
1720
1721 /*************************************************
1722 * Execute a Regular Expression - DFA engine *
1723 *************************************************/
1724
1725 /* This external function applies a compiled re to a subject string using a DFA
1726 engine. This function calls the internal function multiple times if the pattern
1727 is not anchored.
1728
1729 Arguments:
1730 argument_re points to the compiled expression
1731 extra_data points to extra data or is NULL (not currently used)
1732 subject points to the subject string
1733 length length of subject string (may contain binary zeros)
1734 start_offset where to start in the subject string
1735 options option bits
1736 offsets vector of match offsets
1737 offsetcount size of same
1738 workspace workspace vector
1739 wscount size of same
1740
1741 Returns: > 0 => number of match offset pairs placed in offsets
1742 = 0 => offsets overflowed; longest matches are present
1743 -1 => failed to match
1744 < -1 => some kind of unexpected problem
1745 */
1746
1747 PCRE_DATA_SCOPE int
1748 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
1749 const char *subject, int length, int start_offset, int options, int *offsets,
1750 int offsetcount, int *workspace, int wscount)
1751 {
1752 real_pcre *re = (real_pcre *)argument_re;
1753 dfa_match_data match_block;
1754 dfa_match_data *md = &match_block;
1755 BOOL utf8, anchored, startline, firstline;
1756 const uschar *current_subject, *end_subject, *lcc;
1757
1758 pcre_study_data internal_study;
1759 const pcre_study_data *study = NULL;
1760 real_pcre internal_re;
1761
1762 const uschar *req_byte_ptr;
1763 const uschar *start_bits = NULL;
1764 BOOL first_byte_caseless = FALSE;
1765 BOOL req_byte_caseless = FALSE;
1766 int first_byte = -1;
1767 int req_byte = -1;
1768 int req_byte2 = -1;
1769 int newline;
1770
1771 /* Plausibility checks */
1772
1773 if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1774 if (re == NULL || subject == NULL || workspace == NULL ||
1775 (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
1776 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1777 if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
1778
1779 /* We need to find the pointer to any study data before we test for byte
1780 flipping, so we scan the extra_data block first. This may set two fields in the
1781 match block, so we must initialize them beforehand. However, the other fields
1782 in the match block must not be set until after the byte flipping. */
1783
1784 md->tables = re->tables;
1785 md->callout_data = NULL;
1786
1787 if (extra_data != NULL)
1788 {
1789 unsigned int flags = extra_data->flags;
1790 if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
1791 study = (const pcre_study_data *)extra_data->study_data;
1792 if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
1793 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
1794 return PCRE_ERROR_DFA_UMLIMIT;
1795 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
1796 md->callout_data = extra_data->callout_data;
1797 if ((flags & PCRE_EXTRA_TABLES) != 0)
1798 md->tables = extra_data->tables;
1799 }
1800
1801 /* Check that the first field in the block is the magic number. If it is not,
1802 test for a regex that was compiled on a host of opposite endianness. If this is
1803 the case, flipped values are put in internal_re and internal_study if there was
1804 study data too. */
1805
1806 if (re->magic_number != MAGIC_NUMBER)
1807 {
1808 re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
1809 if (re == NULL) return PCRE_ERROR_BADMAGIC;
1810 if (study != NULL) study = &internal_study;
1811 }
1812
1813 /* Set some local values */
1814
1815 current_subject = (const unsigned char *)subject + start_offset;
1816 end_subject = (const unsigned char *)subject + length;
1817 req_byte_ptr = current_subject - 1;
1818
1819 #ifdef SUPPORT_UTF8
1820 utf8 = (re->options & PCRE_UTF8) != 0;
1821 #else
1822 utf8 = FALSE;
1823 #endif
1824
1825 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
1826 (re->options & PCRE_ANCHORED) != 0;
1827
1828 /* The remaining fixed data for passing around. */
1829
1830 md->start_code = (const uschar *)argument_re +
1831 re->name_table_offset + re->name_count * re->name_entry_size;
1832 md->start_subject = (const unsigned char *)subject;
1833 md->end_subject = end_subject;
1834 md->moptions = options;
1835 md->poptions = re->options;
1836
1837 /* Handle different types of newline. The two bits give four cases. If nothing
1838 is set at run time, whatever was used at compile time applies. */
1839
1840 switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &
1841 PCRE_NEWLINE_CRLF)
1842 {
1843 default: newline = NEWLINE; break; /* Compile-time default */
1844 case PCRE_NEWLINE_CR: newline = '\r'; break;
1845 case PCRE_NEWLINE_LF: newline = '\n'; break;
1846 case PCRE_NEWLINE_CR+
1847 PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
1848 }
1849
1850 if (newline > 255)
1851 {
1852 md->nllen = 2;
1853 md->nl[0] = (newline >> 8) & 255;
1854 md->nl[1] = newline & 255;
1855 }
1856 else
1857 {
1858 md->nllen = 1;
1859 md->nl[0] = newline;
1860 }
1861
1862 /* Check a UTF-8 string if required. Unfortunately there's no way of passing
1863 back the character offset. */
1864
1865 #ifdef SUPPORT_UTF8
1866 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
1867 {
1868 if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
1869 return PCRE_ERROR_BADUTF8;
1870 if (start_offset > 0 && start_offset < length)
1871 {
1872 int tb = ((uschar *)subject)[start_offset];
1873 if (tb > 127)
1874 {
1875 tb &= 0xc0;
1876 if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
1877 }
1878 }
1879 }
1880 #endif
1881
1882 /* If the exec call supplied NULL for tables, use the inbuilt ones. This
1883 is a feature that makes it possible to save compiled regex and re-use them
1884 in other programs later. */
1885
1886 if (md->tables == NULL) md->tables = _pcre_default_tables;
1887
1888 /* The lower casing table and the "must be at the start of a line" flag are
1889 used in a loop when finding where to start. */
1890
1891 lcc = md->tables + lcc_offset;
1892 startline = (re->options & PCRE_STARTLINE) != 0;
1893 firstline = (re->options & PCRE_FIRSTLINE) != 0;
1894
1895 /* Set up the first character to match, if available. The first_byte value is
1896 never set for an anchored regular expression, but the anchoring may be forced
1897 at run time, so we have to test for anchoring. The first char may be unset for
1898 an unanchored pattern, of course. If there's no first char and the pattern was
1899 studied, there may be a bitmap of possible first characters. */
1900
1901 if (!anchored)
1902 {
1903 if ((re->options & PCRE_FIRSTSET) != 0)
1904 {
1905 first_byte = re->first_byte & 255;
1906 if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
1907 first_byte = lcc[first_byte];
1908 }
1909 else
1910 {
1911 if (startline && study != NULL &&
1912 (study->options & PCRE_STUDY_MAPPED) != 0)
1913 start_bits = study->start_bits;
1914 }
1915 }
1916
1917 /* For anchored or unanchored matches, there may be a "last known required
1918 character" set. */
1919
1920 if ((re->options & PCRE_REQCHSET) != 0)
1921 {
1922 req_byte = re->req_byte & 255;
1923 req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
1924 req_byte2 = (md->tables + fcc_offset)[req_byte]; /* case flipped */
1925 }
1926
1927 /* Call the main matching function, looping for a non-anchored regex after a
1928 failed match. Unless restarting, optimize by moving to the first match
1929 character if possible, when not anchored. Then unless wanting a partial match,
1930 check for a required later character. */
1931
1932 for (;;)
1933 {
1934 int rc;
1935
1936 if ((options & PCRE_DFA_RESTART) == 0)
1937 {
1938 const uschar *save_end_subject = end_subject;
1939
1940 /* Advance to a unique first char if possible. If firstline is TRUE, the
1941 start of the match is constrained to the first line of a multiline string.
1942 Implement this by temporarily adjusting end_subject so that we stop
1943 scanning at a newline. If the match fails at the newline, later code breaks
1944 this loop. */
1945
1946 if (firstline)
1947 {
1948 const uschar *t = current_subject;
1949 while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;
1950 end_subject = t;
1951 }
1952
1953 if (first_byte >= 0)
1954 {
1955 if (first_byte_caseless)
1956 while (current_subject < end_subject &&
1957 lcc[*current_subject] != first_byte)
1958 current_subject++;
1959 else
1960 while (current_subject < end_subject && *current_subject != first_byte)
1961 current_subject++;
1962 }
1963
1964 /* Or to just after a linebreak for a multiline match if possible */
1965
1966 else if (startline)
1967 {
1968 if (current_subject > md->start_subject + md->nllen +
1969 start_offset)
1970 {
1971 while (current_subject <= end_subject &&
1972 !IS_NEWLINE(current_subject - md->nllen))
1973 current_subject++;
1974 }
1975 }
1976
1977 /* Or to a non-unique first char after study */
1978
1979 else if (start_bits != NULL)
1980 {
1981 while (current_subject < end_subject)
1982 {
1983 register unsigned int c = *current_subject;
1984 if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
1985 else break;
1986 }
1987 }
1988
1989 /* Restore fudged end_subject */
1990
1991 end_subject = save_end_subject;
1992 }
1993
1994 /* If req_byte is set, we know that that character must appear in the subject
1995 for the match to succeed. If the first character is set, req_byte must be
1996 later in the subject; otherwise the test starts at the match point. This
1997 optimization can save a huge amount of work in patterns with nested unlimited
1998 repeats that aren't going to match. Writing separate code for cased/caseless
1999 versions makes it go faster, as does using an autoincrement and backing off
2000 on a match.
2001
2002 HOWEVER: when the subject string is very, very long, searching to its end can
2003 take a long time, and give bad performance on quite ordinary patterns. This
2004 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
2005 don't do this when the string is sufficiently long.
2006
2007 ALSO: this processing is disabled when partial matching is requested.
2008 */
2009
2010 if (req_byte >= 0 &&
2011 end_subject - current_subject < REQ_BYTE_MAX &&
2012 (options & PCRE_PARTIAL) == 0)
2013 {
2014 register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
2015
2016 /* We don't need to repeat the search if we haven't yet reached the
2017 place we found it at last time. */
2018
2019 if (p > req_byte_ptr)
2020 {
2021 if (req_byte_caseless)
2022 {
2023 while (p < end_subject)
2024 {
2025 register int pp = *p++;
2026 if (pp == req_byte || pp == req_byte2) { p--; break; }
2027 }
2028 }
2029 else
2030 {
2031 while (p < end_subject)
2032 {
2033 if (*p++ == req_byte) { p--; break; }
2034 }
2035 }
2036
2037 /* If we can't find the required character, break the matching loop,
2038 which will cause a return or PCRE_ERROR_NOMATCH. */
2039
2040 if (p >= end_subject) break;
2041
2042 /* If we have found the required character, save the point where we
2043 found it, so that we don't search again next time round the loop if
2044 the start hasn't passed this character yet. */
2045
2046 req_byte_ptr = p;
2047 }
2048 }
2049
2050 /* OK, now we can do the business */
2051
2052 rc = internal_dfa_exec(
2053 md, /* fixed match data */
2054 md->start_code, /* this subexpression's code */
2055 current_subject, /* where we currently are */
2056 start_offset, /* start offset in subject */
2057 offsets, /* offset vector */
2058 offsetcount, /* size of same */
2059 workspace, /* workspace vector */
2060 wscount, /* size of same */
2061 re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2062 0, /* function recurse level */
2063 0); /* regex recurse level */
2064
2065 /* Anything other than "no match" means we are done, always; otherwise, carry
2066 on only if not anchored. */
2067
2068 if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
2069
2070 /* Advance to the next subject character unless we are at the end of a line
2071 and firstline is set. */
2072
2073 if (firstline &&
2074 current_subject <= end_subject - md->nllen &&
2075 IS_NEWLINE(current_subject)) break;
2076 current_subject++;
2077 if (utf8)
2078 {
2079 while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2080 current_subject++;
2081 }
2082 if (current_subject > end_subject) break;
2083 }
2084
2085 return PCRE_ERROR_NOMATCH;
2086 }
2087
2088 /* End of pcre_dfa_exec.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12