/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Contents of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (hide annotations) (download)
Sat Feb 24 21:41:21 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: text/plain
File size: 68288 byte(s)
Load pcre-6.5 into code/trunk.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 nigel 87 Copyright (c) 1997-2006 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains the external function pcre_dfa_exec(), which is an
42     alternative matching function that uses a DFA algorithm. This is NOT Perl-
43     compatible, but it has advantages in certain applications. */
44    
45    
46     #include "pcre_internal.h"
47    
48    
49     /* For use to indent debugging output */
50    
51     #define SP " "
52    
53    
54    
55     /*************************************************
56     * Code parameters and static tables *
57     *************************************************/
58    
59     /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
60     into others, under special conditions. A gap of 10 between the blocks should be
61     enough. */
62    
63     #define OP_PROP_EXTRA (EXTRACT_BASIC_MAX+1)
64     #define OP_EXTUNI_EXTRA (EXTRACT_BASIC_MAX+11)
65    
66    
67     /* This table identifies those opcodes that are followed immediately by a
68     character that is to be tested in some way. This makes is possible to
69     centralize the loading of these characters. In the case of Type * etc, the
70     "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
71     small value. */
72    
73     static uschar coptable[] = {
74     0, /* End */
75     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
76     0, 0, /* Any, Anybyte */
77     0, 0, 0, /* NOTPROP, PROP, EXTUNI */
78     0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
79     1, /* Char */
80     1, /* Charnc */
81     1, /* not */
82     /* Positive single-char repeats */
83     1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
84     3, 3, 3, /* upto, minupto, exact */
85     /* Negative single-char repeats - only for chars < 256 */
86     1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
87     3, 3, 3, /* NOT upto, minupto, exact */
88     /* Positive type repeats */
89     1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
90     3, 3, 3, /* Type upto, minupto, exact */
91     /* Character class & ref repeats */
92     0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
93     0, 0, /* CRRANGE, CRMINRANGE */
94     0, /* CLASS */
95     0, /* NCLASS */
96     0, /* XCLASS - variable length */
97     0, /* REF */
98     0, /* RECURSE */
99     0, /* CALLOUT */
100     0, /* Alt */
101     0, /* Ket */
102     0, /* KetRmax */
103     0, /* KetRmin */
104     0, /* Assert */
105     0, /* Assert not */
106     0, /* Assert behind */
107     0, /* Assert behind not */
108     0, /* Reverse */
109     0, /* Once */
110     0, /* COND */
111     0, /* CREF */
112     0, 0, /* BRAZERO, BRAMINZERO */
113     0, /* BRANUMBER */
114     0 /* BRA */
115     };
116    
117     /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
118     and \w */
119    
120     static uschar toptable1[] = {
121     0, 0, 0, 0, 0,
122     ctype_digit, ctype_digit,
123     ctype_space, ctype_space,
124     ctype_word, ctype_word,
125     0 /* OP_ANY */
126     };
127    
128     static uschar toptable2[] = {
129     0, 0, 0, 0, 0,
130     ctype_digit, 0,
131     ctype_space, 0,
132     ctype_word, 0,
133     1 /* OP_ANY */
134     };
135    
136    
137     /* Structure for holding data about a particular state, which is in effect the
138     current data for an active path through the match tree. It must consist
139     entirely of ints because the working vector we are passed, and which we put
140     these structures in, is a vector of ints. */
141    
142     typedef struct stateblock {
143     int offset; /* Offset to opcode */
144     int count; /* Count for repeats */
145     int ims; /* ims flag bits */
146     int data; /* Some use extra data */
147     } stateblock;
148    
149     #define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int))
150    
151    
152     #ifdef DEBUG
153     /*************************************************
154     * Print character string *
155     *************************************************/
156    
157     /* Character string printing function for debugging.
158    
159     Arguments:
160     p points to string
161     length number of bytes
162     f where to print
163    
164     Returns: nothing
165     */
166    
167     static void
168     pchars(unsigned char *p, int length, FILE *f)
169     {
170     int c;
171     while (length-- > 0)
172     {
173     if (isprint(c = *(p++)))
174     fprintf(f, "%c", c);
175     else
176     fprintf(f, "\\x%02x", c);
177     }
178     }
179     #endif
180    
181    
182    
183     /*************************************************
184     * Execute a Regular Expression - DFA engine *
185     *************************************************/
186    
187     /* This internal function applies a compiled pattern to a subject string,
188     starting at a given point, using a DFA engine. This function is called from the
189     external one, possibly multiple times if the pattern is not anchored. The
190     function calls itself recursively for some kinds of subpattern.
191    
192     Arguments:
193     md the match_data block with fixed information
194     this_start_code the opening bracket of this subexpression's code
195     current_subject where we currently are in the subject string
196     start_offset start offset in the subject string
197     offsets vector to contain the matching string offsets
198     offsetcount size of same
199     workspace vector of workspace
200     wscount size of same
201     ims the current ims flags
202     rlevel function call recursion level
203     recursing regex recursive call level
204    
205     Returns: > 0 =>
206     = 0 =>
207     -1 => failed to match
208     < -1 => some kind of unexpected problem
209    
210     The following macros are used for adding states to the two state vectors (one
211     for the current character, one for the following character). */
212    
213     #define ADD_ACTIVE(x,y) \
214     if (active_count++ < wscount) \
215     { \
216     next_active_state->offset = (x); \
217     next_active_state->count = (y); \
218     next_active_state->ims = ims; \
219     next_active_state++; \
220     DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
221     } \
222     else return PCRE_ERROR_DFA_WSSIZE
223    
224     #define ADD_ACTIVE_DATA(x,y,z) \
225     if (active_count++ < wscount) \
226     { \
227     next_active_state->offset = (x); \
228     next_active_state->count = (y); \
229     next_active_state->ims = ims; \
230     next_active_state->data = (z); \
231     next_active_state++; \
232     DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
233     } \
234     else return PCRE_ERROR_DFA_WSSIZE
235    
236     #define ADD_NEW(x,y) \
237     if (new_count++ < wscount) \
238     { \
239     next_new_state->offset = (x); \
240     next_new_state->count = (y); \
241     next_new_state->ims = ims; \
242     next_new_state++; \
243     DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
244     } \
245     else return PCRE_ERROR_DFA_WSSIZE
246    
247     #define ADD_NEW_DATA(x,y,z) \
248     if (new_count++ < wscount) \
249     { \
250     next_new_state->offset = (x); \
251     next_new_state->count = (y); \
252     next_new_state->ims = ims; \
253     next_new_state->data = (z); \
254     next_new_state++; \
255     DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
256     } \
257     else return PCRE_ERROR_DFA_WSSIZE
258    
259     /* And now, here is the code */
260    
261     static int
262     internal_dfa_exec(
263     dfa_match_data *md,
264     const uschar *this_start_code,
265     const uschar *current_subject,
266     int start_offset,
267     int *offsets,
268     int offsetcount,
269     int *workspace,
270     int wscount,
271     int ims,
272     int rlevel,
273     int recursing)
274     {
275     stateblock *active_states, *new_states, *temp_states;
276     stateblock *next_active_state, *next_new_state;
277    
278     const uschar *ctypes, *lcc, *fcc;
279     const uschar *ptr;
280     const uschar *end_code;
281    
282     int active_count, new_count, match_count;
283    
284     /* Some fields in the md block are frequently referenced, so we load them into
285     independent variables in the hope that this will perform better. */
286    
287     const uschar *start_subject = md->start_subject;
288     const uschar *end_subject = md->end_subject;
289     const uschar *start_code = md->start_code;
290    
291 nigel 87 #ifdef SUPPORT_UTF8
292 nigel 77 BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
293 nigel 87 #endif
294 nigel 77
295     rlevel++;
296     offsetcount &= (-2);
297    
298     wscount -= 2;
299     wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) /
300     (2 * INTS_PER_STATEBLOCK);
301    
302     DPRINTF(("\n%.*s---------------------\n"
303     "%.*sCall to internal_dfa_exec f=%d r=%d\n",
304     rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));
305    
306     ctypes = md->tables + ctypes_offset;
307     lcc = md->tables + lcc_offset;
308     fcc = md->tables + fcc_offset;
309    
310     match_count = PCRE_ERROR_NOMATCH; /* A negative number */
311    
312     active_states = (stateblock *)(workspace + 2);
313     next_new_state = new_states = active_states + wscount;
314     new_count = 0;
315    
316     /* The first thing in any (sub) pattern is a bracket of some sort. Push all
317     the alternative states onto the list, and find out where the end is. This
318     makes is possible to use this function recursively, when we want to stop at a
319     matching internal ket rather than at the end.
320    
321     If the first opcode in the first alternative is OP_REVERSE, we are dealing with
322     a backward assertion. In that case, we have to find out the maximum amount to
323     move back, and set up each alternative appropriately. */
324    
325     if (this_start_code[1+LINK_SIZE] == OP_REVERSE)
326     {
327     int max_back = 0;
328     int gone_back;
329    
330     end_code = this_start_code;
331     do
332     {
333     int back = GET(end_code, 2+LINK_SIZE);
334     if (back > max_back) max_back = back;
335     end_code += GET(end_code, 1);
336     }
337     while (*end_code == OP_ALT);
338    
339     /* If we can't go back the amount required for the longest lookbehind
340     pattern, go back as far as we can; some alternatives may still be viable. */
341    
342     #ifdef SUPPORT_UTF8
343     /* In character mode we have to step back character by character */
344    
345     if (utf8)
346     {
347     for (gone_back = 0; gone_back < max_back; gone_back++)
348     {
349     if (current_subject <= start_subject) break;
350     current_subject--;
351     while (current_subject > start_subject &&
352     (*current_subject & 0xc0) == 0x80)
353     current_subject--;
354     }
355     }
356     else
357     #endif
358    
359     /* In byte-mode we can do this quickly. */
360    
361     {
362     gone_back = (current_subject - max_back < start_subject)?
363     current_subject - start_subject : max_back;
364     current_subject -= gone_back;
365     }
366    
367     /* Now we can process the individual branches. */
368    
369     end_code = this_start_code;
370     do
371     {
372     int back = GET(end_code, 2+LINK_SIZE);
373     if (back <= gone_back)
374     {
375     int bstate = end_code - start_code + 2 + 2*LINK_SIZE;
376     ADD_NEW_DATA(-bstate, 0, gone_back - back);
377     }
378     end_code += GET(end_code, 1);
379     }
380     while (*end_code == OP_ALT);
381     }
382    
383     /* This is the code for a "normal" subpattern (not a backward assertion). The
384     start of a whole pattern is always one of these. If we are at the top level,
385     we may be asked to restart matching from the same point that we reached for a
386     previous partial match. We still have to scan through the top-level branches to
387     find the end state. */
388    
389     else
390     {
391     end_code = this_start_code;
392    
393     /* Restarting */
394    
395     if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0)
396     {
397     do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT);
398     new_count = workspace[1];
399     if (!workspace[0])
400     memcpy(new_states, active_states, new_count * sizeof(stateblock));
401     }
402    
403     /* Not restarting */
404    
405     else
406     {
407     do
408     {
409     ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);
410     end_code += GET(end_code, 1);
411     }
412     while (*end_code == OP_ALT);
413     }
414     }
415    
416     workspace[0] = 0; /* Bit indicating which vector is current */
417    
418     DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));
419    
420     /* Loop for scanning the subject */
421    
422     ptr = current_subject;
423     for (;;)
424     {
425     int i, j;
426     int c, d, clen, dlen;
427    
428     /* Make the new state list into the active state list and empty the
429     new state list. */
430    
431     temp_states = active_states;
432     active_states = new_states;
433     new_states = temp_states;
434     active_count = new_count;
435     new_count = 0;
436    
437     workspace[0] ^= 1; /* Remember for the restarting feature */
438     workspace[1] = active_count;
439    
440     #ifdef DEBUG
441     printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
442     pchars((uschar *)ptr, strlen((char *)ptr), stdout);
443     printf("\"\n");
444    
445     printf("%.*sActive states: ", rlevel*2-2, SP);
446     for (i = 0; i < active_count; i++)
447     printf("%d/%d ", active_states[i].offset, active_states[i].count);
448     printf("\n");
449     #endif
450    
451     /* Set the pointers for adding new states */
452    
453     next_active_state = active_states + active_count;
454     next_new_state = new_states;
455    
456     /* Load the current character from the subject outside the loop, as many
457     different states may want to look at it, and we assume that at least one
458     will. */
459    
460     if (ptr < end_subject)
461     {
462     clen = 1;
463     #ifdef SUPPORT_UTF8
464     if (utf8) { GETCHARLEN(c, ptr, clen); } else
465     #endif /* SUPPORT_UTF8 */
466     c = *ptr;
467     }
468     else
469     {
470     clen = 0; /* At end subject */
471     c = -1;
472     }
473    
474     /* Scan up the active states and act on each one. The result of an action
475     may be to add more states to the currently active list (e.g. on hitting a
476     parenthesis) or it may be to put states on the new list, for considering
477     when we move the character pointer on. */
478    
479     for (i = 0; i < active_count; i++)
480     {
481     stateblock *current_state = active_states + i;
482     const uschar *code;
483     int state_offset = current_state->offset;
484     int count, codevalue;
485 nigel 87 int chartype, script;
486 nigel 77
487     #ifdef DEBUG
488     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
489     if (c < 0) printf("-1\n");
490     else if (c > 32 && c < 127) printf("'%c'\n", c);
491     else printf("0x%02x\n", c);
492     #endif
493    
494     /* This variable is referred to implicity in the ADD_xxx macros. */
495    
496     ims = current_state->ims;
497    
498     /* A negative offset is a special case meaning "hold off going to this
499     (negated) state until the number of characters in the data field have
500     been skipped". */
501    
502     if (state_offset < 0)
503     {
504     if (current_state->data > 0)
505     {
506     DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
507     ADD_NEW_DATA(state_offset, current_state->count,
508     current_state->data - 1);
509     continue;
510     }
511     else
512     {
513     current_state->offset = state_offset = -state_offset;
514     }
515     }
516    
517     /* Check for a duplicate state with the same count, and skip if found. */
518    
519     for (j = 0; j < i; j++)
520     {
521     if (active_states[j].offset == state_offset &&
522     active_states[j].count == current_state->count)
523     {
524     DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP));
525     goto NEXT_ACTIVE_STATE;
526     }
527     }
528    
529     /* The state offset is the offset to the opcode */
530    
531     code = start_code + state_offset;
532     codevalue = *code;
533     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */
534    
535     /* If this opcode is followed by an inline character, load it. It is
536     tempting to test for the presence of a subject character here, but that
537     is wrong, because sometimes zero repetitions of the subject are
538     permitted.
539    
540     We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
541     argument that is not a data character - but is always one byte long.
542     Unfortunately, we have to take special action to deal with \P, \p, and
543     \X in this case. To keep the other cases fast, convert these ones to new
544     opcodes. */
545    
546     if (coptable[codevalue] > 0)
547     {
548     dlen = 1;
549     #ifdef SUPPORT_UTF8
550     if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
551     #endif /* SUPPORT_UTF8 */
552     d = code[coptable[codevalue]];
553     if (codevalue >= OP_TYPESTAR)
554     {
555     if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;
556     if (d >= OP_NOTPROP)
557     codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;
558     }
559     }
560     else
561     {
562     dlen = 0; /* Not strictly necessary, but compilers moan */
563     d = -1; /* if these variables are not set. */
564     }
565    
566    
567     /* Now process the individual opcodes */
568    
569     switch (codevalue)
570     {
571    
572     /* ========================================================================== */
573     /* Reached a closing bracket. If not at the end of the pattern, carry
574     on with the next opcode. Otherwise, unless we have an empty string and
575     PCRE_NOTEMPTY is set, save the match data, shifting up all previous
576     matches so we always have the longest first. */
577    
578     case OP_KET:
579     case OP_KETRMIN:
580     case OP_KETRMAX:
581     if (code != end_code)
582     {
583     ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
584     if (codevalue != OP_KET)
585     {
586     ADD_ACTIVE(state_offset - GET(code, 1), 0);
587     }
588     }
589     else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
590     {
591     if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
592     else if (match_count > 0 && ++match_count * 2 >= offsetcount)
593     match_count = 0;
594     count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
595     if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
596     if (offsetcount >= 2)
597     {
598     offsets[0] = current_subject - start_subject;
599     offsets[1] = ptr - start_subject;
600     DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
601     offsets[1] - offsets[0], current_subject));
602     }
603     if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
604     {
605     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
606     "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
607     match_count, rlevel*2-2, SP));
608     return match_count;
609     }
610     }
611     break;
612    
613     /* ========================================================================== */
614     /* These opcodes add to the current list of states without looking
615     at the current character. */
616    
617     /*-----------------------------------------------------------------*/
618     case OP_ALT:
619     do { code += GET(code, 1); } while (*code == OP_ALT);
620     ADD_ACTIVE(code - start_code, 0);
621     break;
622    
623     /*-----------------------------------------------------------------*/
624     case OP_BRA:
625     do
626     {
627     ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
628     code += GET(code, 1);
629     }
630     while (*code == OP_ALT);
631     break;
632    
633     /*-----------------------------------------------------------------*/
634     case OP_BRAZERO:
635     case OP_BRAMINZERO:
636     ADD_ACTIVE(state_offset + 1, 0);
637     code += 1 + GET(code, 2);
638     while (*code == OP_ALT) code += GET(code, 1);
639     ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
640     break;
641    
642     /*-----------------------------------------------------------------*/
643     case OP_BRANUMBER:
644     ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
645     break;
646    
647     /*-----------------------------------------------------------------*/
648     case OP_CIRC:
649     if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
650     ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))
651     { ADD_ACTIVE(state_offset + 1, 0); }
652     break;
653    
654     /*-----------------------------------------------------------------*/
655     case OP_EOD:
656     if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }
657     break;
658    
659     /*-----------------------------------------------------------------*/
660     case OP_OPT:
661     ims = code[1];
662     ADD_ACTIVE(state_offset + 2, 0);
663     break;
664    
665     /*-----------------------------------------------------------------*/
666     case OP_SOD:
667     if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); }
668     break;
669    
670     /*-----------------------------------------------------------------*/
671     case OP_SOM:
672     if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); }
673     break;
674    
675    
676     /* ========================================================================== */
677     /* These opcodes inspect the next subject character, and sometimes
678     the previous one as well, but do not have an argument. The variable
679     clen contains the length of the current character and is zero if we are
680     at the end of the subject. */
681    
682     /*-----------------------------------------------------------------*/
683     case OP_ANY:
684     if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))
685     { ADD_NEW(state_offset + 1, 0); }
686     break;
687    
688     /*-----------------------------------------------------------------*/
689     case OP_EODN:
690     if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))
691     { ADD_ACTIVE(state_offset + 1, 0); }
692     break;
693    
694     /*-----------------------------------------------------------------*/
695     case OP_DOLL:
696     if ((md->moptions & PCRE_NOTEOL) == 0)
697     {
698     if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||
699     (ims & PCRE_MULTILINE) != 0)))
700     { ADD_ACTIVE(state_offset + 1, 0); }
701     }
702     else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)
703     { ADD_ACTIVE(state_offset + 1, 0); }
704     break;
705    
706     /*-----------------------------------------------------------------*/
707    
708     case OP_DIGIT:
709     case OP_WHITESPACE:
710     case OP_WORDCHAR:
711     if (clen > 0 && c < 256 &&
712     ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)
713     { ADD_NEW(state_offset + 1, 0); }
714     break;
715    
716     /*-----------------------------------------------------------------*/
717     case OP_NOT_DIGIT:
718     case OP_NOT_WHITESPACE:
719     case OP_NOT_WORDCHAR:
720     if (clen > 0 && (c >= 256 ||
721     ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0))
722     { ADD_NEW(state_offset + 1, 0); }
723     break;
724    
725     /*-----------------------------------------------------------------*/
726     case OP_WORD_BOUNDARY:
727     case OP_NOT_WORD_BOUNDARY:
728     {
729     int left_word, right_word;
730    
731     if (ptr > start_subject)
732     {
733     const uschar *temp = ptr - 1;
734     #ifdef SUPPORT_UTF8
735     if (utf8) BACKCHAR(temp);
736     #endif
737     GETCHARTEST(d, temp);
738     left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
739     }
740     else left_word = 0;
741    
742     if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
743     else right_word = 0;
744    
745     if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
746     { ADD_ACTIVE(state_offset + 1, 0); }
747     }
748     break;
749    
750    
751     #ifdef SUPPORT_UCP
752    
753     /*-----------------------------------------------------------------*/
754     /* Check the next character by Unicode property. We will get here only
755     if the support is in the binary; otherwise a compile-time error occurs.
756     */
757    
758     case OP_PROP:
759     case OP_NOTPROP:
760     if (clen > 0)
761     {
762 nigel 87 BOOL OK;
763     int category = _pcre_ucp_findprop(c, &chartype, &script);
764     switch(code[1])
765 nigel 77 {
766 nigel 87 case PT_ANY:
767     OK = TRUE;
768     break;
769    
770     case PT_LAMP:
771     OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
772     break;
773    
774     case PT_GC:
775     OK = category == code[2];
776     break;
777    
778     case PT_PC:
779     OK = chartype == code[2];
780     break;
781    
782     case PT_SC:
783     OK = script == code[2];
784     break;
785    
786     /* Should never occur, but keep compilers from grumbling. */
787    
788     default:
789     OK = codevalue != OP_PROP;
790     break;
791 nigel 77 }
792 nigel 87
793     if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
794 nigel 77 }
795     break;
796     #endif
797    
798    
799    
800     /* ========================================================================== */
801     /* These opcodes likewise inspect the subject character, but have an
802     argument that is not a data character. It is one of these opcodes:
803     OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,
804     OP_NOT_WORDCHAR. The value is loaded into d. */
805    
806     case OP_TYPEPLUS:
807     case OP_TYPEMINPLUS:
808     count = current_state->count; /* Already matched */
809     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
810     if (clen > 0)
811     {
812     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
813     (c < 256 &&
814     (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
815     ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
816     {
817     count++;
818     ADD_NEW(state_offset, count);
819     }
820     }
821     break;
822    
823     /*-----------------------------------------------------------------*/
824     case OP_TYPEQUERY:
825     case OP_TYPEMINQUERY:
826     ADD_ACTIVE(state_offset + 2, 0);
827     if (clen > 0)
828     {
829     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
830     (c < 256 &&
831     (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
832     ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
833     {
834     ADD_NEW(state_offset + 2, 0);
835     }
836     }
837     break;
838    
839     /*-----------------------------------------------------------------*/
840     case OP_TYPESTAR:
841     case OP_TYPEMINSTAR:
842     ADD_ACTIVE(state_offset + 2, 0);
843     if (clen > 0)
844     {
845     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
846     (c < 256 &&
847     (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
848     ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
849     {
850     ADD_NEW(state_offset, 0);
851     }
852     }
853     break;
854    
855     /*-----------------------------------------------------------------*/
856     case OP_TYPEEXACT:
857     case OP_TYPEUPTO:
858     case OP_TYPEMINUPTO:
859     if (codevalue != OP_TYPEEXACT)
860     { ADD_ACTIVE(state_offset + 4, 0); }
861     count = current_state->count; /* Number already matched */
862     if (clen > 0)
863     {
864     if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
865     (c < 256 &&
866     (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&
867     ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
868     {
869     if (++count >= GET2(code, 1))
870     { ADD_NEW(state_offset + 4, 0); }
871     else
872     { ADD_NEW(state_offset, count); }
873     }
874     }
875     break;
876    
877     /* ========================================================================== */
878     /* These are virtual opcodes that are used when something like
879     OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It
880     keeps the code above fast for the other cases. The argument is in the
881     d variable. */
882    
883     case OP_PROP_EXTRA + OP_TYPEPLUS:
884     case OP_PROP_EXTRA + OP_TYPEMINPLUS:
885     count = current_state->count; /* Already matched */
886 nigel 87 if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
887 nigel 77 if (clen > 0)
888     {
889 nigel 87 BOOL OK;
890     int category = _pcre_ucp_findprop(c, &chartype, &script);
891     switch(code[2])
892     {
893     case PT_ANY:
894     OK = TRUE;
895     break;
896    
897     case PT_LAMP:
898     OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
899     break;
900    
901     case PT_GC:
902     OK = category == code[3];
903     break;
904    
905     case PT_PC:
906     OK = chartype == code[3];
907     break;
908    
909     case PT_SC:
910     OK = script == code[3];
911     break;
912    
913     /* Should never occur, but keep compilers from grumbling. */
914    
915     default:
916     OK = codevalue != OP_PROP;
917     break;
918     }
919    
920     if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }
921 nigel 77 }
922     break;
923    
924     /*-----------------------------------------------------------------*/
925     case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
926     case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
927     count = current_state->count; /* Already matched */
928     if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
929 nigel 87 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
930 nigel 77 {
931     const uschar *nptr = ptr + clen;
932     int ncount = 0;
933     while (nptr < end_subject)
934     {
935     int nd;
936     int ndlen = 1;
937     GETCHARLEN(nd, nptr, ndlen);
938 nigel 87 if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
939 nigel 77 ncount++;
940     nptr += ndlen;
941     }
942     count++;
943     ADD_NEW_DATA(-state_offset, count, ncount);
944     }
945     break;
946    
947     /*-----------------------------------------------------------------*/
948     case OP_PROP_EXTRA + OP_TYPEQUERY:
949     case OP_PROP_EXTRA + OP_TYPEMINQUERY:
950 nigel 87 count = 4;
951 nigel 77 goto QS1;
952    
953     case OP_PROP_EXTRA + OP_TYPESTAR:
954     case OP_PROP_EXTRA + OP_TYPEMINSTAR:
955     count = 0;
956    
957     QS1:
958    
959 nigel 87 ADD_ACTIVE(state_offset + 4, 0);
960 nigel 77 if (clen > 0)
961     {
962 nigel 87 BOOL OK;
963     int category = _pcre_ucp_findprop(c, &chartype, &script);
964     switch(code[2])
965     {
966     case PT_ANY:
967     OK = TRUE;
968     break;
969    
970     case PT_LAMP:
971     OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
972     break;
973    
974     case PT_GC:
975     OK = category == code[3];
976     break;
977    
978     case PT_PC:
979     OK = chartype == code[3];
980     break;
981    
982     case PT_SC:
983     OK = script == code[3];
984     break;
985    
986     /* Should never occur, but keep compilers from grumbling. */
987    
988     default:
989     OK = codevalue != OP_PROP;
990     break;
991     }
992    
993     if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }
994 nigel 77 }
995     break;
996    
997     /*-----------------------------------------------------------------*/
998     case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
999     case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1000     count = 2;
1001     goto QS2;
1002    
1003     case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1004     case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1005     count = 0;
1006    
1007     QS2:
1008    
1009     ADD_ACTIVE(state_offset + 2, 0);
1010 nigel 87 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1011 nigel 77 {
1012     const uschar *nptr = ptr + clen;
1013     int ncount = 0;
1014     while (nptr < end_subject)
1015     {
1016     int nd;
1017     int ndlen = 1;
1018     GETCHARLEN(nd, nptr, ndlen);
1019 nigel 87 if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1020 nigel 77 ncount++;
1021     nptr += ndlen;
1022     }
1023     ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1024     }
1025     break;
1026    
1027     /*-----------------------------------------------------------------*/
1028     case OP_PROP_EXTRA + OP_TYPEEXACT:
1029     case OP_PROP_EXTRA + OP_TYPEUPTO:
1030     case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1031     if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1032 nigel 87 { ADD_ACTIVE(state_offset + 6, 0); }
1033 nigel 77 count = current_state->count; /* Number already matched */
1034     if (clen > 0)
1035     {
1036 nigel 87 BOOL OK;
1037     int category = _pcre_ucp_findprop(c, &chartype, &script);
1038     switch(code[4])
1039 nigel 77 {
1040 nigel 87 case PT_ANY:
1041     OK = TRUE;
1042     break;
1043    
1044     case PT_LAMP:
1045     OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1046     break;
1047    
1048     case PT_GC:
1049     OK = category == code[5];
1050     break;
1051    
1052     case PT_PC:
1053     OK = chartype == code[5];
1054     break;
1055    
1056     case PT_SC:
1057     OK = script == code[5];
1058     break;
1059    
1060     /* Should never occur, but keep compilers from grumbling. */
1061    
1062     default:
1063     OK = codevalue != OP_PROP;
1064     break;
1065     }
1066    
1067     if (OK == (d == OP_PROP))
1068     {
1069 nigel 77 if (++count >= GET2(code, 1))
1070 nigel 87 { ADD_NEW(state_offset + 6, 0); }
1071 nigel 77 else
1072     { ADD_NEW(state_offset, count); }
1073     }
1074     }
1075     break;
1076    
1077     /*-----------------------------------------------------------------*/
1078     case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1079     case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1080     case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1081     if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1082     { ADD_ACTIVE(state_offset + 4, 0); }
1083     count = current_state->count; /* Number already matched */
1084 nigel 87 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1085 nigel 77 {
1086     const uschar *nptr = ptr + clen;
1087     int ncount = 0;
1088     while (nptr < end_subject)
1089     {
1090     int nd;
1091     int ndlen = 1;
1092     GETCHARLEN(nd, nptr, ndlen);
1093 nigel 87 if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1094 nigel 77 ncount++;
1095     nptr += ndlen;
1096     }
1097     if (++count >= GET2(code, 1))
1098     { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1099     else
1100     { ADD_NEW_DATA(-state_offset, count, ncount); }
1101     }
1102     break;
1103    
1104     /* ========================================================================== */
1105     /* These opcodes are followed by a character that is usually compared
1106     to the current subject character; it is loaded into d. We still get
1107     here even if there is no subject character, because in some cases zero
1108     repetitions are permitted. */
1109    
1110     /*-----------------------------------------------------------------*/
1111     case OP_CHAR:
1112     if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); }
1113     break;
1114    
1115     /*-----------------------------------------------------------------*/
1116     case OP_CHARNC:
1117     if (clen == 0) break;
1118    
1119     #ifdef SUPPORT_UTF8
1120     if (utf8)
1121     {
1122     if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1123     {
1124 nigel 87 int othercase;
1125 nigel 77 if (c < 128) othercase = fcc[c]; else
1126    
1127     /* If we have Unicode property support, we can use it to test the
1128 nigel 87 other case of the character. */
1129 nigel 77
1130     #ifdef SUPPORT_UCP
1131 nigel 87 othercase = _pcre_ucp_othercase(c);
1132     #else
1133     othercase = -1;
1134 nigel 77 #endif
1135    
1136     if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1137     }
1138     }
1139     else
1140     #endif /* SUPPORT_UTF8 */
1141    
1142     /* Non-UTF-8 mode */
1143     {
1144     if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }
1145     }
1146     break;
1147    
1148    
1149     #ifdef SUPPORT_UCP
1150     /*-----------------------------------------------------------------*/
1151     /* This is a tricky one because it can match more than one character.
1152     Find out how many characters to skip, and then set up a negative state
1153     to wait for them to pass before continuing. */
1154    
1155     case OP_EXTUNI:
1156 nigel 87 if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1157 nigel 77 {
1158     const uschar *nptr = ptr + clen;
1159     int ncount = 0;
1160     while (nptr < end_subject)
1161     {
1162     int nclen = 1;
1163     GETCHARLEN(c, nptr, nclen);
1164 nigel 87 if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1165 nigel 77 ncount++;
1166     nptr += nclen;
1167     }
1168     ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
1169     }
1170     break;
1171     #endif
1172    
1173     /*-----------------------------------------------------------------*/
1174     /* Match a negated single character. This is only used for one-byte
1175     characters, that is, we know that d < 256. The character we are
1176     checking (c) can be multibyte. */
1177    
1178     case OP_NOT:
1179     if (clen > 0)
1180     {
1181     int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1182     if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1183     }
1184     break;
1185    
1186     /*-----------------------------------------------------------------*/
1187     case OP_PLUS:
1188     case OP_MINPLUS:
1189     case OP_NOTPLUS:
1190     case OP_NOTMINPLUS:
1191     count = current_state->count; /* Already matched */
1192     if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1193     if (clen > 0)
1194     {
1195     int otherd = -1;
1196     if ((ims & PCRE_CASELESS) != 0)
1197     {
1198     #ifdef SUPPORT_UTF8
1199 nigel 87 if (utf8 && d >= 128)
1200 nigel 77 {
1201     #ifdef SUPPORT_UCP
1202 nigel 87 otherd = _pcre_ucp_othercase(d);
1203 nigel 77 #endif /* SUPPORT_UCP */
1204     }
1205     else
1206     #endif /* SUPPORT_UTF8 */
1207     otherd = fcc[d];
1208     }
1209     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1210     { count++; ADD_NEW(state_offset, count); }
1211     }
1212     break;
1213    
1214     /*-----------------------------------------------------------------*/
1215     case OP_QUERY:
1216     case OP_MINQUERY:
1217     case OP_NOTQUERY:
1218     case OP_NOTMINQUERY:
1219     ADD_ACTIVE(state_offset + dlen + 1, 0);
1220     if (clen > 0)
1221     {
1222     int otherd = -1;
1223     if ((ims && PCRE_CASELESS) != 0)
1224     {
1225     #ifdef SUPPORT_UTF8
1226 nigel 87 if (utf8 && d >= 128)
1227 nigel 77 {
1228     #ifdef SUPPORT_UCP
1229 nigel 87 otherd = _pcre_ucp_othercase(d);
1230 nigel 77 #endif /* SUPPORT_UCP */
1231     }
1232     else
1233     #endif /* SUPPORT_UTF8 */
1234     otherd = fcc[d];
1235     }
1236     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1237     { ADD_NEW(state_offset + dlen + 1, 0); }
1238     }
1239     break;
1240    
1241     /*-----------------------------------------------------------------*/
1242     case OP_STAR:
1243     case OP_MINSTAR:
1244     case OP_NOTSTAR:
1245     case OP_NOTMINSTAR:
1246     ADD_ACTIVE(state_offset + dlen + 1, 0);
1247     if (clen > 0)
1248     {
1249     int otherd = -1;
1250     if ((ims && PCRE_CASELESS) != 0)
1251     {
1252     #ifdef SUPPORT_UTF8
1253 nigel 87 if (utf8 && d >= 128)
1254 nigel 77 {
1255     #ifdef SUPPORT_UCP
1256 nigel 87 otherd = _pcre_ucp_othercase(d);
1257 nigel 77 #endif /* SUPPORT_UCP */
1258     }
1259     else
1260     #endif /* SUPPORT_UTF8 */
1261     otherd = fcc[d];
1262     }
1263     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1264     { ADD_NEW(state_offset, 0); }
1265     }
1266     break;
1267    
1268     /*-----------------------------------------------------------------*/
1269     case OP_EXACT:
1270     case OP_UPTO:
1271     case OP_MINUPTO:
1272     case OP_NOTEXACT:
1273     case OP_NOTUPTO:
1274     case OP_NOTMINUPTO:
1275     if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)
1276     { ADD_ACTIVE(state_offset + dlen + 3, 0); }
1277     count = current_state->count; /* Number already matched */
1278     if (clen > 0)
1279     {
1280     int otherd = -1;
1281     if ((ims & PCRE_CASELESS) != 0)
1282     {
1283     #ifdef SUPPORT_UTF8
1284 nigel 87 if (utf8 && d >= 128)
1285 nigel 77 {
1286     #ifdef SUPPORT_UCP
1287 nigel 87 otherd = _pcre_ucp_othercase(d);
1288 nigel 77 #endif /* SUPPORT_UCP */
1289     }
1290     else
1291     #endif /* SUPPORT_UTF8 */
1292     otherd = fcc[d];
1293     }
1294     if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1295     {
1296     if (++count >= GET2(code, 1))
1297     { ADD_NEW(state_offset + dlen + 3, 0); }
1298     else
1299     { ADD_NEW(state_offset, count); }
1300     }
1301     }
1302     break;
1303    
1304    
1305     /* ========================================================================== */
1306     /* These are the class-handling opcodes */
1307    
1308     case OP_CLASS:
1309     case OP_NCLASS:
1310     case OP_XCLASS:
1311     {
1312     BOOL isinclass = FALSE;
1313     int next_state_offset;
1314     const uschar *ecode;
1315    
1316     /* For a simple class, there is always just a 32-byte table, and we
1317     can set isinclass from it. */
1318    
1319     if (codevalue != OP_XCLASS)
1320     {
1321     ecode = code + 33;
1322     if (clen > 0)
1323     {
1324     isinclass = (c > 255)? (codevalue == OP_NCLASS) :
1325     ((code[1 + c/8] & (1 << (c&7))) != 0);
1326     }
1327     }
1328    
1329     /* An extended class may have a table or a list of single characters,
1330     ranges, or both, and it may be positive or negative. There's a
1331     function that sorts all this out. */
1332    
1333     else
1334     {
1335     ecode = code + GET(code, 1);
1336     if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);
1337     }
1338    
1339     /* At this point, isinclass is set for all kinds of class, and ecode
1340     points to the byte after the end of the class. If there is a
1341     quantifier, this is where it will be. */
1342    
1343     next_state_offset = ecode - start_code;
1344    
1345     switch (*ecode)
1346     {
1347     case OP_CRSTAR:
1348     case OP_CRMINSTAR:
1349     ADD_ACTIVE(next_state_offset + 1, 0);
1350     if (isinclass) { ADD_NEW(state_offset, 0); }
1351     break;
1352    
1353     case OP_CRPLUS:
1354     case OP_CRMINPLUS:
1355     count = current_state->count; /* Already matched */
1356     if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
1357     if (isinclass) { count++; ADD_NEW(state_offset, count); }
1358     break;
1359    
1360     case OP_CRQUERY:
1361     case OP_CRMINQUERY:
1362     ADD_ACTIVE(next_state_offset + 1, 0);
1363     if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
1364     break;
1365    
1366     case OP_CRRANGE:
1367     case OP_CRMINRANGE:
1368     count = current_state->count; /* Already matched */
1369     if (count >= GET2(ecode, 1))
1370     { ADD_ACTIVE(next_state_offset + 5, 0); }
1371     if (isinclass)
1372     {
1373     if (++count >= GET2(ecode, 3))
1374     { ADD_NEW(next_state_offset + 5, 0); }
1375     else
1376     { ADD_NEW(state_offset, count); }
1377     }
1378     break;
1379    
1380     default:
1381     if (isinclass) { ADD_NEW(next_state_offset, 0); }
1382     break;
1383     }
1384     }
1385     break;
1386    
1387     /* ========================================================================== */
1388     /* These are the opcodes for fancy brackets of various kinds. We have
1389     to use recursion in order to handle them. */
1390    
1391     case OP_ASSERT:
1392     case OP_ASSERT_NOT:
1393     case OP_ASSERTBACK:
1394     case OP_ASSERTBACK_NOT:
1395     {
1396     int rc;
1397     int local_offsets[2];
1398     int local_workspace[1000];
1399     const uschar *endasscode = code + GET(code, 1);
1400    
1401     while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
1402    
1403     rc = internal_dfa_exec(
1404     md, /* static match data */
1405     code, /* this subexpression's code */
1406     ptr, /* where we currently are */
1407     ptr - start_subject, /* start offset */
1408     local_offsets, /* offset vector */
1409     sizeof(local_offsets)/sizeof(int), /* size of same */
1410     local_workspace, /* workspace vector */
1411     sizeof(local_workspace)/sizeof(int), /* size of same */
1412     ims, /* the current ims flags */
1413     rlevel, /* function recursion level */
1414     recursing); /* pass on regex recursion */
1415    
1416     if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
1417     { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
1418     }
1419     break;
1420    
1421     /*-----------------------------------------------------------------*/
1422     case OP_COND:
1423     {
1424     int local_offsets[1000];
1425     int local_workspace[1000];
1426     int condcode = code[LINK_SIZE+1];
1427    
1428     /* The only supported version of OP_CREF is for the value 0xffff, which
1429     means "test if in a recursion". */
1430    
1431     if (condcode == OP_CREF)
1432     {
1433     int value = GET2(code, LINK_SIZE+2);
1434     if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;
1435     if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
1436     else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1437     }
1438    
1439     /* Otherwise, the condition is an assertion */
1440    
1441     else
1442     {
1443     int rc;
1444     const uschar *asscode = code + LINK_SIZE + 1;
1445     const uschar *endasscode = asscode + GET(asscode, 1);
1446    
1447     while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
1448    
1449     rc = internal_dfa_exec(
1450     md, /* fixed match data */
1451     asscode, /* this subexpression's code */
1452     ptr, /* where we currently are */
1453     ptr - start_subject, /* start offset */
1454     local_offsets, /* offset vector */
1455     sizeof(local_offsets)/sizeof(int), /* size of same */
1456     local_workspace, /* workspace vector */
1457     sizeof(local_workspace)/sizeof(int), /* size of same */
1458     ims, /* the current ims flags */
1459     rlevel, /* function recursion level */
1460     recursing); /* pass on regex recursion */
1461    
1462     if ((rc >= 0) ==
1463     (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
1464     { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
1465     else
1466     { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1467     }
1468     }
1469     break;
1470    
1471     /*-----------------------------------------------------------------*/
1472     case OP_RECURSE:
1473     {
1474     int local_offsets[1000];
1475     int local_workspace[1000];
1476     int rc;
1477    
1478     DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,
1479     recursing + 1));
1480    
1481     rc = internal_dfa_exec(
1482     md, /* fixed match data */
1483     start_code + GET(code, 1), /* this subexpression's code */
1484     ptr, /* where we currently are */
1485     ptr - start_subject, /* start offset */
1486     local_offsets, /* offset vector */
1487     sizeof(local_offsets)/sizeof(int), /* size of same */
1488     local_workspace, /* workspace vector */
1489     sizeof(local_workspace)/sizeof(int), /* size of same */
1490     ims, /* the current ims flags */
1491     rlevel, /* function recursion level */
1492     recursing + 1); /* regex recurse level */
1493    
1494     DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,
1495     recursing + 1, rc));
1496    
1497     /* Ran out of internal offsets */
1498    
1499     if (rc == 0) return PCRE_ERROR_DFA_RECURSE;
1500    
1501     /* For each successful matched substring, set up the next state with a
1502     count of characters to skip before trying it. Note that the count is in
1503     characters, not bytes. */
1504    
1505     if (rc > 0)
1506     {
1507     for (rc = rc*2 - 2; rc >= 0; rc -= 2)
1508     {
1509     const uschar *p = start_subject + local_offsets[rc];
1510     const uschar *pp = start_subject + local_offsets[rc+1];
1511     int charcount = local_offsets[rc+1] - local_offsets[rc];
1512     while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
1513     if (charcount > 0)
1514     {
1515     ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
1516     }
1517     else
1518     {
1519     ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0);
1520     }
1521     }
1522     }
1523     else if (rc != PCRE_ERROR_NOMATCH) return rc;
1524     }
1525     break;
1526    
1527     /*-----------------------------------------------------------------*/
1528     case OP_ONCE:
1529     {
1530     int local_offsets[2];
1531     int local_workspace[1000];
1532    
1533     int rc = internal_dfa_exec(
1534     md, /* fixed match data */
1535     code, /* this subexpression's code */
1536     ptr, /* where we currently are */
1537     ptr - start_subject, /* start offset */
1538     local_offsets, /* offset vector */
1539     sizeof(local_offsets)/sizeof(int), /* size of same */
1540     local_workspace, /* workspace vector */
1541     sizeof(local_workspace)/sizeof(int), /* size of same */
1542     ims, /* the current ims flags */
1543     rlevel, /* function recursion level */
1544     recursing); /* pass on regex recursion */
1545    
1546     if (rc >= 0)
1547     {
1548     const uschar *end_subpattern = code;
1549     int charcount = local_offsets[1] - local_offsets[0];
1550     int next_state_offset, repeat_state_offset;
1551    
1552     do { end_subpattern += GET(end_subpattern, 1); }
1553     while (*end_subpattern == OP_ALT);
1554     next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;
1555    
1556     /* If the end of this subpattern is KETRMAX or KETRMIN, we must
1557     arrange for the repeat state also to be added to the relevant list.
1558     Calculate the offset, or set -1 for no repeat. */
1559    
1560     repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
1561     *end_subpattern == OP_KETRMIN)?
1562     end_subpattern - start_code - GET(end_subpattern, 1) : -1;
1563    
1564     /* If we have matched an empty string, add the next state at the
1565     current character pointer. This is important so that the duplicate
1566     checking kicks in, which is what breaks infinite loops that match an
1567     empty string. */
1568    
1569     if (charcount == 0)
1570     {
1571     ADD_ACTIVE(next_state_offset, 0);
1572     }
1573    
1574     /* Optimization: if there are no more active states, and there
1575     are no new states yet set up, then skip over the subject string
1576     right here, to save looping. Otherwise, set up the new state to swing
1577     into action when the end of the substring is reached. */
1578    
1579     else if (i + 1 >= active_count && new_count == 0)
1580     {
1581     ptr += charcount;
1582     clen = 0;
1583     ADD_NEW(next_state_offset, 0);
1584    
1585     /* If we are adding a repeat state at the new character position,
1586     we must fudge things so that it is the only current state.
1587     Otherwise, it might be a duplicate of one we processed before, and
1588     that would cause it to be skipped. */
1589    
1590     if (repeat_state_offset >= 0)
1591     {
1592     next_active_state = active_states;
1593     active_count = 0;
1594     i = -1;
1595     ADD_ACTIVE(repeat_state_offset, 0);
1596     }
1597     }
1598     else
1599     {
1600     const uschar *p = start_subject + local_offsets[0];
1601     const uschar *pp = start_subject + local_offsets[1];
1602     while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
1603     ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
1604     if (repeat_state_offset >= 0)
1605     { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
1606     }
1607    
1608     }
1609     else if (rc != PCRE_ERROR_NOMATCH) return rc;
1610     }
1611     break;
1612    
1613    
1614     /* ========================================================================== */
1615     /* Handle callouts */
1616    
1617     case OP_CALLOUT:
1618     if (pcre_callout != NULL)
1619     {
1620     int rrc;
1621     pcre_callout_block cb;
1622     cb.version = 1; /* Version 1 of the callout block */
1623     cb.callout_number = code[1];
1624     cb.offset_vector = offsets;
1625 nigel 87 cb.subject = (PCRE_SPTR)start_subject;
1626 nigel 77 cb.subject_length = end_subject - start_subject;
1627     cb.start_match = current_subject - start_subject;
1628     cb.current_position = ptr - start_subject;
1629     cb.pattern_position = GET(code, 2);
1630     cb.next_item_length = GET(code, 2 + LINK_SIZE);
1631     cb.capture_top = 1;
1632     cb.capture_last = -1;
1633     cb.callout_data = md->callout_data;
1634     if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */
1635     if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }
1636     }
1637     break;
1638    
1639    
1640     /* ========================================================================== */
1641     default: /* Unsupported opcode */
1642     return PCRE_ERROR_DFA_UITEM;
1643     }
1644    
1645     NEXT_ACTIVE_STATE: continue;
1646    
1647     } /* End of loop scanning active states */
1648    
1649     /* We have finished the processing at the current subject character. If no
1650     new states have been set for the next character, we have found all the
1651     matches that we are going to find. If we are at the top level and partial
1652     matching has been requested, check for appropriate conditions. */
1653    
1654     if (new_count <= 0)
1655     {
1656     if (match_count < 0 && /* No matches found */
1657     rlevel == 1 && /* Top level match function */
1658     (md->moptions & PCRE_PARTIAL) != 0 && /* Want partial matching */
1659     ptr >= end_subject && /* Reached end of subject */
1660     ptr > current_subject) /* Matched non-empty string */
1661     {
1662     if (offsetcount >= 2)
1663     {
1664     offsets[0] = current_subject - start_subject;
1665     offsets[1] = end_subject - start_subject;
1666     }
1667     match_count = PCRE_ERROR_PARTIAL;
1668     }
1669    
1670     DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
1671     "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
1672     rlevel*2-2, SP));
1673     return match_count;
1674     }
1675    
1676     /* One or more states are active for the next character. */
1677    
1678     ptr += clen; /* Advance to next subject character */
1679     } /* Loop to move along the subject string */
1680    
1681     /* Control never gets here, but we must keep the compiler happy. */
1682    
1683     DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"
1684     "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));
1685     return PCRE_ERROR_NOMATCH;
1686     }
1687    
1688    
1689    
1690    
1691     /*************************************************
1692     * Execute a Regular Expression - DFA engine *
1693     *************************************************/
1694    
1695     /* This external function applies a compiled re to a subject string using a DFA
1696     engine. This function calls the internal function multiple times if the pattern
1697     is not anchored.
1698    
1699     Arguments:
1700     argument_re points to the compiled expression
1701     extra_data points to extra data or is NULL (not currently used)
1702     subject points to the subject string
1703     length length of subject string (may contain binary zeros)
1704     start_offset where to start in the subject string
1705     options option bits
1706     offsets vector of match offsets
1707     offsetcount size of same
1708     workspace workspace vector
1709     wscount size of same
1710    
1711     Returns: > 0 => number of match offset pairs placed in offsets
1712     = 0 => offsets overflowed; longest matches are present
1713     -1 => failed to match
1714     < -1 => some kind of unexpected problem
1715     */
1716    
1717 nigel 87 PCRE_DATA_SCOPE int
1718 nigel 77 pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
1719     const char *subject, int length, int start_offset, int options, int *offsets,
1720     int offsetcount, int *workspace, int wscount)
1721     {
1722     real_pcre *re = (real_pcre *)argument_re;
1723     dfa_match_data match_block;
1724     BOOL utf8, anchored, startline, firstline;
1725     const uschar *current_subject, *end_subject, *lcc;
1726    
1727     pcre_study_data internal_study;
1728     const pcre_study_data *study = NULL;
1729     real_pcre internal_re;
1730    
1731     const uschar *req_byte_ptr;
1732     const uschar *start_bits = NULL;
1733     BOOL first_byte_caseless = FALSE;
1734     BOOL req_byte_caseless = FALSE;
1735     int first_byte = -1;
1736     int req_byte = -1;
1737     int req_byte2 = -1;
1738    
1739     /* Plausibility checks */
1740    
1741     if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
1742     if (re == NULL || subject == NULL || workspace == NULL ||
1743     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
1744     if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
1745     if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
1746    
1747     /* We need to find the pointer to any study data before we test for byte
1748     flipping, so we scan the extra_data block first. This may set two fields in the
1749     match block, so we must initialize them beforehand. However, the other fields
1750     in the match block must not be set until after the byte flipping. */
1751    
1752     match_block.tables = re->tables;
1753     match_block.callout_data = NULL;
1754    
1755     if (extra_data != NULL)
1756     {
1757     unsigned int flags = extra_data->flags;
1758     if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
1759     study = (const pcre_study_data *)extra_data->study_data;
1760     if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
1761 nigel 87 if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
1762     return PCRE_ERROR_DFA_UMLIMIT;
1763 nigel 77 if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
1764     match_block.callout_data = extra_data->callout_data;
1765     if ((flags & PCRE_EXTRA_TABLES) != 0)
1766     match_block.tables = extra_data->tables;
1767     }
1768    
1769     /* Check that the first field in the block is the magic number. If it is not,
1770     test for a regex that was compiled on a host of opposite endianness. If this is
1771     the case, flipped values are put in internal_re and internal_study if there was
1772     study data too. */
1773    
1774     if (re->magic_number != MAGIC_NUMBER)
1775     {
1776     re = _pcre_try_flipped(re, &internal_re, study, &internal_study);
1777     if (re == NULL) return PCRE_ERROR_BADMAGIC;
1778     if (study != NULL) study = &internal_study;
1779     }
1780    
1781     /* Set some local values */
1782    
1783     current_subject = (const unsigned char *)subject + start_offset;
1784     end_subject = (const unsigned char *)subject + length;
1785     req_byte_ptr = current_subject - 1;
1786    
1787     utf8 = (re->options & PCRE_UTF8) != 0;
1788    
1789 nigel 87 anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
1790     (re->options & PCRE_ANCHORED) != 0;
1791    
1792 nigel 77 /* The remaining fixed data for passing around. */
1793    
1794     match_block.start_code = (const uschar *)argument_re +
1795     re->name_table_offset + re->name_count * re->name_entry_size;
1796     match_block.start_subject = (const unsigned char *)subject;
1797     match_block.end_subject = end_subject;
1798     match_block.moptions = options;
1799     match_block.poptions = re->options;
1800    
1801     /* Check a UTF-8 string if required. Unfortunately there's no way of passing
1802     back the character offset. */
1803    
1804     #ifdef SUPPORT_UTF8
1805     if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
1806     {
1807     if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
1808     return PCRE_ERROR_BADUTF8;
1809     if (start_offset > 0 && start_offset < length)
1810     {
1811     int tb = ((uschar *)subject)[start_offset];
1812     if (tb > 127)
1813     {
1814     tb &= 0xc0;
1815     if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;
1816     }
1817     }
1818     }
1819     #endif
1820    
1821     /* If the exec call supplied NULL for tables, use the inbuilt ones. This
1822     is a feature that makes it possible to save compiled regex and re-use them
1823     in other programs later. */
1824    
1825     if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;
1826    
1827     /* The lower casing table and the "must be at the start of a line" flag are
1828     used in a loop when finding where to start. */
1829    
1830     lcc = match_block.tables + lcc_offset;
1831     startline = (re->options & PCRE_STARTLINE) != 0;
1832     firstline = (re->options & PCRE_FIRSTLINE) != 0;
1833    
1834     /* Set up the first character to match, if available. The first_byte value is
1835     never set for an anchored regular expression, but the anchoring may be forced
1836     at run time, so we have to test for anchoring. The first char may be unset for
1837     an unanchored pattern, of course. If there's no first char and the pattern was
1838     studied, there may be a bitmap of possible first characters. */
1839    
1840     if (!anchored)
1841     {
1842     if ((re->options & PCRE_FIRSTSET) != 0)
1843     {
1844     first_byte = re->first_byte & 255;
1845     if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
1846     first_byte = lcc[first_byte];
1847     }
1848     else
1849     {
1850     if (startline && study != NULL &&
1851     (study->options & PCRE_STUDY_MAPPED) != 0)
1852     start_bits = study->start_bits;
1853     }
1854     }
1855    
1856     /* For anchored or unanchored matches, there may be a "last known required
1857     character" set. */
1858    
1859     if ((re->options & PCRE_REQCHSET) != 0)
1860     {
1861     req_byte = re->req_byte & 255;
1862     req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
1863     req_byte2 = (match_block.tables + fcc_offset)[req_byte]; /* case flipped */
1864     }
1865    
1866     /* Call the main matching function, looping for a non-anchored regex after a
1867     failed match. Unless restarting, optimize by moving to the first match
1868     character if possible, when not anchored. Then unless wanting a partial match,
1869     check for a required later character. */
1870    
1871     for (;;)
1872     {
1873     int rc;
1874    
1875     if ((options & PCRE_DFA_RESTART) == 0)
1876     {
1877     const uschar *save_end_subject = end_subject;
1878    
1879     /* Advance to a unique first char if possible. If firstline is TRUE, the
1880     start of the match is constrained to the first line of a multiline string.
1881 nigel 87 Implement this by temporarily adjusting end_subject so that we stop
1882     scanning at a newline. If the match fails at the newline, later code breaks
1883     this loop. */
1884 nigel 77
1885     if (firstline)
1886     {
1887     const uschar *t = current_subject;
1888     while (t < save_end_subject && *t != '\n') t++;
1889     end_subject = t;
1890     }
1891    
1892     if (first_byte >= 0)
1893     {
1894     if (first_byte_caseless)
1895     while (current_subject < end_subject &&
1896     lcc[*current_subject] != first_byte)
1897     current_subject++;
1898     else
1899     while (current_subject < end_subject && *current_subject != first_byte)
1900     current_subject++;
1901     }
1902    
1903     /* Or to just after \n for a multiline match if possible */
1904    
1905     else if (startline)
1906     {
1907     if (current_subject > match_block.start_subject + start_offset)
1908     {
1909     while (current_subject < end_subject && current_subject[-1] != NEWLINE)
1910     current_subject++;
1911     }
1912     }
1913    
1914     /* Or to a non-unique first char after study */
1915    
1916     else if (start_bits != NULL)
1917     {
1918     while (current_subject < end_subject)
1919     {
1920     register unsigned int c = *current_subject;
1921     if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
1922     else break;
1923     }
1924     }
1925    
1926     /* Restore fudged end_subject */
1927    
1928     end_subject = save_end_subject;
1929     }
1930    
1931     /* If req_byte is set, we know that that character must appear in the subject
1932     for the match to succeed. If the first character is set, req_byte must be
1933     later in the subject; otherwise the test starts at the match point. This
1934     optimization can save a huge amount of work in patterns with nested unlimited
1935     repeats that aren't going to match. Writing separate code for cased/caseless
1936     versions makes it go faster, as does using an autoincrement and backing off
1937     on a match.
1938    
1939     HOWEVER: when the subject string is very, very long, searching to its end can
1940     take a long time, and give bad performance on quite ordinary patterns. This
1941     showed up when somebody was matching /^C/ on a 32-megabyte string... so we
1942     don't do this when the string is sufficiently long.
1943    
1944     ALSO: this processing is disabled when partial matching is requested.
1945     */
1946    
1947     if (req_byte >= 0 &&
1948     end_subject - current_subject < REQ_BYTE_MAX &&
1949     (options & PCRE_PARTIAL) == 0)
1950     {
1951     register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
1952    
1953     /* We don't need to repeat the search if we haven't yet reached the
1954     place we found it at last time. */
1955    
1956     if (p > req_byte_ptr)
1957     {
1958     if (req_byte_caseless)
1959     {
1960     while (p < end_subject)
1961     {
1962     register int pp = *p++;
1963     if (pp == req_byte || pp == req_byte2) { p--; break; }
1964     }
1965     }
1966     else
1967     {
1968     while (p < end_subject)
1969     {
1970     if (*p++ == req_byte) { p--; break; }
1971     }
1972     }
1973    
1974     /* If we can't find the required character, break the matching loop,
1975     which will cause a return or PCRE_ERROR_NOMATCH. */
1976    
1977     if (p >= end_subject) break;
1978    
1979     /* If we have found the required character, save the point where we
1980     found it, so that we don't search again next time round the loop if
1981     the start hasn't passed this character yet. */
1982    
1983     req_byte_ptr = p;
1984     }
1985     }
1986    
1987     /* OK, now we can do the business */
1988    
1989     rc = internal_dfa_exec(
1990     &match_block, /* fixed match data */
1991     match_block.start_code, /* this subexpression's code */
1992     current_subject, /* where we currently are */
1993     start_offset, /* start offset in subject */
1994     offsets, /* offset vector */
1995     offsetcount, /* size of same */
1996     workspace, /* workspace vector */
1997     wscount, /* size of same */
1998     re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
1999     0, /* function recurse level */
2000     0); /* regex recurse level */
2001    
2002     /* Anything other than "no match" means we are done, always; otherwise, carry
2003     on only if not anchored. */
2004    
2005     if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;
2006    
2007     /* Advance to the next subject character unless we are at the end of a line
2008     and firstline is set. */
2009    
2010     if (firstline && *current_subject == NEWLINE) break;
2011     current_subject++;
2012    
2013     #ifdef SUPPORT_UTF8
2014     if (utf8)
2015     {
2016     while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2017     current_subject++;
2018     }
2019     #endif
2020    
2021     if (current_subject > end_subject) break;
2022     }
2023    
2024     return PCRE_ERROR_NOMATCH;
2025     }
2026    
2027     /* End of pcre_dfa_exec.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12