/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 698 - (hide annotations) (download)
Mon Sep 19 15:43:02 2011 UTC (2 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 32131 byte(s)
Revised pcre_jit_test that runs OK with or without UTF-8 and/or UCP.

1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9     Copyright (c) 1997-2011 University of Cambridge
10    
11     This JIT compiler regression test program was written by Zoltan Herczeg
12     Copyright (c) 2010-2011
13    
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
/* Test-harness-only flag bit. It is OR-ed into the flags of selected entries
   in the test table alongside real PCRE_* options, and regression_tests()
   seeds its disabled_flags mask with it -- presumably so the bit is stripped
   before the flags are handed to PCRE (confirm in regression_tests()). */
51     #define PCRE_BUG 0x80000000
52    
53     /*
54     Hungarian utf8 characters
55     \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
56     \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
57     \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
58     \xc2\x85 = 0x85 (NExt Line = NEL)
59     \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
60     \xe2\x80\xa8 = 0x2028 (Line Separator)
61     \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
62     \xcc\x8d = 781 (Something with Mark property)
63 ph10 691 */
64 ph10 667
/* Forward declarations of file-local helpers that are defined further down
   in this file (regression_tests() is called from main() before its
   definition appears). */
65     static void setstack(pcre_extra *extra);
66 ph10 677 static int regression_tests(void);
67 ph10 667
68     int main(void)
69     {
70 ph10 698 int jit = 0;
71     pcre_config(PCRE_CONFIG_JIT, &jit);
72     if (!jit) {
73     printf("JIT must be enabled to run pcre_jit_test\n");
74     return 1;
75     }
76     return regression_tests();
77 ph10 667 }
78    
79     static pcre_jit_stack* callback(void *arg)
80     {
81     return (pcre_jit_stack *)arg;
82     }
83    
84     static void setstack(pcre_extra *extra)
85     {
86     static pcre_jit_stack *stack;
87     if (stack) pcre_jit_stack_free(stack);
88     stack = pcre_jit_stack_alloc(1, 1024 * 1024);
89 ph10 675 pcre_assign_jit_stack(extra, callback, stack);
90 ph10 667 }
91    
92     /* --------------------------------------------------------------------------------------- */
93    
/* Option-set shorthands used by the test table. Each letter names one option:
   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   Every set is built on top of MA, so the expanded bit masks are the same as
   spelling out all options individually. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)
101    
/* One entry of the regression table. The table uses positional initialisers,
   so the member order below must not change. */
struct regression_test_case {
	/* Option bits for this case: PCRE_* options (often via the MUA/CMA/...
	   shorthands), optionally combined with the harness-only PCRE_BUG bit. */
	int flags;
	/* Offset associated with the subject string; presumably the start offset
	   passed to the matcher -- confirm in regression_tests(). */
	int start_offset;
	/* Regular expression source; NULL in the terminating table entry. */
	const char *pattern;
	/* Subject string to match against; NULL in the terminating table entry. */
	const char *input;
};
108    
109     static struct regression_test_case regression_test_cases[] = {
110     /* Constant strings. */
111     { MUA, 0, "AbC", "AbAbC" },
112     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
113     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
114     { MA, 0, "[^a]", "aAbB" },
115     { CMA, 0, "[^m]", "mMnN" },
116     { MA, 0, "a[^b][^#]", "abacd" },
117     { CMA, 0, "A[^B][^E]", "abacd" },
118     { CMUA, 0, "[^x][^#]", "XxBll" },
119     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
120     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
121     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
122     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
123     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
124     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
125     { MUA, 0, "[axd]", "sAXd" },
126     { CMUA, 0, "[axd]", "sAXd" },
127     { CMUA, 0, "[^axd]", "DxA" },
128     { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
129     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
130     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
131     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
132 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
133 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
134     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
135     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
136     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
137     { PCRE_CASELESS, 0, "a1", "Aa1" },
138    
139     /* Assertions. */
140     { MUA, 0, "\\b[^A]", "A_B#" },
141     { MA, 0, "\\b\\W", "\n*" },
142     { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
143     { MAP, 0, "\\B", "_\xa1" },
144     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
145     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
146     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
147     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
148     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
149     { MUA, 0, "\\b.", "\xcd\xbe" },
150     { MA, 0, "\\R^", "\n" },
151     { MA, 1, "^", "\n" },
152     { 0, 0, "^ab", "ab" },
153     { 0, 0, "^ab", "aab" },
154     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
155     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
156     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
157     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
158     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
159     { 0, 0, "ab$", "ab" },
160     { 0, 0, "ab$", "ab\r\n" },
161     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
162     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
163     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
164     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
165     { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
166     { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
167     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
168     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
169     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
170     { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
171     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
172     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
173     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
174     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
175     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
176     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
177     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
178     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
179     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
180     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
181     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
182     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
183     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
184     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
185     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
186     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
187     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
188     { MA, 0, "\\Aa", "aaa" },
189     { MA, 1, "\\Aa", "aaa" },
190     { MA, 1, "\\Ga", "aaa" },
191     { MA, 1, "\\Ga", "aba" },
192     { MA, 0, "a\\z", "aaa" },
193     { MA, 0, "a\\z", "aab" },
194    
195     /* Brackets. */
196     { MUA, 0, "(ab|bb|cd)", "bacde" },
197     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
198     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
199     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
200     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
201     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
202    
203     /* Greedy and non-greedy ? operators. */
204     { MUA, 0, "(?:a)?a", "laab" },
205     { CMUA, 0, "(A)?A", "llaab" },
206     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in C, hence the escaped second '?'. */
207     { MUA, 0, "(a)?a", "manm" },
208     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
209     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
210     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
211    
212     /* Greedy and non-greedy + operators */
213     { MUA, 0, "(aa)+aa", "aaaaaaa" },
214     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
215     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
216     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
217     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
218     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
219     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
220    
221     /* Greedy and non-greedy * operators */
222     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
223     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
224     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
225     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
226     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
227     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
228     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
229     { MA, 0, "((?:a|)*){0}a", "a" },
230    
231     /* Combining ? + * operators */
232     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
233     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
234     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
235     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
236     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
237    
238     /* Single character iterators. */
239     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
240     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
241     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
242     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
243     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
244     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
245     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
246     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
247     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
248     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
249     { MUA, 0, "(a?+[^b])+", "babaacacb" },
250     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
251     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
252     { CMUA, 0, "[c-f]+k", "DemmFke" },
253     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
254     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
255     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
256     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
257     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
258     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
259     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
260     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
261     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
262     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
263     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
264     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
265     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
266    
267     /* Basic character sets. */
268     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
269     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
270     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
271     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
272     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
273     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
274    
275     /* Unicode properties. */
276     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
277     { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
278     { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
279     { MUAP, 0, "[\\P{Any}]", "abc" },
280     { MUAP, 0, "[^\\p{Any}]", "abc" },
281     { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
282     { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
283     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
284     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
285     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
286     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
287     { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
288     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
289     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
290     { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
291     { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
292     { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
293     { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
294     { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
295     { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
296    
297     /* Possible empty brackets. */
298     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
299     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
300     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
301     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
302     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
303     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
304     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
305     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
306     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
307     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
308    
309     /* Start offset. */
310     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
311     { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
312     { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
313     { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
314    
315     /* Newline. */
316     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
317     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
318     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
319    
320     /* Any character except newline or any newline. */
321     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
322     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
323     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
324     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
325     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
326     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
327     { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
328     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
329     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
330     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
331     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
332     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
333     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
334     { MUA, 0, "\\R+", "ab" },
335     { MUA, 0, "\\R+", "ab\r\n\r" },
336     { MUA, 0, "\\R*", "ab\r\n\r" },
337     { MUA, 0, "\\R*", "\r\n\r" },
338     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
339     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
340     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
341     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
342     { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
343     { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
344     { MUA, 0, "\\R*\\R\\R", "\n\r" },
345     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
346     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
347    
348     /* Atomic groups (no fallback from "next" direction). */
349     { MUA, 0, "(?>ab)ab", "bab" },
350     { MUA, 0, "(?>(ab))ab", "bab" },
351     { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
352     "bababcdedefgheijijklmlmnop" },
353     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
354     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
355     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
356     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
357     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
358     { MUA, 0, "(?>x|)*$", "aaa" },
359     { MUA, 0, "(?>(x)|)*$", "aaa" },
360     { MUA, 0, "(?>x|())*$", "aaa" },
361     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
362     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
363     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
364     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
365     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
366     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
367     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
368     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
369     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
370     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
371     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
372     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
373     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
374     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
375     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
376     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
377     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
378     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
379     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
380     { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
381     { MUA, 0, "\\X{2,4}", "abcdef" },
382     { MUA, 0, "\\X{2,4}?", "abcdef" },
383     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
384     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
385     { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
386     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
387    
388 ph10 698 /* Possessive quantifiers. */
389     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
390     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
391     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
392     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
393     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
394     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
395 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
396     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
397     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
398     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
399     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
400     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
401     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
402     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
403     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
404 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
405     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
406     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
407     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
408     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
409     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
410 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
411     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
412     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
413     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
414     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
415     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
416     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
417     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
418     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
419     { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
420     { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
421     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
422     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
423     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
424    
425     /* Back references. */
426     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
427     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
428     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
429     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
430     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
431     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
432     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
433     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
434     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
435     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
436     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
437     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
438     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
439     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
440     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
441     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
442     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
443     { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
444     { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
445     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
446     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
447     { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
448    
449     /* Assertions. */
450     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
451     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
452     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
453     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
454     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
455     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
456     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
457     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
458     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
459     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
460     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
461     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
462     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
463     { MUA, 0, "(?=(?>(a))m)amk", "a k" },
464     { MUA, 0, "(?!(?>(a))m)amk", "a k" },
465     { MUA, 0, "(?>(?=(a))am)amk", "a k" },
466     { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
467     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
468     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
469     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
470     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
471     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
472     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
473     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
474    
475     /* Not empty, ACCEPT, FAIL */
476     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
477     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
478     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
479     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
480     { MUA, 0, "a(*ACCEPT)b", "ab" },
481     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
482     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
483     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
484     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
485     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
486     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
487     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
488     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
489     { MUA, 0, "((a(*ACCEPT)b))", "ab" },
490     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
491     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
492     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
493     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
494     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
495    
496     /* Conditional blocks. */
497     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
498     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
499     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
500     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
501     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
502     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
503     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
504     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
505     { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
506     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
507     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
508     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
509     { MUA, 0, "(?(?=a)ab)", "a" },
510     { MUA, 0, "(?(?<!b)c)", "b" },
511     { MUA, 0, "(?(DEFINE)a(b))", "a" },
512     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
513     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
514     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
515     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
516     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
517     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
518     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
519     { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
520     { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
521     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
522     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
523     { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
524     { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
525     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
526     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
527     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
528    
529 ph10 698 /* Set start of match. */
530 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
531     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
532     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
533     { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
534     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
535    
536     /* First line. */
537     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
538     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
539     { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
540     { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
541     { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
542     { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
543     { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
544     { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
545     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
546     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
547     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
548     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
549     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
550     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
551     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
552     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
553     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
554    
555     /* Recurse. */
556     { MUA, 0, "(a)(?1)", "aa" },
557     { MUA, 0, "((a))(?1)", "aa" },
558     { MUA, 0, "(b|a)(?1)", "aa" },
559     { MUA, 0, "(b|(a))(?1)", "aa" },
560     { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
561     { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
562     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
563     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
564     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
565     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
566     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
567     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
568     { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
569     { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
570     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
571     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
572     { MUA, 0, "b|<(?R)*>", "<<b>" },
573     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
574     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
575    
576     /* Deep recursion. */
577     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
578 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
579 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
580 ph10 691
581 ph10 677 /* Deep recursion: Stack limit reached. */
582     { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
583     { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
584     { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
585     { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
586     { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
587 ph10 667
588     { 0, 0, NULL, NULL }
589     };
590    
591 ph10 677 static int regression_tests(void)
592 ph10 667 {
593     pcre *re;
594     struct regression_test_case *current = regression_test_cases;
595     const char *error;
596     pcre_extra *extra;
597 ph10 698 int utf8 = 0, ucp = 0;
598 ph10 667 int ovector1[32];
599     int ovector2[32];
600     int return_value1, return_value2;
601     int i, err_offs;
602     int total = 0, succesful = 0;
603     int counter = 0;
604 ph10 698 int disabled_flags = PCRE_BUG;
605 ph10 667
606 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
607     utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
608     still considered successful from pcre_jit_test point of view. */
609    
610     pcre_config(PCRE_CONFIG_UTF8, &utf8);
611     pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
612     if (!utf8)
613     disabled_flags |= PCRE_UTF8;
614     if (!ucp)
615     disabled_flags |= PCRE_UCP;
616    
617     printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
618 ph10 667 while (current->pattern) {
619 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
620 ph10 667 total++;
621    
622     error = NULL;
623 ph10 698 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
624 ph10 667
625     if (!re) {
626 ph10 698 if (utf8 && ucp)
627     printf("\nCannot compile pattern: %s\n", current->pattern);
628     else {
629     /* Some patterns cannot be compiled when either of utf8
630     or ucp is disabled. We just skip them. */
631     printf(".");
632     succesful++;
633     }
634 ph10 667 current++;
635     continue;
636     }
637    
638     error = NULL;
639     extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
640     if (!extra) {
641     printf("\nCannot study pattern: %s\n", current->pattern);
642     current++;
643     continue;
644     }
645    
646     if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
647     printf("\nJIT compiler does not support: %s\n", current->pattern);
648     current++;
649     continue;
650     }
651    
652     counter++;
653     if ((counter & 0x3) != 0)
654     setstack(extra);
655    
656     for (i = 0; i < 32; ++i)
657     ovector1[i] = -2;
658     return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
659    
660     for (i = 0; i < 32; ++i)
661     ovector2[i] = -2;
662     return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
663    
664     /* If PCRE_BUG is set, just run the test, but do not compare the results.
665     Segfaults can still be captured. */
666     if (!(current->flags & PCRE_BUG)) {
667     if (return_value1 != return_value2) {
668     printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
669     current++;
670     continue;
671     }
672    
673     if (return_value1 >= 0) {
674     return_value1 *= 2;
675     err_offs = 0;
676     for (i = 0; i < return_value1; ++i)
677     if (ovector1[i] != ovector2[i]) {
678     printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
679     err_offs = 1;
680     }
681     if (err_offs) {
682     current++;
683     continue;
684     }
685     }
686     }
687    
688     pcre_free_study(extra);
689     pcre_free(re);
690    
691     /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
692     printf(".");
693     fflush(stdout);
694     current++;
695     succesful++;
696     }
697    
698 ph10 677 if (total == succesful) {
699 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
700 ph10 677 return 0;
701 ph10 698 } else {
702 ph10 667 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
703 ph10 698 return 1;
704     }
705 ph10 667 }
706    
707     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12