/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 675 - (hide annotations) (download)
Sat Aug 27 10:18:46 2011 UTC (3 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 30906 byte(s)
Change pcre_assign_jit_callback to pcre_assign_jit_stack.

1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9     Copyright (c) 1997-2011 University of Cambridge
10    
11     This JIT compiler regression test program was written by Zoltan Herczeg
12     Copyright (c) 2010-2011
13    
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43     #include <stdio.h>
44     #include <string.h>
45     #include <time.h>
46     #include "pcre.h"
47    
48     #define PCRE_BUG 0x80000000
49    
50     /*
51     UTF-8 byte sequences used by the test cases (Hungarian letters and other code points)
52     \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
53     \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
54     \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
55     \xc2\x85 = 0x85 (Next Line = NEL)
56     \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
57     \xe2\x80\xa8 = 0x2028 (Line Separator)
58     \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
59     \xcc\x8d = 781 (Something with Mark property)
60     */
61    
62     static void setstack(pcre_extra *extra);
63     static void regression_tests(void);
64    
/* Program entry point: runs the whole JIT regression suite once.
   The exit status is always 0; failures are only reported on stdout by
   regression_tests(). */
int main(void)
{
	regression_tests();

	return 0;
}
70    
71     static pcre_jit_stack* callback(void *arg)
72     {
73     return (pcre_jit_stack *)arg;
74     }
75    
76     static void setstack(pcre_extra *extra)
77     {
78     static pcre_jit_stack *stack;
79     if (stack) pcre_jit_stack_free(stack);
80     stack = pcre_jit_stack_alloc(1, 1024 * 1024);
81 ph10 675 pcre_assign_jit_stack(extra, callback, stack);
82 ph10 667 }
83    
84     /* --------------------------------------------------------------------------------------- */
85    
/* Shorthand option sets used by the test table. Each letter of a name stands
   for one option: C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
86     #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
87     #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
88     #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
89     #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
90     #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
91     #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
92     #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
93    
/* One entry of the regression test table. */
struct regression_test_case {
	/* Option bits. regression_tests() splits them: PCRE_NOTBOL, PCRE_NOTEOL,
	   PCRE_NOTEMPTY and PCRE_NOTEMPTY_ATSTART go to pcre_exec(), PCRE_BUG is
	   consumed by the test driver, everything else goes to pcre_compile(). */
	int flags;
	/* Start offset passed to pcre_exec(). */
	int start_offset;
	/* Pattern to compile; a NULL pattern terminates the table. */
	const char *pattern;
	/* Subject string the pattern is matched against. */
	const char *input;
};
100    
/* Table of regression test cases, terminated by an entry whose pattern is NULL.
   Each entry is { flags, start_offset, pattern, input }. Entries with PCRE_BUG
   set in flags are still executed by regression_tests(), but their results are
   not compared. */
101     static struct regression_test_case regression_test_cases[] = {
102     /* Constant strings. */
103     { MUA, 0, "AbC", "AbAbC" },
104     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
105     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
106     { MA, 0, "[^a]", "aAbB" },
107     { CMA, 0, "[^m]", "mMnN" },
108     { MA, 0, "a[^b][^#]", "abacd" },
109     { CMA, 0, "A[^B][^E]", "abacd" },
110     { CMUA, 0, "[^x][^#]", "XxBll" },
111     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
112     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
113     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
114     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
115     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
116     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
117     { MUA, 0, "[axd]", "sAXd" },
118     { CMUA, 0, "[axd]", "sAXd" },
119     { CMUA, 0, "[^axd]", "DxA" },
120     { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
121     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
122     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
123     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
124     { MUA, 0, "[^a]", "\xc2\x80[]" },
125     { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
126     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
127     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
128     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
129     { PCRE_CASELESS, 0, "a1", "Aa1" },
130    
131     /* Assertions. */
132     { MUA, 0, "\\b[^A]", "A_B#" },
133     { MA, 0, "\\b\\W", "\n*" },
134     { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
135     { MAP, 0, "\\B", "_\xa1" },
136     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
137     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
138     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
139     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
140     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
141     { MUA, 0, "\\b.", "\xcd\xbe" },
142     { MA, 0, "\\R^", "\n" },
143     { MA, 1, "^", "\n" },
144     { 0, 0, "^ab", "ab" },
145     { 0, 0, "^ab", "aab" },
146     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
147     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
148     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
149     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
150     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
151     { 0, 0, "ab$", "ab" },
152     { 0, 0, "ab$", "ab\r\n" },
153     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
154     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
155     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
156     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
157     { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
158     { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
159     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
160     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
161     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
162     { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
163     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
164     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
165     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
166     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
167     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
168     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
169     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
170     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
171     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
172     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
173     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
174     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
175     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
176     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
177     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
178     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
179     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
180     { MA, 0, "\\Aa", "aaa" },
181     { MA, 1, "\\Aa", "aaa" },
182     { MA, 1, "\\Ga", "aaa" },
183     { MA, 1, "\\Ga", "aba" },
184     { MA, 0, "a\\z", "aaa" },
185     { MA, 0, "a\\z", "aab" },
186    
187     /* Brackets. */
188     { MUA, 0, "(ab|bb|cd)", "bacde" },
189     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
190     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
191     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
192     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
193     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
194    
195     /* Greedy and non-greedy ? operators. */
196     { MUA, 0, "(?:a)?a", "laab" },
197     { CMUA, 0, "(A)?A", "llaab" },
198     { MUA, 0, "(a)?\?a", "aab" }, /* The second ? is escaped because ?? is the prefix of trigraphs in C. */
199     { MUA, 0, "(a)?a", "manm" },
200     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
201     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
202     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
203    
204     /* Greedy and non-greedy + operators */
205     { MUA, 0, "(aa)+aa", "aaaaaaa" },
206     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
207     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
208     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
209     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
210     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
211     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
212    
213     /* Greedy and non-greedy * operators */
214     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
215     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
216     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
217     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
218     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
219     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
220     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
221     { MA, 0, "((?:a|)*){0}a", "a" },
222    
223     /* Combining ? + * operators */
224     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
225     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
226     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
227     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
228     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
229    
230     /* Single character iterators. */
231     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
232     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
233     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
234     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
235     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
236     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
237     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
238     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
239     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
240     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
241     { MUA, 0, "(a?+[^b])+", "babaacacb" },
242     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
243     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
244     { CMUA, 0, "[c-f]+k", "DemmFke" },
245     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
246     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
247     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
248     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
249     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
250     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
251     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
252     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
253     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
254     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
255     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
256     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
257     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
258    
259     /* Basic character sets. */
260     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
261     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
262     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
263     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
264     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
265     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
266    
267     /* Unicode properties. */
268     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
269     { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
270     { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
271     { MUAP, 0, "[\\P{Any}]", "abc" },
272     { MUAP, 0, "[^\\p{Any}]", "abc" },
273     { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
274     { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
275     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
276     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
277     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
278     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
279     { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
280     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
281     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
282     { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
283     { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
284     { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
285     { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
286     { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
287     { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
288    
289     /* Possible empty brackets. */
290     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
291     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
292     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
293     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
294     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
295     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
296     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
297     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
298     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
299     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
300    
301     /* Start offset. */
302     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
303     { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
304     { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
305     { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
306    
307     /* Newline. */
308     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
309     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
310     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
311    
312     /* Any character except newline or any newline. */
313     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
314     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
315     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
316     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
317     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
318     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
319     { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
320     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
321     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
322     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
323     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
324     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
325     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
326     { MUA, 0, "\\R+", "ab" },
327     { MUA, 0, "\\R+", "ab\r\n\r" },
328     { MUA, 0, "\\R*", "ab\r\n\r" },
329     { MUA, 0, "\\R*", "\r\n\r" },
330     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
331     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
332     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
333     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
334     { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
335     { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
336     { MUA, 0, "\\R*\\R\\R", "\n\r" },
337     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
338     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
339    
340     /* Atomic groups (no fallback from "next" direction). */
341     { MUA, 0, "(?>ab)ab", "bab" },
342     { MUA, 0, "(?>(ab))ab", "bab" },
343     { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
344     "bababcdedefgheijijklmlmnop" },
345     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
346     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
347     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
348     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
349     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
350     { MUA, 0, "(?>x|)*$", "aaa" },
351     { MUA, 0, "(?>(x)|)*$", "aaa" },
352     { MUA, 0, "(?>x|())*$", "aaa" },
353     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
354     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
355     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
356     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
357     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
358     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
359     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
360     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
361     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
362     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
363     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
364     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
365     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
366     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
367     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
368     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
369     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
370     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
371     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
372     { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
373     { MUA, 0, "\\X{2,4}", "abcdef" },
374     { MUA, 0, "\\X{2,4}?", "abcdef" },
375     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
376     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
377     { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
378     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
379    
380     /* Possessive quantifiers. */
381     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
382     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
383     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
384     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
385     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
386     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
387     { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
388     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
389     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
390     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
391     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
392     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
393     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
394     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
395     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
396     { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
397     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
398     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
399     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
400     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
401     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
402     { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
403     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
404     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
405     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
406     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
407     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
408     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
409     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
410     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
411     { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
412     { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
413     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
414     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
415     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
416    
417     /* Back references. */
418     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
419     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
420     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
421     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
422     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
423     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
424     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
425     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
426     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
427     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
428     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
429     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
430     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
431     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
432     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
433     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
434     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
435     { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
436     { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
437     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
438     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
439     { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
440    
441     /* Lookahead and lookbehind assertions. */
442     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
443     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
444     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
445     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
446     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
447     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
448     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
449     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
450     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
451     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
452     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
453     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
454     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
455     { MUA, 0, "(?=(?>(a))m)amk", "a k" },
456     { MUA, 0, "(?!(?>(a))m)amk", "a k" },
457     { MUA, 0, "(?>(?=(a))am)amk", "a k" },
458     { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
459     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
460     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
461     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
462     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
463     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
464     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
465     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
466    
467     /* Not empty, ACCEPT, FAIL */
468     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
469     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
470     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
471     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
472     { MUA, 0, "a(*ACCEPT)b", "ab" },
473     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
474     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
475     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
476     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
477     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
478     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
479     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
480     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
481     { MUA, 0, "((a(*ACCEPT)b))", "ab" },
482     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
483     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
484     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
485     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
486     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
487    
488     /* Conditional blocks. */
489     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
490     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
491     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
492     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
493     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
494     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
495     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
496     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
497     { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
498     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
499     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
500     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
501     { MUA, 0, "(?(?=a)ab)", "a" },
502     { MUA, 0, "(?(?<!b)c)", "b" },
503     { MUA, 0, "(?(DEFINE)a(b))", "a" },
504     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
505     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
506     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
507     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
508     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
509     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
510     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
511     { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
512     { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
513     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
514     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
515     { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
516     { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
517     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
518     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
519     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
520    
521     /* Set start of match. */
522     { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
523     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
524     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
525     { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
526     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
527    
528     /* First line. */
529     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
530     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
531     { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
532     { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
533     { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
534     { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
535     { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
536     { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
537     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
538     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
539     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
540     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
541     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
542     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
543     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
544     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
545     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
546    
547     /* Recurse. */
548     { MUA, 0, "(a)(?1)", "aa" },
549     { MUA, 0, "((a))(?1)", "aa" },
550     { MUA, 0, "(b|a)(?1)", "aa" },
551     { MUA, 0, "(b|(a))(?1)", "aa" },
552     { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
553     { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
554     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
555     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
556     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
557     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
558     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
559     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
560     { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
561     { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
562     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
563     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
564     { MUA, 0, "b|<(?R)*>", "<<b>" },
565     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
566     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
567    
568     /* Deep recursion. */
569     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
570     { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aaa+ " },
571    
572     { 0, 0, NULL, NULL }
573     };
574    
575     static void regression_tests(void)
576     {
577     pcre *re;
578     struct regression_test_case *current = regression_test_cases;
579     const char *error;
580     pcre_extra *extra;
581     int ovector1[32];
582     int ovector2[32];
583     int return_value1, return_value2;
584     int i, err_offs;
585     int total = 0, succesful = 0;
586     int counter = 0;
587    
588     printf("Running JIT regression tests:\n");
589     while (current->pattern) {
590     /* printf("\nPattern: %s :", current->pattern); */
591     total++;
592    
593     error = NULL;
594     re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_BUG), &error, &err_offs, NULL);
595    
596     if (!re) {
597     printf("\nCannot compile pattern: %s\n", current->pattern);
598     current++;
599     continue;
600     }
601    
602     error = NULL;
603     extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
604     if (!extra) {
605     printf("\nCannot study pattern: %s\n", current->pattern);
606     current++;
607     continue;
608     }
609    
610     if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
611     printf("\nJIT compiler does not support: %s\n", current->pattern);
612     current++;
613     continue;
614     }
615    
616     counter++;
617     if ((counter & 0x3) != 0)
618     setstack(extra);
619    
620     for (i = 0; i < 32; ++i)
621     ovector1[i] = -2;
622     return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
623    
624     for (i = 0; i < 32; ++i)
625     ovector2[i] = -2;
626     return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
627    
628     /* If PCRE_BUG is set, just run the test, but do not compare the results.
629     Segfaults can still be captured. */
630     if (!(current->flags & PCRE_BUG)) {
631     if (return_value1 != return_value2) {
632     printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
633     current++;
634     continue;
635     }
636    
637     if (return_value1 >= 0) {
638     return_value1 *= 2;
639     err_offs = 0;
640     for (i = 0; i < return_value1; ++i)
641     if (ovector1[i] != ovector2[i]) {
642     printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
643     err_offs = 1;
644     }
645     if (err_offs) {
646     current++;
647     continue;
648     }
649     }
650     }
651    
652     pcre_free_study(extra);
653     pcre_free(re);
654    
655     /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
656     printf(".");
657     fflush(stdout);
658     current++;
659     succesful++;
660     }
661    
662     if (total == succesful)
663     printf("\nAll JIT regression tests are successfully passed.\n");
664     else
665     printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
666     }
667    
668     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12