/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 691 - (hide annotations) (download)
Sun Sep 11 14:31:21 2011 UTC (3 years, 1 month ago) by ph10
File MIME type: text/plain
File size: 31387 byte(s)
Final source and document tidies for 8.20-RC1.

1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9     Copyright (c) 1997-2011 University of Cambridge
10    
11     This JIT compiler regression test program was written by Zoltan Herczeg
12     Copyright (c) 2010-2011
13    
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43     #include <stdio.h>
44     #include <string.h>
45     #include <time.h>
46     #include "pcre.h"
47    
/* Test-harness-only bit that can be OR-ed into regression_test_case.flags.
   It is not a PCRE option: regression_tests() masks it out before the flags
   are handed to pcre_compile().
   NOTE(review): presumably marks cases whose results are known to differ
   between the interpreter and the JIT - confirm in regression_tests(). */
48     #define PCRE_BUG 0x80000000
49    
50     /*
51     UTF-8 byte sequences used by the test cases (Hungarian letters and others)
52     \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
53     \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
54     \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
55     \xc2\x85 = 0x85 (NExt Line = NEL)
56     \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
57     \xe2\x80\xa8 = 0x2028 (Line Separator)
58     \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
59     \xcc\x8d = 781 (Something with Mark property)
60 ph10 691 */
61 ph10 667
/* Forward declarations; both functions are defined further down in this file. */
62     static void setstack(pcre_extra *extra);
63 ph10 677 static int regression_tests(void);
64 ph10 667
/* Program entry point: runs the JIT regression suite and returns whatever
   regression_tests() reports as the process exit status. */
int main(void)
{
	int status = regression_tests();

	return status;
}
69    
70     static pcre_jit_stack* callback(void *arg)
71     {
72     return (pcre_jit_stack *)arg;
73     }
74    
75     static void setstack(pcre_extra *extra)
76     {
77     static pcre_jit_stack *stack;
78     if (stack) pcre_jit_stack_free(stack);
79     stack = pcre_jit_stack_alloc(1, 1024 * 1024);
80 ph10 675 pcre_assign_jit_stack(extra, callback, stack);
81 ph10 667 }
82    
83     /* --------------------------------------------------------------------------------------- */
84    
/* Shorthand option sets used by the test table. Letters in a name:
   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   Every set is built on top of the smaller ones, so the resulting bit
   masks are the same as when each option is spelled out in full. */
#define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP (MA | PCRE_UCP)
#define CMA (PCRE_CASELESS | MA)
#define MUA (MA | PCRE_UTF8)
#define MUAP (MUA | PCRE_UCP)
#define CMUA (PCRE_CASELESS | MUA)
#define CMUAP (PCRE_CASELESS | MUAP)
92    
/* One row of the regression table. The table is initialized positionally,
   so the member order below must not change. */
93     struct regression_test_case {
/* Option bits. PCRE_NOTBOL, PCRE_NOTEOL, PCRE_NOTEMPTY,
   PCRE_NOTEMPTY_ATSTART and PCRE_BUG are masked out before the remaining
   bits are passed to pcre_compile(). */
94     int flags;
/* NOTE(review): presumably the offset inside input at which matching
   starts - confirm against the exec calls in regression_tests(). */
95     int start_offset;
/* Pattern source passed to pcre_compile(); NULL marks the end of the table. */
96     const char *pattern;
/* NOTE(review): presumably the subject string the pattern is run on -
   confirm against regression_tests(). */
97     const char *input;
98     };
99    
/* Table of regression cases walked by regression_tests().
   Each entry is { flags, start_offset, pattern, input }, see
   struct regression_test_case. The walk stops at the first entry whose
   pattern is NULL, so the all-zero entry at the end must stay last.
   Patterns and inputs are C string literals: a regex backslash is written
   as "\\", while "\x.." escapes produce raw bytes (mostly UTF-8 sequences). */
100     static struct regression_test_case regression_test_cases[] = {
101     /* Constant strings. */
102     { MUA, 0, "AbC", "AbAbC" },
103     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
104     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
105     { MA, 0, "[^a]", "aAbB" },
106     { CMA, 0, "[^m]", "mMnN" },
107     { MA, 0, "a[^b][^#]", "abacd" },
108     { CMA, 0, "A[^B][^E]", "abacd" },
109     { CMUA, 0, "[^x][^#]", "XxBll" },
110     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
111     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
112     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
113     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
114     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
115     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
116     { MUA, 0, "[axd]", "sAXd" },
117     { CMUA, 0, "[axd]", "sAXd" },
118     { CMUA, 0, "[^axd]", "DxA" },
119     { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
120     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
121     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
122     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
123     { MUA, 0, "[^a]", "\xc2\x80[]" },
124     { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
125     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
126     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
127     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
128     { PCRE_CASELESS, 0, "a1", "Aa1" },
129    
130     /* Simple assertions: word boundaries, line and subject anchors. */
131     { MUA, 0, "\\b[^A]", "A_B#" },
132     { MA, 0, "\\b\\W", "\n*" },
133     { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
134     { MAP, 0, "\\B", "_\xa1" },
135     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
136     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
137     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
138     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
139     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
140     { MUA, 0, "\\b.", "\xcd\xbe" },
141     { MA, 0, "\\R^", "\n" },
142     { MA, 1, "^", "\n" },
143     { 0, 0, "^ab", "ab" },
144     { 0, 0, "^ab", "aab" },
145     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
146     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
147     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
148     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
149     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
150     { 0, 0, "ab$", "ab" },
151     { 0, 0, "ab$", "ab\r\n" },
152     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
153     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
154     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
155     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
156     { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
157     { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
158     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
159     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
160     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
161     { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
162     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
163     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
164     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
165     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
166     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
167     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
168     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
169     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
170     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
171     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
172     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
173     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
174     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
175     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
176     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
177     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
178     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
179     { MA, 0, "\\Aa", "aaa" },
180     { MA, 1, "\\Aa", "aaa" },
181     { MA, 1, "\\Ga", "aaa" },
182     { MA, 1, "\\Ga", "aba" },
183     { MA, 0, "a\\z", "aaa" },
184     { MA, 0, "a\\z", "aab" },
185    
186     /* Brackets. */
187     { MUA, 0, "(ab|bb|cd)", "bacde" },
188     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
189     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
190     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
191     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
192     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
193    
194     /* Greedy and non-greedy ? operators. */
195     { MUA, 0, "(?:a)?a", "laab" },
196     { CMUA, 0, "(A)?A", "llaab" },
197     { MUA, 0, "(a)?\?a", "aab" }, /* The second '?' is written as \? so that "??" cannot be taken as the start of a trigraph. */
198     { MUA, 0, "(a)?a", "manm" },
199     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
200     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
201     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
202    
203     /* Greedy and non-greedy + operators */
204     { MUA, 0, "(aa)+aa", "aaaaaaa" },
205     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
206     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
207     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
208     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
209     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
210     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
211    
212     /* Greedy and non-greedy * operators */
213     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
214     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
215     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
216     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
217     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
218     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
219     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
220     { MA, 0, "((?:a|)*){0}a", "a" },
221    
222     /* Combining ? + * operators */
223     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
224     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
225     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
226     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
227     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
228    
229     /* Single character iterators. */
230     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
231     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
232     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
233     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
234     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
235     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
236     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
237     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
238     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
239     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
240     { MUA, 0, "(a?+[^b])+", "babaacacb" },
241     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
242     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
243     { CMUA, 0, "[c-f]+k", "DemmFke" },
244     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
245     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
246     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
247     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
248     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
249     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
250     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
251     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
252     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
253     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
254     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
255     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
256     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
257    
258     /* Basic character sets. */
259     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
260     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
261     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
262     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
263     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
264     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
265    
266     /* Unicode properties. */
267     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
268     { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
269     { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
270     { MUAP, 0, "[\\P{Any}]", "abc" },
271     { MUAP, 0, "[^\\p{Any}]", "abc" },
272     { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
273     { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
274     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
275     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
276     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
277     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
278     { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
279     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
280     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
281     { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
282     { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
283     { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
284     { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
285     { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
286     { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
287    
288     /* Possible empty brackets. */
289     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
290     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
291     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
292     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
293     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
294     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
295     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
296     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
297     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
298     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
299    
300     /* Start offset. */
301     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
302     { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
303     { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
304     { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
305    
306     /* Newline. */
307     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
308     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
309     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
310    
311     /* Any character except newline or any newline. */
312     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
313     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
314     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
315     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
316     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
317     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
318     { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
319     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
320     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
321     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
322     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
323     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
324     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
325     { MUA, 0, "\\R+", "ab" },
326     { MUA, 0, "\\R+", "ab\r\n\r" },
327     { MUA, 0, "\\R*", "ab\r\n\r" },
328     { MUA, 0, "\\R*", "\r\n\r" },
329     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
330     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
331     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
332     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
333     { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
334     { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
335     { MUA, 0, "\\R*\\R\\R", "\n\r" },
336     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
337     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
338    
339     /* Atomic groups (no fallback from "next" direction). */
340     { MUA, 0, "(?>ab)ab", "bab" },
341     { MUA, 0, "(?>(ab))ab", "bab" },
342     { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
343     "bababcdedefgheijijklmlmnop" },
344     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
345     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
346     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
347     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
348     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
349     { MUA, 0, "(?>x|)*$", "aaa" },
350     { MUA, 0, "(?>(x)|)*$", "aaa" },
351     { MUA, 0, "(?>x|())*$", "aaa" },
352     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
353     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
354     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
355     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
356     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
357     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
358     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
359     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
360     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
361     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
362     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
363     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
364     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
365     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
366     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
367     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
368     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
369     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
370     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
371     { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
372     { MUA, 0, "\\X{2,4}", "abcdef" },
373     { MUA, 0, "\\X{2,4}?", "abcdef" },
374     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
375     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
376     { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
377     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
378    
379     /* Possessive quantifiers. */
380     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
381     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
382     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
383     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
384     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
385     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
386     { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
387     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
388     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
389     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
390     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
391     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
392     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
393     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
394     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
395     { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
396     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
397     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
398     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
399     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
400     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
401     { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
402     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
403     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
404     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
405     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
406     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
407     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
408     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
409     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
410     { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
411     { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
412     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
413     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
414     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
415    
416     /* Back references. */
417     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
418     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
419     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
420     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
421     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
422     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
423     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
424     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
425     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
426     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
427     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
428     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
429     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
430     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
431     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
432     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
433     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
434     { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
435     { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
436     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
437     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
438     { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
439    
440     /* Lookahead and lookbehind assertions. */
441     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
442     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
443     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
444     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
445     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
446     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
447     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
448     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
449     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
450     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
451     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
452     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
453     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
454     { MUA, 0, "(?=(?>(a))m)amk", "a k" },
455     { MUA, 0, "(?!(?>(a))m)amk", "a k" },
456     { MUA, 0, "(?>(?=(a))am)amk", "a k" },
457     { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
458     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
459     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
460     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
461     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
462     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
463     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
464     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
465    
466     /* Not empty, ACCEPT, FAIL */
467     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
468     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
469     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
470     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
471     { MUA, 0, "a(*ACCEPT)b", "ab" },
472     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
473     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
474     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
475     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
476     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
477     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
478     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
479     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
480     { MUA, 0, "((a(*ACCEPT)b))", "ab" },
481     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
482     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
483     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
484     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
485     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
486    
487     /* Conditional blocks. */
488     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
489     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
490     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
491     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
492     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
493     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
494     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
495     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
496     { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
497     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
498     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
499     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
500     { MUA, 0, "(?(?=a)ab)", "a" },
501     { MUA, 0, "(?(?<!b)c)", "b" },
502     { MUA, 0, "(?(DEFINE)a(b))", "a" },
503     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
504     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
505     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
506     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
507     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
508     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
509     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
510     { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
511     { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
512     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
513     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
514     { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
515     { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
516     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
517     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
518     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
519    
520     /* Set start of match. */
521     { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
522     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
523     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
524     { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
525     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
526    
527     /* First line. */
528     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
529     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
530     { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
531     { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
532     { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
533     { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
534     { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
535     { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
536     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
537     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
538     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
539     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
540     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
541     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
542     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
543     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
544     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
545    
546     /* Recurse. */
547     { MUA, 0, "(a)(?1)", "aa" },
548     { MUA, 0, "((a))(?1)", "aa" },
549     { MUA, 0, "(b|a)(?1)", "aa" },
550     { MUA, 0, "(b|(a))(?1)", "aa" },
551     { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
552     { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
553     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
554     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
555     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
556     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
557     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
558     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
559     { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
560     { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
561     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
562     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
563     { MUA, 0, "b|<(?R)*>", "<<b>" },
564     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
565     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
566    
567     /* Deep recursion. */
568     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
569 ph10 677 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
570     { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
571 ph10 691
572 ph10 677 /* Deep recursion: Stack limit reached. */
573     { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
574     { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
575     { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
576     { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
577     { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
578 ph10 667
/* Terminator: regression_tests() stops at the first entry with a NULL pattern. */
579     { 0, 0, NULL, NULL }
580     };
581    
582 ph10 677 static int regression_tests(void)
583 ph10 667 {
584     pcre *re;
585     struct regression_test_case *current = regression_test_cases;
586     const char *error;
587     pcre_extra *extra;
588     int ovector1[32];
589     int ovector2[32];
590     int return_value1, return_value2;
591     int i, err_offs;
592     int total = 0, succesful = 0;
593     int counter = 0;
594    
595     printf("Running JIT regression tests:\n");
596     while (current->pattern) {
597     /* printf("\nPattern: %s :", current->pattern); */
598     total++;
599    
600     error = NULL;
601     re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_BUG), &error, &err_offs, NULL);
602    
603     if (!re) {
604     printf("\nCannot compile pattern: %s\n", current->pattern);
605     current++;
606     continue;
607     }
608    
609     error = NULL;
610     extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
611     if (!extra) {
612     printf("\nCannot study pattern: %s\n", current->pattern);
613     current++;
614     continue;
615     }
616    
617     if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
618     printf("\nJIT compiler does not support: %s\n", current->pattern);
619     current++;
620     continue;
621     }
622    
623     counter++;
624     if ((counter & 0x3) != 0)
625     setstack(extra);
626    
627     for (i = 0; i < 32; ++i)
628     ovector1[i] = -2;
629     return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
630    
631     for (i = 0; i < 32; ++i)
632     ovector2[i] = -2;
633     return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
634    
635     /* If PCRE_BUG is set, just run the test, but do not compare the results.
636     Segfaults can still be captured. */
637     if (!(current->flags & PCRE_BUG)) {
638     if (return_value1 != return_value2) {
639     printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
640     current++;
641     continue;
642     }
643    
644     if (return_value1 >= 0) {
645     return_value1 *= 2;
646     err_offs = 0;
647     for (i = 0; i < return_value1; ++i)
648     if (ovector1[i] != ovector2[i]) {
649     printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
650     err_offs = 1;
651     }
652     if (err_offs) {
653     current++;
654     continue;
655     }
656     }
657     }
658    
659     pcre_free_study(extra);
660     pcre_free(re);
661    
662     /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
663     printf(".");
664     fflush(stdout);
665     current++;
666     succesful++;
667     }
668    
669 ph10 677 if (total == succesful) {
670 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
671 ph10 677 return 0;
672     } else {
673 ph10 667 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
674 ph10 677 return 1;
675 ph10 691 }
676 ph10 667 }
677    
678     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12