/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 736 - (show annotations) (download)
Sun Oct 16 15:48:03 2011 UTC (3 years, 1 month ago) by zherczeg
File MIME type: text/plain
File size: 32241 byte(s)
Support OP_ANYBYTE in JIT when utf8 is disabled and optimizing utf8 character length computation
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Test-harness-only flag. A test case marked with PCRE_BUG is still executed,
but the interpreter and JIT results are not compared. The bit is masked out
of the option words before they are handed to pcre_compile() or pcre_exec(). */
#define PCRE_BUG 0x80000000
52
53 /*
54 Hungarian utf8 characters
55 \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
56 \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
57 \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
58 \xc2\x85 = 0x85 (NExt Line = NEL)
59 \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
60 \xe2\x80\xa8 = 0x2028 (Line Separator)
61 \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
62 \xcc\x8d = 781 (Something with Mark property)
63 */
64
65 static void setstack(pcre_extra *extra);
66 static int regression_tests(void);
67
68 int main(void)
69 {
70 int jit = 0;
71 pcre_config(PCRE_CONFIG_JIT, &jit);
72 if (!jit) {
73 printf("JIT must be enabled to run pcre_jit_test\n");
74 return 1;
75 }
76 return regression_tests();
77 }
78
79 static pcre_jit_stack* callback(void *arg)
80 {
81 return (pcre_jit_stack *)arg;
82 }
83
84 static void setstack(pcre_extra *extra)
85 {
86 static pcre_jit_stack *stack;
87 if (stack) pcre_jit_stack_free(stack);
88 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
89 pcre_assign_jit_stack(extra, callback, stack);
90 }
91
92 /* --------------------------------------------------------------------------------------- */
93
/* Shorthand option sets used by the test table. Letter code:
   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
101
/* One entry of the regression table: a pattern, the subject it is matched
against, and how to compile and run it. */
struct regression_test_case {
	/* PCRE option bits. regression_tests() splits them into compile-time
	options and pcre_exec() options; may also carry PCRE_BUG. */
	int flags;
	/* Start offset passed to pcre_exec(). */
	int start_offset;
	/* Regular expression source; NULL marks the end of the table. */
	const char *pattern;
	/* Subject string the pattern is executed on. */
	const char *input;
};
108
109 static struct regression_test_case regression_test_cases[] = {
110 /* Constant strings. */
111 { MUA, 0, "AbC", "AbAbC" },
112 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
113 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
114 { MA, 0, "[^a]", "aAbB" },
115 { CMA, 0, "[^m]", "mMnN" },
116 { MA, 0, "a[^b][^#]", "abacd" },
117 { CMA, 0, "A[^B][^E]", "abacd" },
118 { CMUA, 0, "[^x][^#]", "XxBll" },
119 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
120 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
121 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
122 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
123 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
124 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
125 { MUA, 0, "[axd]", "sAXd" },
126 { CMUA, 0, "[axd]", "sAXd" },
127 { CMUA, 0, "[^axd]", "DxA" },
128 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
129 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
130 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
131 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
132 { MUA, 0, "[^a]", "\xc2\x80[]" },
133 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
134 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
135 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
136 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
137 { PCRE_CASELESS, 0, "a1", "Aa1" },
138 { MA, 0, "\\Ca", "cda" },
139 { CMA, 0, "\\Ca", "CDA" },
140 { MA, 0, "\\Cx", "cda" },
141 { CMA, 0, "\\Cx", "CDA" },
142
143 /* Assertions. */
144 { MUA, 0, "\\b[^A]", "A_B#" },
145 { MA, 0, "\\b\\W", "\n*" },
146 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
147 { MAP, 0, "\\B", "_\xa1" },
148 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
149 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
150 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
151 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
152 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
153 { MUA, 0, "\\b.", "\xcd\xbe" },
154 { MA, 0, "\\R^", "\n" },
155 { MA, 1, "^", "\n" },
156 { 0, 0, "^ab", "ab" },
157 { 0, 0, "^ab", "aab" },
158 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
159 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
160 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
161 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
162 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
163 { 0, 0, "ab$", "ab" },
164 { 0, 0, "ab$", "ab\r\n" },
165 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
166 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
167 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
168 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
169 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
170 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
171 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
172 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
173 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
174 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
175 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
176 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
177 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
178 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
179 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
180 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
181 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
182 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
183 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
184 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
185 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
186 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
187 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
188 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
189 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
190 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
191 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
192 { MA, 0, "\\Aa", "aaa" },
193 { MA, 1, "\\Aa", "aaa" },
194 { MA, 1, "\\Ga", "aaa" },
195 { MA, 1, "\\Ga", "aba" },
196 { MA, 0, "a\\z", "aaa" },
197 { MA, 0, "a\\z", "aab" },
198
199 /* Brackets. */
200 { MUA, 0, "(ab|bb|cd)", "bacde" },
201 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
202 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
203 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
204 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
205 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
206
207 /* Greedy and non-greedy ? operators. */
208 { MUA, 0, "(?:a)?a", "laab" },
209 { CMUA, 0, "(A)?A", "llaab" },
210 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
211 { MUA, 0, "(a)?a", "manm" },
212 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
213 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
214 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
215
216 /* Greedy and non-greedy + operators */
217 { MUA, 0, "(aa)+aa", "aaaaaaa" },
218 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
219 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
220 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
221 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
222 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
223 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
224
225 /* Greedy and non-greedy * operators */
226 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
227 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
228 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
229 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
230 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
231 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
232 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
233 { MA, 0, "((?:a|)*){0}a", "a" },
234
235 /* Combining ? + * operators */
236 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
237 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
238 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
239 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
240 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
241
242 /* Single character iterators. */
243 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
244 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
245 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
246 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
247 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
248 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
249 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
250 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
251 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
252 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
253 { MUA, 0, "(a?+[^b])+", "babaacacb" },
254 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
255 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
256 { CMUA, 0, "[c-f]+k", "DemmFke" },
257 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
258 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
259 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
260 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
261 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
262 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
263 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
264 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
265 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
266 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
267 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
268 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
269 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
270
271 /* Basic character sets. */
272 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
273 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
274 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
275 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
276 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
277 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
278
279 /* Unicode properties. */
280 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
281 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
282 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
283 { MUAP, 0, "[\\P{Any}]", "abc" },
284 { MUAP, 0, "[^\\p{Any}]", "abc" },
285 { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
286 { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
287 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
288 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
289 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
290 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
291 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
292 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
293 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
294 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
295 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
296 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
297 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
298 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
299 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
300
301 /* Possible empty brackets. */
302 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
303 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
304 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
305 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
306 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
307 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
308 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
309 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
310 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
311 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
312
313 /* Start offset. */
314 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
315 { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
316 { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
317 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
318
319 /* Newline. */
320 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
321 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
322 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
323
324 /* Any character except newline or any newline. */
325 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
326 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
327 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
328 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
329 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
330 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
331 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
332 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
333 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
334 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
335 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
336 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
337 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
338 { MUA, 0, "\\R+", "ab" },
339 { MUA, 0, "\\R+", "ab\r\n\r" },
340 { MUA, 0, "\\R*", "ab\r\n\r" },
341 { MUA, 0, "\\R*", "\r\n\r" },
342 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
343 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
344 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
345 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
346 { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
347 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
348 { MUA, 0, "\\R*\\R\\R", "\n\r" },
349 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
350 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
351
352 /* Atomic groups (no fallback from "next" direction). */
353 { MUA, 0, "(?>ab)ab", "bab" },
354 { MUA, 0, "(?>(ab))ab", "bab" },
355 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
356 "bababcdedefgheijijklmlmnop" },
357 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
358 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
359 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
360 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
361 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
362 { MUA, 0, "(?>x|)*$", "aaa" },
363 { MUA, 0, "(?>(x)|)*$", "aaa" },
364 { MUA, 0, "(?>x|())*$", "aaa" },
365 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
366 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
367 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
368 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
369 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
370 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
371 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
372 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
373 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
374 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
375 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
376 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
377 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
378 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
379 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
380 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
381 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
382 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
383 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
384 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
385 { MUA, 0, "\\X{2,4}", "abcdef" },
386 { MUA, 0, "\\X{2,4}?", "abcdef" },
387 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
388 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
389 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
390 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
391
392 /* Possessive quantifiers. */
393 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
394 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
395 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
396 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
397 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
398 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
399 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
400 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
401 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
402 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
403 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
404 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
405 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
406 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
407 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
408 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
409 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
410 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
411 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
412 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
413 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
414 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
415 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
416 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
417 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
418 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
419 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
420 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
421 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
422 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
423 { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
424 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
425 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
426 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
427 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
428
429 /* Back references. */
430 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
431 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
432 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
433 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
434 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
435 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
436 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
437 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
438 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
439 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
440 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
441 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
442 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
443 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
444 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
445 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
446 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
447 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
448 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
449 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
450 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
451 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
452
453 /* Assertions. */
454 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
455 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
456 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
457 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
458 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
459 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
460 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
461 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
462 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
463 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
464 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
465 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
466 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
467 { MUA, 0, "(?=(?>(a))m)amk", "a k" },
468 { MUA, 0, "(?!(?>(a))m)amk", "a k" },
469 { MUA, 0, "(?>(?=(a))am)amk", "a k" },
470 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
471 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
472 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
473 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
474 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
475 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
476 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
477 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
478
479 /* Not empty, ACCEPT, FAIL */
480 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
481 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
482 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
483 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
484 { MUA, 0, "a(*ACCEPT)b", "ab" },
485 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
486 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
487 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
488 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
489 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
490 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
491 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
492 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
493 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
494 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
495 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
496 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
497 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
498 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
499
500 /* Conditional blocks. */
501 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
502 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
503 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
504 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
505 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
506 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
507 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
508 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
509 { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
510 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
511 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
512 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
513 { MUA, 0, "(?(?=a)ab)", "a" },
514 { MUA, 0, "(?(?<!b)c)", "b" },
515 { MUA, 0, "(?(DEFINE)a(b))", "a" },
516 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
517 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
518 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
519 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
520 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
521 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
522 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
523 { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
524 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
525 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
526 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
527 { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
528 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
529 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
530 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
531 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
532
533 /* Set start of match. */
534 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
535 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
536 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
537 { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
538 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
539
540 /* First line. */
541 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
542 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
543 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
544 { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
545 { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
546 { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
547 { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
548 { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
549 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
550 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
551 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
552 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
553 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
554 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
555 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
556 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
557 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
558
559 /* Recurse. */
560 { MUA, 0, "(a)(?1)", "aa" },
561 { MUA, 0, "((a))(?1)", "aa" },
562 { MUA, 0, "(b|a)(?1)", "aa" },
563 { MUA, 0, "(b|(a))(?1)", "aa" },
564 { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
565 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
566 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
567 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
568 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
569 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
570 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
571 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
572 { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
573 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
574 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
575 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
576 { MUA, 0, "b|<(?R)*>", "<<b>" },
577 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
578 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
579
580 /* Deep recursion. */
581 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
582 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
583 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
584
585 /* Deep recursion: Stack limit reached. */
586 { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
587 { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
588 { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
589 { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
590 { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
591
592 { 0, 0, NULL, NULL }
593 };
594
595 static int regression_tests(void)
596 {
597 pcre *re;
598 struct regression_test_case *current = regression_test_cases;
599 const char *error;
600 pcre_extra *extra;
601 int utf8 = 0, ucp = 0;
602 int ovector1[32];
603 int ovector2[32];
604 int return_value1, return_value2;
605 int i, err_offs;
606 int total = 0, succesful = 0;
607 int counter = 0;
608 int disabled_flags = PCRE_BUG;
609
610 /* This test compares the behaviour of interpreter and JIT. Although disabling
611 utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
612 still considered successful from pcre_jit_test point of view. */
613
614 pcre_config(PCRE_CONFIG_UTF8, &utf8);
615 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
616 if (!utf8)
617 disabled_flags |= PCRE_UTF8;
618 if (!ucp)
619 disabled_flags |= PCRE_UCP;
620
621 printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
622 while (current->pattern) {
623 /* printf("\nPattern: %s :\n", current->pattern); */
624 total++;
625
626 error = NULL;
627 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
628
629 if (!re) {
630 if (utf8 && ucp)
631 printf("\nCannot compile pattern: %s\n", current->pattern);
632 else {
633 /* Some patterns cannot be compiled when either of utf8
634 or ucp is disabled. We just skip them. */
635 printf(".");
636 succesful++;
637 }
638 current++;
639 continue;
640 }
641
642 error = NULL;
643 extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
644 if (!extra) {
645 printf("\nCannot study pattern: %s\n", current->pattern);
646 current++;
647 continue;
648 }
649
650 if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
651 printf("\nJIT compiler does not support: %s\n", current->pattern);
652 current++;
653 continue;
654 }
655
656 counter++;
657 if ((counter & 0x3) != 0)
658 setstack(extra);
659
660 for (i = 0; i < 32; ++i)
661 ovector1[i] = -2;
662 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
663
664 for (i = 0; i < 32; ++i)
665 ovector2[i] = -2;
666 return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
667
668 /* If PCRE_BUG is set, just run the test, but do not compare the results.
669 Segfaults can still be captured. */
670 if (!(current->flags & PCRE_BUG)) {
671 if (return_value1 != return_value2) {
672 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
673 current++;
674 continue;
675 }
676
677 if (return_value1 >= 0) {
678 return_value1 *= 2;
679 err_offs = 0;
680 for (i = 0; i < return_value1; ++i)
681 if (ovector1[i] != ovector2[i]) {
682 printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
683 err_offs = 1;
684 }
685 if (err_offs) {
686 current++;
687 continue;
688 }
689 }
690 }
691
692 pcre_free_study(extra);
693 pcre_free(re);
694
695 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
696 printf(".");
697 fflush(stdout);
698 current++;
699 succesful++;
700 }
701
702 if (total == succesful) {
703 printf("\nAll JIT regression tests are successfully passed.\n");
704 return 0;
705 } else {
706 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
707 return 1;
708 }
709 }
710
711 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12