/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 698 - (show annotations) (download)
Mon Sep 19 15:43:02 2011 UTC (3 years, 2 months ago) by ph10
File MIME type: text/plain
File size: 32131 byte(s)
Revised pcre_jit_test that runs OK with or without UTF-8 and/or UCP.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Hungarian utf8 characters
55 \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
56 \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
57 \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
58 \xc2\x85 = 0x85 (NExt Line = NEL)
59 \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
60 \xe2\x80\xa8 = 0x2028 (Line Separator)
61 \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
62 \xcc\x8d = 781 (Something with Mark property)
63 */
64
65 static void setstack(pcre_extra *extra);
66 static int regression_tests(void);
67
68 int main(void)
69 {
70 int jit = 0;
71 pcre_config(PCRE_CONFIG_JIT, &jit);
72 if (!jit) {
73 printf("JIT must be enabled to run pcre_jit_test\n");
74 return 1;
75 }
76 return regression_tests();
77 }
78
79 static pcre_jit_stack* callback(void *arg)
80 {
81 return (pcre_jit_stack *)arg;
82 }
83
84 static void setstack(pcre_extra *extra)
85 {
86 static pcre_jit_stack *stack;
87 if (stack) pcre_jit_stack_free(stack);
88 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
89 pcre_assign_jit_stack(extra, callback, stack);
90 }
91
92 /* --------------------------------------------------------------------------------------- */
93
/* Shorthand option sets used by the test table below. Each letter in a name
   stands for one option: C = PCRE_CASELESS, M = PCRE_MULTILINE,
   U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
94 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
95 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
96 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
97 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
98 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
99 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
100 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
101
/* One regression case: a pattern, the subject it is matched against, and the
   options for the run. The table of cases ends with an entry whose pattern
   is NULL. */
102 struct regression_test_case {
103 int flags; /* PCRE option bits; regression_tests() passes the NOTBOL/NOTEOL/NOTEMPTY* bits to pcre_exec and the rest to pcre_compile. May also carry PCRE_BUG. */
104 int start_offset; /* start offset handed to pcre_exec */
105 const char *pattern; /* pattern handed to pcre_compile; NULL marks the end of the table */
106 const char *input; /* subject string handed to pcre_exec */
107 };
108
109 static struct regression_test_case regression_test_cases[] = {
110 /* Constant strings. */
111 { MUA, 0, "AbC", "AbAbC" },
112 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
113 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
114 { MA, 0, "[^a]", "aAbB" },
115 { CMA, 0, "[^m]", "mMnN" },
116 { MA, 0, "a[^b][^#]", "abacd" },
117 { CMA, 0, "A[^B][^E]", "abacd" },
118 { CMUA, 0, "[^x][^#]", "XxBll" },
119 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
120 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
121 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
122 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
123 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
124 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
125 { MUA, 0, "[axd]", "sAXd" },
126 { CMUA, 0, "[axd]", "sAXd" },
127 { CMUA, 0, "[^axd]", "DxA" },
128 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
129 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
130 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
131 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
132 { MUA, 0, "[^a]", "\xc2\x80[]" },
133 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
134 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
135 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
136 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
137 { PCRE_CASELESS, 0, "a1", "Aa1" },
138
139 /* Assertions. */
140 { MUA, 0, "\\b[^A]", "A_B#" },
141 { MA, 0, "\\b\\W", "\n*" },
142 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
143 { MAP, 0, "\\B", "_\xa1" },
144 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
145 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
146 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
147 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
148 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
149 { MUA, 0, "\\b.", "\xcd\xbe" },
150 { MA, 0, "\\R^", "\n" },
151 { MA, 1, "^", "\n" },
152 { 0, 0, "^ab", "ab" },
153 { 0, 0, "^ab", "aab" },
154 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
155 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
156 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
157 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
158 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
159 { 0, 0, "ab$", "ab" },
160 { 0, 0, "ab$", "ab\r\n" },
161 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
162 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
163 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
164 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
165 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
166 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
167 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
168 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
169 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
170 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
171 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
172 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
173 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
174 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
175 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
176 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
177 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
178 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
179 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
180 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
181 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
182 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
183 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
184 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
185 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
186 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
187 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
188 { MA, 0, "\\Aa", "aaa" },
189 { MA, 1, "\\Aa", "aaa" },
190 { MA, 1, "\\Ga", "aaa" },
191 { MA, 1, "\\Ga", "aba" },
192 { MA, 0, "a\\z", "aaa" },
193 { MA, 0, "a\\z", "aab" },
194
195 /* Brackets. */
196 { MUA, 0, "(ab|bb|cd)", "bacde" },
197 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
198 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
199 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
200 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
201 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
202
203 /* Greedy and non-greedy ? operators. */
204 { MUA, 0, "(?:a)?a", "laab" },
205 { CMUA, 0, "(A)?A", "llaab" },
206 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
207 { MUA, 0, "(a)?a", "manm" },
208 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
209 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
210 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
211
212 /* Greedy and non-greedy + operators */
213 { MUA, 0, "(aa)+aa", "aaaaaaa" },
214 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
215 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
216 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
217 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
218 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
219 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
220
221 /* Greedy and non-greedy * operators */
222 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
223 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
224 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
225 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
226 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
227 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
228 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
229 { MA, 0, "((?:a|)*){0}a", "a" },
230
231 /* Combining ? + * operators */
232 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
233 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
234 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
235 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
236 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
237
238 /* Single character iterators. */
239 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
240 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
241 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
242 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
243 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
244 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
245 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
246 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
247 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
248 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
249 { MUA, 0, "(a?+[^b])+", "babaacacb" },
250 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
251 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
252 { CMUA, 0, "[c-f]+k", "DemmFke" },
253 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
254 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
255 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
256 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
257 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
258 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
259 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
260 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
261 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
262 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
263 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
264 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
265 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
266
267 /* Basic character sets. */
268 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
269 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
270 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
271 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
272 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
273 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
274
275 /* Unicode properties. */
276 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
277 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
278 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
279 { MUAP, 0, "[\\P{Any}]", "abc" },
280 { MUAP, 0, "[^\\p{Any}]", "abc" },
281 { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
282 { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
283 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
284 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
285 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
286 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
287 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
288 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
289 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
290 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
291 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
292 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
293 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
294 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
295 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
296
297 /* Possible empty brackets. */
298 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
299 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
300 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
301 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
302 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
303 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
304 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
305 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
306 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
307 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
308
309 /* Start offset. */
310 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
311 { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
312 { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
313 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
314
315 /* Newline. */
316 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
317 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
318 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
319
320 /* Any character except newline or any newline. */
321 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
322 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
323 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
324 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
325 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
326 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
327 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
328 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
329 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
330 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
331 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
332 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
333 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
334 { MUA, 0, "\\R+", "ab" },
335 { MUA, 0, "\\R+", "ab\r\n\r" },
336 { MUA, 0, "\\R*", "ab\r\n\r" },
337 { MUA, 0, "\\R*", "\r\n\r" },
338 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
339 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
340 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
341 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
342 { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
343 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
344 { MUA, 0, "\\R*\\R\\R", "\n\r" },
345 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
346 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
347
348 /* Atomic groups (no fallback from "next" direction). */
349 { MUA, 0, "(?>ab)ab", "bab" },
350 { MUA, 0, "(?>(ab))ab", "bab" },
351 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
352 "bababcdedefgheijijklmlmnop" },
353 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
354 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
355 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
356 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
357 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
358 { MUA, 0, "(?>x|)*$", "aaa" },
359 { MUA, 0, "(?>(x)|)*$", "aaa" },
360 { MUA, 0, "(?>x|())*$", "aaa" },
361 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
362 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
363 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
364 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
365 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
366 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
367 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
368 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
369 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
370 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
371 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
372 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
373 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
374 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
375 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
376 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
377 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
378 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
379 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
380 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
381 { MUA, 0, "\\X{2,4}", "abcdef" },
382 { MUA, 0, "\\X{2,4}?", "abcdef" },
383 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
384 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
385 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
386 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
387
388 /* Possessive quantifiers. */
389 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
390 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
391 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
392 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
393 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
394 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
395 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
396 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
397 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
398 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
399 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
400 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
401 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
402 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
403 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
404 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
405 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
406 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
407 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
408 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
409 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
410 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
411 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
412 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
413 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
414 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
415 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
416 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
417 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
418 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
419 { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
420 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
421 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
422 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
423 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
424
425 /* Back references. */
426 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
427 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
428 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
429 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
430 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
431 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
432 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
433 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
434 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
435 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
436 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
437 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
438 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
439 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
440 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
441 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
442 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
443 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
444 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
445 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
446 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
447 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
448
449 /* Assertions. */
450 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
451 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
452 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
453 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
454 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
455 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
456 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
457 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
458 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
459 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
460 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
461 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
462 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
463 { MUA, 0, "(?=(?>(a))m)amk", "a k" },
464 { MUA, 0, "(?!(?>(a))m)amk", "a k" },
465 { MUA, 0, "(?>(?=(a))am)amk", "a k" },
466 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
467 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
468 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
469 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
470 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
471 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
472 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
473 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
474
475 /* Not empty, ACCEPT, FAIL */
476 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
477 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
478 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
479 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
480 { MUA, 0, "a(*ACCEPT)b", "ab" },
481 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
482 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
483 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
484 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
485 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
486 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
487 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
488 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
489 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
490 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
491 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
492 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
493 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
494 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
495
496 /* Conditional blocks. */
497 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
498 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
499 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
500 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
501 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
502 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
503 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
504 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
505 { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
506 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
507 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
508 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
509 { MUA, 0, "(?(?=a)ab)", "a" },
510 { MUA, 0, "(?(?<!b)c)", "b" },
511 { MUA, 0, "(?(DEFINE)a(b))", "a" },
512 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
513 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
514 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
515 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
516 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
517 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
518 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
519 { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
520 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
521 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
522 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
523 { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
524 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
525 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
526 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
527 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
528
529 /* Set start of match. */
530 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
531 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
532 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
533 { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
534 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
535
536 /* First line. */
537 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
538 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
539 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
540 { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
541 { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
542 { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
543 { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
544 { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
545 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
546 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
547 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
548 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
549 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
550 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
551 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
552 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
553 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
554
555 /* Recurse. */
556 { MUA, 0, "(a)(?1)", "aa" },
557 { MUA, 0, "((a))(?1)", "aa" },
558 { MUA, 0, "(b|a)(?1)", "aa" },
559 { MUA, 0, "(b|(a))(?1)", "aa" },
560 { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
561 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
562 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
563 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
564 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
565 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
566 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
567 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
568 { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
569 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
570 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
571 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
572 { MUA, 0, "b|<(?R)*>", "<<b>" },
573 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
574 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
575
576 /* Deep recursion. */
577 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
578 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
579 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
580
581 /* Deep recursion: Stack limit reached. */
582 { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
583 { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
584 { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
585 { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
586 { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
587
588 { 0, 0, NULL, NULL }
589 };
590
591 static int regression_tests(void)
592 {
593 pcre *re;
594 struct regression_test_case *current = regression_test_cases;
595 const char *error;
596 pcre_extra *extra;
597 int utf8 = 0, ucp = 0;
598 int ovector1[32];
599 int ovector2[32];
600 int return_value1, return_value2;
601 int i, err_offs;
602 int total = 0, succesful = 0;
603 int counter = 0;
604 int disabled_flags = PCRE_BUG;
605
606 /* This test compares the behaviour of interpreter and JIT. Although disabling
607 utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
608 still considered successful from pcre_jit_test point of view. */
609
610 pcre_config(PCRE_CONFIG_UTF8, &utf8);
611 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
612 if (!utf8)
613 disabled_flags |= PCRE_UTF8;
614 if (!ucp)
615 disabled_flags |= PCRE_UCP;
616
617 printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
618 while (current->pattern) {
619 /* printf("\nPattern: %s :\n", current->pattern); */
620 total++;
621
622 error = NULL;
623 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
624
625 if (!re) {
626 if (utf8 && ucp)
627 printf("\nCannot compile pattern: %s\n", current->pattern);
628 else {
629 /* Some patterns cannot be compiled when either of utf8
630 or ucp is disabled. We just skip them. */
631 printf(".");
632 succesful++;
633 }
634 current++;
635 continue;
636 }
637
638 error = NULL;
639 extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
640 if (!extra) {
641 printf("\nCannot study pattern: %s\n", current->pattern);
642 current++;
643 continue;
644 }
645
646 if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
647 printf("\nJIT compiler does not support: %s\n", current->pattern);
648 current++;
649 continue;
650 }
651
652 counter++;
653 if ((counter & 0x3) != 0)
654 setstack(extra);
655
656 for (i = 0; i < 32; ++i)
657 ovector1[i] = -2;
658 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
659
660 for (i = 0; i < 32; ++i)
661 ovector2[i] = -2;
662 return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
663
664 /* If PCRE_BUG is set, just run the test, but do not compare the results.
665 Segfaults can still be captured. */
666 if (!(current->flags & PCRE_BUG)) {
667 if (return_value1 != return_value2) {
668 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
669 current++;
670 continue;
671 }
672
673 if (return_value1 >= 0) {
674 return_value1 *= 2;
675 err_offs = 0;
676 for (i = 0; i < return_value1; ++i)
677 if (ovector1[i] != ovector2[i]) {
678 printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
679 err_offs = 1;
680 }
681 if (err_offs) {
682 current++;
683 continue;
684 }
685 }
686 }
687
688 pcre_free_study(extra);
689 pcre_free(re);
690
691 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
692 printf(".");
693 fflush(stdout);
694 current++;
695 succesful++;
696 }
697
698 if (total == succesful) {
699 printf("\nAll JIT regression tests are successfully passed.\n");
700 return 0;
701 } else {
702 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
703 return 1;
704 }
705 }
706
707 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12