/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 667 - (show annotations) (download)
Mon Aug 22 14:57:32 2011 UTC (3 years ago) by ph10
File MIME type: text/plain
File size: 30909 byte(s)
Commit all the changes for JIT support, but without any documentation yet.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #include <stdio.h>
44 #include <string.h>
45 #include <time.h>
46 #include "pcre.h"
47
/* Test-only flag kept in regression_test_case.flags. It is masked out before
   the flags reach pcre_compile(); when it is set, the test is still executed
   but the JIT and non-JIT results are not compared. */
#define PCRE_BUG 0x80000000
49
50 /*
51 Hungarian utf8 characters
52 \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
53 \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
54 \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
55 \xc2\x85 = 0x85 (NExt Line = NEL)
56 \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
57 \xe2\x80\xa8 = 0x2028 (Line Separator)
58 \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
59 \xcc\x8d = 781 (Something with Mark property)
60 */
61
62 static void setstack(pcre_extra *extra);
63 static void regression_tests(void);
64
/* Program entry point: runs the whole JIT regression suite. The exit status
   is always 0; the outcome of the tests is only reported on stdout. */
int main(void)
{
	regression_tests();

	return 0;
}
70
71 static pcre_jit_stack* callback(void *arg)
72 {
73 return (pcre_jit_stack *)arg;
74 }
75
76 static void setstack(pcre_extra *extra)
77 {
78 static pcre_jit_stack *stack;
79 if (stack) pcre_jit_stack_free(stack);
80 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
81 pcre_assign_jit_callback(extra, callback, stack);
82 }
83
84 /* --------------------------------------------------------------------------------------- */
85
/* Shorthand option sets used by the test table. Each letter adds one option:
   M = PCRE_MULTILINE, A = PCRE_NEWLINE_ANYCRLF, U = PCRE_UTF8,
   P = PCRE_UCP, C = PCRE_CASELESS. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)
93
/* One entry of the regression test table. */
struct regression_test_case {
	/* PCRE option bits, optionally combined with PCRE_BUG. The NOTBOL,
	   NOTEOL, NOTEMPTY and NOTEMPTY_ATSTART bits are passed to pcre_exec(),
	   the remaining PCRE bits to pcre_compile(). */
	int flags;
	/* Start offset handed to pcre_exec(). */
	int start_offset;
	/* Regular expression to compile; NULL marks the end of the table. */
	const char *pattern;
	/* Subject string the pattern is matched against. */
	const char *input;
};
100
101 static struct regression_test_case regression_test_cases[] = {
102 /* Constant strings. */
103 { MUA, 0, "AbC", "AbAbC" },
104 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
105 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
106 { MA, 0, "[^a]", "aAbB" },
107 { CMA, 0, "[^m]", "mMnN" },
108 { MA, 0, "a[^b][^#]", "abacd" },
109 { CMA, 0, "A[^B][^E]", "abacd" },
110 { CMUA, 0, "[^x][^#]", "XxBll" },
111 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
112 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
113 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
114 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
115 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
116 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
117 { MUA, 0, "[axd]", "sAXd" },
118 { CMUA, 0, "[axd]", "sAXd" },
119 { CMUA, 0, "[^axd]", "DxA" },
120 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
121 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
122 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
123 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
124 { MUA, 0, "[^a]", "\xc2\x80[]" },
125 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
126 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
127 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
128 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
129 { PCRE_CASELESS, 0, "a1", "Aa1" },
130
131 /* Assertions. */
132 { MUA, 0, "\\b[^A]", "A_B#" },
133 { MA, 0, "\\b\\W", "\n*" },
134 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
135 { MAP, 0, "\\B", "_\xa1" },
136 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
137 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
138 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
139 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
140 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
141 { MUA, 0, "\\b.", "\xcd\xbe" },
142 { MA, 0, "\\R^", "\n" },
143 { MA, 1, "^", "\n" },
144 { 0, 0, "^ab", "ab" },
145 { 0, 0, "^ab", "aab" },
146 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
147 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
148 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
149 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
150 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
151 { 0, 0, "ab$", "ab" },
152 { 0, 0, "ab$", "ab\r\n" },
153 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
154 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
155 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
156 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
157 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
158 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
159 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
160 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
161 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
162 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
163 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
164 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
165 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
166 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
167 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
168 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
169 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
170 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
171 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
172 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
173 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
174 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
175 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
176 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
177 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
178 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
179 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
180 { MA, 0, "\\Aa", "aaa" },
181 { MA, 1, "\\Aa", "aaa" },
182 { MA, 1, "\\Ga", "aaa" },
183 { MA, 1, "\\Ga", "aba" },
184 { MA, 0, "a\\z", "aaa" },
185 { MA, 0, "a\\z", "aab" },
186
187 /* Brackets. */
188 { MUA, 0, "(ab|bb|cd)", "bacde" },
189 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
190 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
191 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
192 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
193 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
194
195 /* Greedy and non-greedy ? operators. */
196 { MUA, 0, "(?:a)?a", "laab" },
197 { CMUA, 0, "(A)?A", "llaab" },
198 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
199 { MUA, 0, "(a)?a", "manm" },
200 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
201 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
202 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
203
204 /* Greedy and non-greedy + operators */
205 { MUA, 0, "(aa)+aa", "aaaaaaa" },
206 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
207 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
208 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
209 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
210 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
211 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
212
213 /* Greedy and non-greedy * operators */
214 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
215 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
216 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
217 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
218 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
219 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
220 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
221 { MA, 0, "((?:a|)*){0}a", "a" },
222
223 /* Combining ? + * operators */
224 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
225 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
226 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
227 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
228 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
229
230 /* Single character iterators. */
231 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
232 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
233 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
234 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
235 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
236 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
237 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
238 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
239 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
240 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
241 { MUA, 0, "(a?+[^b])+", "babaacacb" },
242 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
243 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
244 { CMUA, 0, "[c-f]+k", "DemmFke" },
245 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
246 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
247 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
248 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
249 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
250 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
251 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
252 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
253 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
254 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
255 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
256 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
257 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
258
259 /* Basic character sets. */
260 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
261 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
262 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
263 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
264 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
265 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
266
267 /* Unicode properties. */
268 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
269 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
270 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
271 { MUAP, 0, "[\\P{Any}]", "abc" },
272 { MUAP, 0, "[^\\p{Any}]", "abc" },
273 { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
274 { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
275 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
276 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
277 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
278 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
279 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
280 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
281 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
282 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
283 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
284 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
285 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
286 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
287 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
288
289 /* Possible empty brackets. */
290 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
291 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
292 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
293 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
294 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
295 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
296 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
297 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
298 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
299 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
300
301 /* Start offset. */
302 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
303 { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
304 { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
305 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
306
307 /* Newline. */
308 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
309 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
310 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
311
312 /* Any character except newline or any newline. */
313 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
314 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
315 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
316 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
317 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
318 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
319 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
320 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
321 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
322 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
323 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
324 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
325 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
326 { MUA, 0, "\\R+", "ab" },
327 { MUA, 0, "\\R+", "ab\r\n\r" },
328 { MUA, 0, "\\R*", "ab\r\n\r" },
329 { MUA, 0, "\\R*", "\r\n\r" },
330 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
331 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
332 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
333 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
334 { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
335 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
336 { MUA, 0, "\\R*\\R\\R", "\n\r" },
337 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
338 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
339
340 /* Atomic groups (no fallback from "next" direction). */
341 { MUA, 0, "(?>ab)ab", "bab" },
342 { MUA, 0, "(?>(ab))ab", "bab" },
343 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
344 "bababcdedefgheijijklmlmnop" },
345 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
346 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
347 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
348 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
349 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
350 { MUA, 0, "(?>x|)*$", "aaa" },
351 { MUA, 0, "(?>(x)|)*$", "aaa" },
352 { MUA, 0, "(?>x|())*$", "aaa" },
353 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
354 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
355 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
356 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
357 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
358 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
359 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
360 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
361 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
362 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
363 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
364 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
365 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
366 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
367 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
368 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
369 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
370 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
371 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
372 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
373 { MUA, 0, "\\X{2,4}", "abcdef" },
374 { MUA, 0, "\\X{2,4}?", "abcdef" },
375 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
376 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
377 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
378 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
379
380 /* Possessive quantifiers. */
381 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
382 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
383 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
384 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
385 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
386 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
387 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
388 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
389 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
390 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
391 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
392 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
393 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
394 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
395 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
396 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
397 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
398 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
399 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
400 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
401 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
402 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
403 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
404 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
405 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
406 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
407 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
408 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
409 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
410 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
411 { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
412 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
413 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
414 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
415 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
416
417 /* Back references. */
418 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
419 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
420 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
421 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
422 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
423 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
424 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
425 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
426 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
427 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
428 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
429 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
430 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
431 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
432 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
433 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
434 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
435 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
436 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
437 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
438 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
439 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
440
441 /* Assertions. */
442 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
443 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
444 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
445 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
446 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
447 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
448 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
449 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
450 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
451 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
452 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
453 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
454 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
455 { MUA, 0, "(?=(?>(a))m)amk", "a k" },
456 { MUA, 0, "(?!(?>(a))m)amk", "a k" },
457 { MUA, 0, "(?>(?=(a))am)amk", "a k" },
458 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
459 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
460 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
461 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
462 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
463 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
464 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
465 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
466
467 /* Not empty, ACCEPT, FAIL */
468 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
469 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
470 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
471 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
472 { MUA, 0, "a(*ACCEPT)b", "ab" },
473 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
474 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
475 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
476 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
477 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
478 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
479 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
480 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
481 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
482 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
483 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
484 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
485 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
486 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
487
488 /* Conditional blocks. */
489 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
490 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
491 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
492 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
493 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
494 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
495 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
496 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
497 { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
498 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
499 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
500 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
501 { MUA, 0, "(?(?=a)ab)", "a" },
502 { MUA, 0, "(?(?<!b)c)", "b" },
503 { MUA, 0, "(?(DEFINE)a(b))", "a" },
504 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
505 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
506 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
507 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
508 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
509 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
510 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
511 { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
512 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
513 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
514 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
515 { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
516 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
517 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
518 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
519 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
520
521 /* Set start of match. */
522 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
523 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
524 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
525 { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
526 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
527
528 /* First line. */
529 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
530 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
531 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
532 { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
533 { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
534 { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
535 { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
536 { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
537 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
538 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
539 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
540 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
541 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
542 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
543 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
544 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
545 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
546
547 /* Recurse. */
548 { MUA, 0, "(a)(?1)", "aa" },
549 { MUA, 0, "((a))(?1)", "aa" },
550 { MUA, 0, "(b|a)(?1)", "aa" },
551 { MUA, 0, "(b|(a))(?1)", "aa" },
552 { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
553 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
554 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
555 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
556 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
557 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
558 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
559 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
560 { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
561 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
562 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
563 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
564 { MUA, 0, "b|<(?R)*>", "<<b>" },
565 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
566 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
567
568 /* Deep recursion. */
569 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
570 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aaa+ " },
571
572 { 0, 0, NULL, NULL }
573 };
574
575 static void regression_tests(void)
576 {
577 pcre *re;
578 struct regression_test_case *current = regression_test_cases;
579 const char *error;
580 pcre_extra *extra;
581 int ovector1[32];
582 int ovector2[32];
583 int return_value1, return_value2;
584 int i, err_offs;
585 int total = 0, succesful = 0;
586 int counter = 0;
587
588 printf("Running JIT regression tests:\n");
589 while (current->pattern) {
590 /* printf("\nPattern: %s :", current->pattern); */
591 total++;
592
593 error = NULL;
594 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_BUG), &error, &err_offs, NULL);
595
596 if (!re) {
597 printf("\nCannot compile pattern: %s\n", current->pattern);
598 current++;
599 continue;
600 }
601
602 error = NULL;
603 extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
604 if (!extra) {
605 printf("\nCannot study pattern: %s\n", current->pattern);
606 current++;
607 continue;
608 }
609
610 if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
611 printf("\nJIT compiler does not support: %s\n", current->pattern);
612 current++;
613 continue;
614 }
615
616 counter++;
617 if ((counter & 0x3) != 0)
618 setstack(extra);
619
620 for (i = 0; i < 32; ++i)
621 ovector1[i] = -2;
622 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
623
624 for (i = 0; i < 32; ++i)
625 ovector2[i] = -2;
626 return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
627
628 /* If PCRE_BUG is set, just run the test, but do not compare the results.
629 Segfaults can still be captured. */
630 if (!(current->flags & PCRE_BUG)) {
631 if (return_value1 != return_value2) {
632 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
633 current++;
634 continue;
635 }
636
637 if (return_value1 >= 0) {
638 return_value1 *= 2;
639 err_offs = 0;
640 for (i = 0; i < return_value1; ++i)
641 if (ovector1[i] != ovector2[i]) {
642 printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
643 err_offs = 1;
644 }
645 if (err_offs) {
646 current++;
647 continue;
648 }
649 }
650 }
651
652 pcre_free_study(extra);
653 pcre_free(re);
654
655 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
656 printf(".");
657 fflush(stdout);
658 current++;
659 succesful++;
660 }
661
662 if (total == succesful)
663 printf("\nAll JIT regression tests are successfully passed.\n");
664 else
665 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
666 }
667
668 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12