/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 691 - (show annotations) (download)
Sun Sep 11 14:31:21 2011 UTC (2 years, 11 months ago) by ph10
File MIME type: text/plain
File size: 31387 byte(s)
Final source and document tidies for 8.20-RC1.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2011 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2011
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #include <stdio.h>
44 #include <string.h>
45 #include <time.h>
46 #include "pcre.h"
47
/* Test-table-only marker bit stored in the flags field of a test case. It is
   masked out before the flags reach pcre_compile(), and when it is set the
   JIT and interpreter results are not compared for that case. */
#define PCRE_BUG 0x80000000
49
50 /*
51 Hungarian utf8 characters
52 \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
53 \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
54 \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
55 \xc2\x85 = 0x85 (NExt Line = NEL)
56 \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
57 \xe2\x80\xa8 = 0x2028 (Line Separator)
58 \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
59 \xcc\x8d = 781 (Something with Mark property)
60 */
61
62 static void setstack(pcre_extra *extra);
63 static int regression_tests(void);
64
/* Program entry point: runs the JIT regression suite and uses its result
   (0 when every test passed, 1 otherwise) as the process exit status. */
int main(void)
{
	const int exit_status = regression_tests();

	return exit_status;
}
69
70 static pcre_jit_stack* callback(void *arg)
71 {
72 return (pcre_jit_stack *)arg;
73 }
74
75 static void setstack(pcre_extra *extra)
76 {
77 static pcre_jit_stack *stack;
78 if (stack) pcre_jit_stack_free(stack);
79 stack = pcre_jit_stack_alloc(1, 1024 * 1024);
80 pcre_assign_jit_stack(extra, callback, stack);
81 }
82
83 /* --------------------------------------------------------------------------------------- */
84
/* Shorthands for the option sets used throughout the test table. Each letter
   in a name stands for one option:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)
92
/* One entry of the regression test table. */
struct regression_test_case {
	/* PCRE option bits for this case; may also carry the PCRE_BUG marker. */
	int flags;
	/* Offset in `input` passed to pcre_exec() as the start of matching. */
	int start_offset;
	/* Regular expression to compile; NULL terminates the table. */
	const char *pattern;
	/* Subject string the pattern is matched against. */
	const char *input;
};
99
100 static struct regression_test_case regression_test_cases[] = {
101 /* Constant strings. */
102 { MUA, 0, "AbC", "AbAbC" },
103 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
104 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
105 { MA, 0, "[^a]", "aAbB" },
106 { CMA, 0, "[^m]", "mMnN" },
107 { MA, 0, "a[^b][^#]", "abacd" },
108 { CMA, 0, "A[^B][^E]", "abacd" },
109 { CMUA, 0, "[^x][^#]", "XxBll" },
110 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
111 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
112 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
113 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
114 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
115 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
116 { MUA, 0, "[axd]", "sAXd" },
117 { CMUA, 0, "[axd]", "sAXd" },
118 { CMUA, 0, "[^axd]", "DxA" },
119 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
120 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
121 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
122 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
123 { MUA, 0, "[^a]", "\xc2\x80[]" },
124 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
125 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
126 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
127 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
128 { PCRE_CASELESS, 0, "a1", "Aa1" },
129
130 /* Assertions. */
131 { MUA, 0, "\\b[^A]", "A_B#" },
132 { MA, 0, "\\b\\W", "\n*" },
133 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
134 { MAP, 0, "\\B", "_\xa1" },
135 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
136 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
137 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
138 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
139 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
140 { MUA, 0, "\\b.", "\xcd\xbe" },
141 { MA, 0, "\\R^", "\n" },
142 { MA, 1, "^", "\n" },
143 { 0, 0, "^ab", "ab" },
144 { 0, 0, "^ab", "aab" },
145 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
146 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
147 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
148 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
149 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
150 { 0, 0, "ab$", "ab" },
151 { 0, 0, "ab$", "ab\r\n" },
152 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
153 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
154 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
155 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
156 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
157 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
158 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
159 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
160 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
161 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
162 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
163 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
164 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
165 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
166 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
167 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
168 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
169 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
170 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
171 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
172 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
173 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
174 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
175 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
176 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
177 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
178 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
179 { MA, 0, "\\Aa", "aaa" },
180 { MA, 1, "\\Aa", "aaa" },
181 { MA, 1, "\\Ga", "aaa" },
182 { MA, 1, "\\Ga", "aba" },
183 { MA, 0, "a\\z", "aaa" },
184 { MA, 0, "a\\z", "aab" },
185
186 /* Brackets. */
187 { MUA, 0, "(ab|bb|cd)", "bacde" },
188 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
189 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
190 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
191 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
192 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
193
194 /* Greedy and non-greedy ? operators. */
195 { MUA, 0, "(?:a)?a", "laab" },
196 { CMUA, 0, "(A)?A", "llaab" },
197 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
198 { MUA, 0, "(a)?a", "manm" },
199 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
200 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
201 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
202
203 /* Greedy and non-greedy + operators */
204 { MUA, 0, "(aa)+aa", "aaaaaaa" },
205 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
206 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
207 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
208 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
209 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
210 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
211
212 /* Greedy and non-greedy * operators */
213 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
214 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
215 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
216 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
217 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
218 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
219 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
220 { MA, 0, "((?:a|)*){0}a", "a" },
221
222 /* Combining ? + * operators */
223 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
224 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
225 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
226 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
227 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
228
229 /* Single character iterators. */
230 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
231 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
232 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
233 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
234 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
235 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
236 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
237 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
238 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
239 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
240 { MUA, 0, "(a?+[^b])+", "babaacacb" },
241 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
242 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
243 { CMUA, 0, "[c-f]+k", "DemmFke" },
244 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
245 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
246 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
247 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
248 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
249 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
250 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
251 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
252 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
253 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
254 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
255 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
256 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
257
258 /* Basic character sets. */
259 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
260 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
261 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
262 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
263 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
264 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
265
266 /* Unicode properties. */
267 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
268 { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
269 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
270 { MUAP, 0, "[\\P{Any}]", "abc" },
271 { MUAP, 0, "[^\\p{Any}]", "abc" },
272 { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
273 { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
274 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
275 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
276 { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
277 { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
278 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
279 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
280 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
281 { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
282 { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
283 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
284 { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
285 { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
286 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
287
288 /* Possible empty brackets. */
289 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
290 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
291 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
292 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
293 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
294 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
295 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
296 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
297 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
298 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
299
300 /* Start offset. */
301 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
302 { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
303 { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
304 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
305
306 /* Newline. */
307 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
308 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
309 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
310
311 /* Any character except newline or any newline. */
312 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
313 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
314 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
315 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
316 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
317 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
318 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
319 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
320 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
321 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
322 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
323 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
324 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
325 { MUA, 0, "\\R+", "ab" },
326 { MUA, 0, "\\R+", "ab\r\n\r" },
327 { MUA, 0, "\\R*", "ab\r\n\r" },
328 { MUA, 0, "\\R*", "\r\n\r" },
329 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
330 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
331 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
332 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
333 { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
334 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
335 { MUA, 0, "\\R*\\R\\R", "\n\r" },
336 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
337 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
338
339 /* Atomic groups (no fallback from "next" direction). */
340 { MUA, 0, "(?>ab)ab", "bab" },
341 { MUA, 0, "(?>(ab))ab", "bab" },
342 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
343 "bababcdedefgheijijklmlmnop" },
344 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
345 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
346 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
347 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
348 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
349 { MUA, 0, "(?>x|)*$", "aaa" },
350 { MUA, 0, "(?>(x)|)*$", "aaa" },
351 { MUA, 0, "(?>x|())*$", "aaa" },
352 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
353 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
354 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
355 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
356 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
357 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
358 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
359 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
360 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
361 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
362 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
363 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
364 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
365 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
366 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
367 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
368 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
369 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
370 { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
371 { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
372 { MUA, 0, "\\X{2,4}", "abcdef" },
373 { MUA, 0, "\\X{2,4}?", "abcdef" },
374 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
375 { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
376 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
377 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
378
379 /* Possessive quantifiers. */
380 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
381 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
382 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
383 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
384 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
385 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
386 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
387 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
388 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
389 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
390 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
391 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
392 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
393 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
394 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
395 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
396 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
397 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
398 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
399 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
400 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
401 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
402 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
403 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
404 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
405 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
406 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
407 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
408 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
409 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
410 { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
411 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
412 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
413 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
414 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
415
416 /* Back references. */
417 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
418 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
419 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
420 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
421 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
422 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
423 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
424 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
425 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
426 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
427 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
428 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
429 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
430 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
431 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
432 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
433 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
434 { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
435 { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
436 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
437 { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
438 { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
439
440 /* Assertions. */
441 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
442 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
443 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
444 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
445 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
446 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
447 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
448 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
449 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
450 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
451 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
452 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
453 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
454 { MUA, 0, "(?=(?>(a))m)amk", "a k" },
455 { MUA, 0, "(?!(?>(a))m)amk", "a k" },
456 { MUA, 0, "(?>(?=(a))am)amk", "a k" },
457 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
458 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
459 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
460 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
461 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
462 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
463 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
464 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
465
466 /* Not empty, ACCEPT, FAIL */
467 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
468 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
469 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
470 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
471 { MUA, 0, "a(*ACCEPT)b", "ab" },
472 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
473 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
474 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
475 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
476 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
477 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
478 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
479 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
480 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
481 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
482 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
483 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
484 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
485 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
486
487 /* Conditional blocks. */
488 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
489 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
490 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
491 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
492 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
493 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
494 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
495 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
496 { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
497 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
498 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
499 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
500 { MUA, 0, "(?(?=a)ab)", "a" },
501 { MUA, 0, "(?(?<!b)c)", "b" },
502 { MUA, 0, "(?(DEFINE)a(b))", "a" },
503 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
504 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
505 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
506 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
507 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
508 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
509 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
510 { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
511 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
512 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
513 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
514 { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
515 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
516 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
517 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
518 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
519
520 /* Set start of match. */
521 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
522 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
523 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
524 { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
526
527 /* First line. */
528 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
529 { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
530 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
531 { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
532 { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
533 { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
534 { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
535 { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
536 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
537 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
538 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
539 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
540 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
541 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
542 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
543 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
544 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
545
546 /* Recurse. */
547 { MUA, 0, "(a)(?1)", "aa" },
548 { MUA, 0, "((a))(?1)", "aa" },
549 { MUA, 0, "(b|a)(?1)", "aa" },
550 { MUA, 0, "(b|(a))(?1)", "aa" },
551 { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
552 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
553 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
554 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
555 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
556 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
557 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
558 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
559 { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
560 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
561 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
562 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
563 { MUA, 0, "b|<(?R)*>", "<<b>" },
564 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
565 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
566
567 /* Deep recursion. */
568 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
569 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
570 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
571
572 /* Deep recursion: Stack limit reached. */
573 { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
574 { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
575 { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
576 { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
577 { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
578
579 { 0, 0, NULL, NULL }
580 };
581
582 static int regression_tests(void)
583 {
584 pcre *re;
585 struct regression_test_case *current = regression_test_cases;
586 const char *error;
587 pcre_extra *extra;
588 int ovector1[32];
589 int ovector2[32];
590 int return_value1, return_value2;
591 int i, err_offs;
592 int total = 0, succesful = 0;
593 int counter = 0;
594
595 printf("Running JIT regression tests:\n");
596 while (current->pattern) {
597 /* printf("\nPattern: %s :", current->pattern); */
598 total++;
599
600 error = NULL;
601 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_BUG), &error, &err_offs, NULL);
602
603 if (!re) {
604 printf("\nCannot compile pattern: %s\n", current->pattern);
605 current++;
606 continue;
607 }
608
609 error = NULL;
610 extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
611 if (!extra) {
612 printf("\nCannot study pattern: %s\n", current->pattern);
613 current++;
614 continue;
615 }
616
617 if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
618 printf("\nJIT compiler does not support: %s\n", current->pattern);
619 current++;
620 continue;
621 }
622
623 counter++;
624 if ((counter & 0x3) != 0)
625 setstack(extra);
626
627 for (i = 0; i < 32; ++i)
628 ovector1[i] = -2;
629 return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
630
631 for (i = 0; i < 32; ++i)
632 ovector2[i] = -2;
633 return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
634
635 /* If PCRE_BUG is set, just run the test, but do not compare the results.
636 Segfaults can still be captured. */
637 if (!(current->flags & PCRE_BUG)) {
638 if (return_value1 != return_value2) {
639 printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
640 current++;
641 continue;
642 }
643
644 if (return_value1 >= 0) {
645 return_value1 *= 2;
646 err_offs = 0;
647 for (i = 0; i < return_value1; ++i)
648 if (ovector1[i] != ovector2[i]) {
649 printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
650 err_offs = 1;
651 }
652 if (err_offs) {
653 current++;
654 continue;
655 }
656 }
657 }
658
659 pcre_free_study(extra);
660 pcre_free(re);
661
662 /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
663 printf(".");
664 fflush(stdout);
665 current++;
666 succesful++;
667 }
668
669 if (total == succesful) {
670 printf("\nAll JIT regression tests are successfully passed.\n");
671 return 0;
672 } else {
673 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
674 return 1;
675 }
676 }
677
678 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12