/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 836 - (show annotations) (download)
Wed Dec 28 17:16:11 2011 UTC (2 years, 3 months ago) by ph10
File MIME type: text/plain
File size: 49528 byte(s)
Merging all the changes from the pcre16 branch into the trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Marker value with only bit 31 set.
   NOTE(review): no use of this macro is visible in this part of the file. */
#define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Shorthand PCRE option sets used in the test table.
   Letters: C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   Every set is built on top of MA, so each one expands to the same bits
   as a fully spelled-out OR of the individual options. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)
115
/* Layout of the second field of a test case: the low 16 bits hold the
   start offset (extract with OFFSET_MASK), the bits above hold F_* flags.
   NOTE(review): the flag meanings below are inferred from their names and
   from how the table uses them; confirm against the code that reads them. */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)	/* presumably: skip in 8-bit mode */
#define F_NO16		(1 << 17)	/* presumably: skip in 16-bit mode */
#define F_NOMATCH	(1 << 18)	/* presumably: no match is expected */
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
123
/* One entry of the regression table: a pattern, the subject string it is
   applied to, and the options controlling the run. */
struct regression_test_case {
	/* PCRE option bits; the table fills this with sets such as MUA or
	   with individual PCRE_* options. */
	int flags;
	/* Start offset OR-ed with F_* test flags; the table writes entries
	   like "1 | F_NOMATCH" here. */
	int start_offset;
	/* Regular expression under test. */
	const char *pattern;
	/* Subject string the pattern is applied to. */
	const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219 { MA, 0, "\\Aa", "aaa" },
220 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 { MA, 1, "\\Ga", "aaa" },
222 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 { MA, 0, "a\\z", "aaa" },
224 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225
226 /* Brackets. */
227 { MUA, 0, "(ab|bb|cd)", "bacde" },
228 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233
234 /* Greedy and non-greedy ? operators. */
235 { MUA, 0, "(?:a)?a", "laab" },
236 { CMUA, 0, "(A)?A", "llaab" },
237 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238 { MUA, 0, "(a)?a", "manm" },
239 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242
243 /* Greedy and non-greedy + operators */
244 { MUA, 0, "(aa)+aa", "aaaaaaa" },
245 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251
252 /* Greedy and non-greedy * operators */
253 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260 { MA, 0, "((?:a|)*){0}a", "a" },
261
262 /* Combining ? + * operators */
263 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268
269 /* Single character iterators. */
270 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280 { MUA, 0, "(a?+[^b])+", "babaacacb" },
281 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283 { CMUA, 0, "[c-f]+k", "DemmFke" },
284 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302
303 /* Basic character sets. */
304 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310
311 /* Unicode properties. */
312 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332
333 /* Possible empty brackets. */
334 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344
345 /* Start offset. */
346 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350
351 /* Newline. */
352 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355
356 /* Any character except newline or any newline. */
357 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 { MUA, 0, "\\R+", "ab\r\n\r" },
372 { MUA, 0, "\\R*", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "\r\n\r" },
374 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380 { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383
384 /* Atomic groups (no fallback from "next" direction). */
385 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388 "bababcdedefgheijijklmlmnop" },
389 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394 { MUA, 0, "(?>x|)*$", "aaa" },
395 { MUA, 0, "(?>(x)|)*$", "aaa" },
396 { MUA, 0, "(?>x|())*$", "aaa" },
397 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423
424 /* Possessive quantifiers. */
425 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460
461 /* Back references. */
462 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485
486 /* Assertions. */
487 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511
512 /* Not empty, ACCEPT, FAIL */
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517 { MUA, 0, "a(*ACCEPT)b", "ab" },
518 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532
533 /* Conditional blocks. */
534 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=a)ab)", "a" },
547 { MUA, 0, "(?(?<!b)c)", "b" },
548 { MUA, 0, "(?(DEFINE)a(b))", "a" },
549 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568
569 /* Set start of match. */
570 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575
576 /* First line. */
577 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594
595 /* Recurse. */
596 { MUA, 0, "(a)(?1)", "aa" },
597 { MUA, 0, "((a))(?1)", "aa" },
598 { MUA, 0, "(b|a)(?1)", "aa" },
599 { MUA, 0, "(b|(a))(?1)", "aa" },
600 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 { MUA, 0, "b|<(?R)*>", "<<b>" },
613 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621
622 /* 16 bit specific tests. */
623 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654
655 /* Deep recursion. */
656 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659
660 /* Deep recursion: Stack limit reached. */
661 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666
667 { 0, 0, NULL, NULL }
668 };
669
670 static const unsigned char *tables(int mode)
671 {
672 /* The purpose of this function to allow valgrind
673 for reporting invalid reads and writes. */
674 static unsigned char *tables_copy;
675 pcre *regex;
676 const char *errorptr;
677 int erroroffset;
678 const unsigned char *default_tables;
679 #ifdef SUPPORT_PCRE8
680 char null_str[1] = { 0 };
681 #else
682 PCRE_SCHAR16 null_str[1] = { 0 };
683 #endif
684
685 if (mode) {
686 if (tables_copy)
687 free(tables_copy);
688 tables_copy = NULL;
689 return NULL;
690 }
691
692 if (tables_copy)
693 return tables_copy;
694
695 default_tables = NULL;
696 #ifdef SUPPORT_PCRE8
697 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
698 if (regex) {
699 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
700 pcre_free(regex);
701 }
702 #else
703 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
704 if (regex) {
705 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
706 pcre16_free(regex);
707 }
708 #endif
709 /* Shouldn't ever happen. */
710 if (!default_tables)
711 return NULL;
712
713 /* Unfortunately this value cannot get from pcre_fullinfo.
714 Since this is a test program, this is acceptable at the moment. */
715 tables_copy = (unsigned char *)malloc(1088);
716 if (!tables_copy)
717 return NULL;
718
719 memcpy(tables_copy, default_tables, 1088);
720 return tables_copy;
721 }
722
723 static pcre_jit_stack* callback(void *arg)
724 {
725 return (pcre_jit_stack *)arg;
726 }
727
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack used by the 8 bit test runs. The stack is kept in a
function-local static between calls.

extra == NULL: free the cached stack, if there is one, and forget it.
extra != NULL: allocate the stack with pcre_jit_stack_alloc(1, 1024 * 1024)
               if none is cached, then attach it to extra via callback(). */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
746
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack used by the 16 bit test runs. The stack is kept in a
function-local static between calls.

extra == NULL: free the cached stack, if there is one, and forget it.
extra != NULL: allocate the stack with pcre16_jit_stack_alloc(1, 1024 * 1024)
               if none is cached, then attach it to extra via callback(). */
static void setstack16(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre16_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
765
766 #ifdef SUPPORT_PCRE16
767
768 static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
769 {
770 unsigned char *iptr = (unsigned char*)input;
771 unsigned short *optr = (unsigned short *)output;
772 unsigned int c;
773
774 if (max_length == 0)
775 return 0;
776
777 while (*iptr && max_length > 1) {
778 c = 0;
779 if (offsetmap)
780 *offsetmap++ = (int)(iptr - (unsigned char*)input);
781
782 if (!(*iptr & 0x80))
783 c = *iptr++;
784 else if (!(*iptr & 0x20)) {
785 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
786 iptr += 2;
787 } else if (!(*iptr & 0x10)) {
788 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
789 iptr += 3;
790 } else if (!(*iptr & 0x08)) {
791 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
792 iptr += 4;
793 }
794
795 if (c < 65536) {
796 *optr++ = c;
797 max_length--;
798 } else if (max_length <= 2) {
799 *optr = '\0';
800 return (int)(optr - (unsigned short *)output);
801 } else {
802 c -= 0x10000;
803 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
804 *optr++ = 0xdc00 | (c & 0x3ff);
805 max_length -= 2;
806 if (offsetmap)
807 offsetmap++;
808 }
809 }
810 if (offsetmap)
811 *offsetmap = (int)(iptr - (unsigned char*)input);
812 *optr = '\0';
813 return (int)(optr - (unsigned short *)output);
814 }
815
816 static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
817 {
818 unsigned char *iptr = (unsigned char*)input;
819 unsigned short *optr = (unsigned short *)output;
820
821 if (max_length == 0)
822 return 0;
823
824 while (*iptr && max_length > 1) {
825 *optr++ = *iptr++;
826 max_length--;
827 }
828 *optr = '\0';
829 return (int)(optr - (unsigned short *)output);
830 }
831
/* Scratch storage for the 16 bit runs in regression_tests(). regtest_buf
receives first the pattern and later the subject string converted to 16-bit
units; regtest_offsetmap receives the offset map written by
convert_utf8_to_utf16() for the subject. REGTEST_MAX_LENGTH is the element
count of both arrays and the max_length passed to the conversion helpers. */
832 #define REGTEST_MAX_LENGTH 4096
833 static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
834 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
835
836 #endif /* SUPPORT_PCRE16 */
837
/* Reports whether the NUL-terminated string 'input' consists solely of
bytes in the range 0..127. Returns 1 if so (an empty string qualifies),
0 as soon as a byte above 127 is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		/* Bytes above 127 are exactly those with the top bit set. */
		if ((*cursor & 0x80) != 0)
			return 0;
	}
	return 1;
}
848
849 static int regression_tests(void)
850 {
851 struct regression_test_case *current = regression_test_cases;
852 const char *error;
853 int i, err_offs;
854 int is_successful, is_ascii_pattern, is_ascii_input;
855 int total = 0;
856 int successful = 0;
857 int counter = 0;
858 #ifdef SUPPORT_PCRE8
859 pcre *re8;
860 pcre_extra *extra8;
861 int ovector8_1[32];
862 int ovector8_2[32];
863 int return_value8_1, return_value8_2;
864 int utf8 = 0, ucp8 = 0;
865 int disabled_flags8 = 0;
866 #endif
867 #ifdef SUPPORT_PCRE16
868 pcre *re16;
869 pcre_extra *extra16;
870 int ovector16_1[32];
871 int ovector16_2[32];
872 int return_value16_1, return_value16_2;
873 int utf16 = 0, ucp16 = 0;
874 int disabled_flags16 = 0;
875 int length16;
876 #endif
877
878 /* This test compares the behaviour of interpreter and JIT. Although disabling
879 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
880 still considered successful from pcre_jit_test point of view. */
881
882 printf("Running JIT regression\n");
883
884 #ifdef SUPPORT_PCRE8
885 pcre_config(PCRE_CONFIG_UTF8, &utf8);
886 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
887 if (!utf8)
888 disabled_flags8 |= PCRE_UTF8;
889 if (!ucp8)
890 disabled_flags8 |= PCRE_UCP;
891 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
892 #endif
893 #ifdef SUPPORT_PCRE16
894 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
895 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
896 if (!utf16)
897 disabled_flags16 |= PCRE_UTF8;
898 if (!ucp16)
899 disabled_flags16 |= PCRE_UCP;
900 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
901 #endif
902
903 while (current->pattern) {
904 /* printf("\nPattern: %s :\n", current->pattern); */
905 total++;
906 if (current->start_offset & F_PROPERTY) {
907 is_ascii_pattern = 0;
908 is_ascii_input = 0;
909 } else {
910 is_ascii_pattern = check_ascii(current->pattern);
911 is_ascii_input = check_ascii(current->input);
912 }
913
914 error = NULL;
915 #ifdef SUPPORT_PCRE8
916 re8 = NULL;
917 if (!(current->start_offset & F_NO8))
918 re8 = pcre_compile(current->pattern,
919 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
920 &error, &err_offs, tables(0));
921
922 extra8 = NULL;
923 if (re8) {
924 error = NULL;
925 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
926 if (!extra8) {
927 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
928 pcre_free(re8);
929 re8 = NULL;
930 }
931 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
932 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
933 pcre_free_study(extra8);
934 pcre_free(re8);
935 re8 = NULL;
936 }
937 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
938 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
939 #endif
940 #ifdef SUPPORT_PCRE16
941 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
942 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
943 else
944 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
945
946 re16 = NULL;
947 if (!(current->start_offset & F_NO16))
948 re16 = pcre16_compile(regtest_buf,
949 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
950 &error, &err_offs, tables(0));
951
952 extra16 = NULL;
953 if (re16) {
954 error = NULL;
955 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
956 if (!extra16) {
957 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
958 pcre16_free(re16);
959 re16 = NULL;
960 }
961 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
962 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
963 pcre16_free_study(extra16);
964 pcre16_free(re16);
965 re16 = NULL;
966 }
967 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
968 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
969 #endif
970
971 counter++;
972 if ((counter & 0x3) != 0) {
973 #ifdef SUPPORT_PCRE8
974 setstack8(NULL);
975 #endif
976 #ifdef SUPPORT_PCRE16
977 setstack16(NULL);
978 #endif
979 }
980
981 #ifdef SUPPORT_PCRE8
982 return_value8_1 = -1000;
983 return_value8_2 = -1000;
984 for (i = 0; i < 32; ++i)
985 ovector8_1[i] = -2;
986 for (i = 0; i < 32; ++i)
987 ovector8_2[i] = -2;
988 if (re8) {
989 setstack8(extra8);
990 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
991 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
992 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
993 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
994 }
995 #endif
996
997 #ifdef SUPPORT_PCRE16
998 return_value16_1 = -1000;
999 return_value16_2 = -1000;
1000 for (i = 0; i < 32; ++i)
1001 ovector16_1[i] = -2;
1002 for (i = 0; i < 32; ++i)
1003 ovector16_2[i] = -2;
1004 if (re16) {
1005 setstack16(extra16);
1006 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1007 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1008 else
1009 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1010 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1011 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1012 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1013 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1014 }
1015 #endif
1016
1017 /* If F_DIFF is set, just run the test, but do not compare the results.
1018 Segfaults can still be captured. */
1019
1020 is_successful = 1;
1021 if (!(current->start_offset & F_DIFF)) {
1022 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1023 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1024 /* All results must be the same. */
1025 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1026 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1027 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1028 total, current->pattern, current->input);
1029 is_successful = 0;
1030 } else if (return_value8_1 >= 0) {
1031 return_value8_1 *= 2;
1032 /* Transform back the results. */
1033 if (current->flags & PCRE_UTF8) {
1034 for (i = 0; i < return_value8_1; ++i) {
1035 if (ovector16_1[i] >= 0)
1036 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1037 if (ovector16_2[i] >= 0)
1038 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1039 }
1040 }
1041
1042 for (i = 0; i < return_value8_1; ++i)
1043 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1044 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1045 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1046 total, current->pattern, current->input);
1047 is_successful = 0;
1048 }
1049 }
1050 } else {
1051 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1052 /* Only the 8 bit and 16 bit results must be equal. */
1053 #ifdef SUPPORT_PCRE8
1054 if (return_value8_1 != return_value8_2) {
1055 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1056 return_value8_1, return_value8_2, total, current->pattern, current->input);
1057 is_successful = 0;
1058 } else if (return_value8_1 >= 0) {
1059 return_value8_1 *= 2;
1060 for (i = 0; i < return_value8_1; ++i)
1061 if (ovector8_1[i] != ovector8_2[i]) {
1062 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1063 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1064 is_successful = 0;
1065 }
1066 }
1067 #endif
1068
1069 #ifdef SUPPORT_PCRE16
1070 if (return_value16_1 != return_value16_2) {
1071 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1072 return_value16_1, return_value16_2, total, current->pattern, current->input);
1073 is_successful = 0;
1074 } else if (return_value16_1 >= 0) {
1075 return_value16_1 *= 2;
1076 for (i = 0; i < return_value16_1; ++i)
1077 if (ovector16_1[i] != ovector16_2[i]) {
1078 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1079 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1080 is_successful = 0;
1081 }
1082 }
1083 #endif
1084
1085 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1086 }
1087 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1088 }
1089
1090 if (is_successful) {
1091 #ifdef SUPPORT_PCRE8
1092 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1093 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1094 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1095 total, current->pattern, current->input);
1096 is_successful = 0;
1097 }
1098
1099 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1100 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1101 total, current->pattern, current->input);
1102 is_successful = 0;
1103 }
1104 }
1105 #endif
1106 #ifdef SUPPORT_PCRE16
1107 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1108 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1109 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1110 total, current->pattern, current->input);
1111 is_successful = 0;
1112 }
1113
1114 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1115 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1116 total, current->pattern, current->input);
1117 is_successful = 0;
1118 }
1119 }
1120 #endif
1121 }
1122
1123 if (is_successful)
1124 successful++;
1125
1126 #ifdef SUPPORT_PCRE8
1127 if (re8) {
1128 pcre_free_study(extra8);
1129 pcre_free(re8);
1130 }
1131 #endif
1132 #ifdef SUPPORT_PCRE16
1133 if (re16) {
1134 pcre16_free_study(extra16);
1135 pcre16_free(re16);
1136 }
1137 #endif
1138
1139 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1140 printf(".");
1141 fflush(stdout);
1142 current++;
1143 }
1144 tables(1);
1145 #ifdef SUPPORT_PCRE8
1146 setstack8(NULL);
1147 #endif
1148 #ifdef SUPPORT_PCRE16
1149 setstack16(NULL);
1150 #endif
1151
1152 if (total == successful) {
1153 printf("\nAll JIT regression tests are successfully passed.\n");
1154 return 0;
1155 } else {
1156 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1157 return 1;
1158 }
1159 }
1160
1161 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12