/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory | Revision Log


Revision 929 - (show annotations) (download)
Fri Feb 24 11:07:47 2012 UTC (2 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 53274 byte(s)
(*MARK) support, set_SOM optimization and other fixes in JIT
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
/* Build-time guard: compilation is rejected unless at least one of
   SUPPORT_PCRE8 / SUPPORT_PCRE16 is defined. */
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Shorthand for the PCRE option combinations used in the test table.
   Each letter of a name stands for one option, as the expansions show:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
108 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
109 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
110 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
111 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
112 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
113 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
114 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115
/* Per-test control bits.  The test table ORs these into the start_offset
   field of each entry (e.g. "0 | F_NOMATCH"), so they sit above the low
   16 bits covered by OFFSET_MASK.
   NOTE(review): the code that consumes them is not in this part of the
   file; the meanings below are inferred from names and table usage and
   should be confirmed against the test runner:
     OFFSET_MASK  - presumably extracts the plain start offset.
     F_NO8/F_NO16 - presumably exclude a case from the 8-bit / 16-bit run.
     F_NOMATCH    - presumably marks a case that is expected not to match.
     F_DIFF       - meaning not evident from the table; check the runner.
     F_FORCECONV  - used by the "16 bit specific tests" entries; presumably
                    forces input conversion.
     F_PROPERTY   - set on the entries whose patterns use \p, \P or \X. */
116 #define OFFSET_MASK 0x00ffff
117 #define F_NO8 0x010000
118 #define F_NO16 0x020000
119 #define F_NOMATCH 0x040000
120 #define F_DIFF 0x080000
121 #define F_FORCECONV 0x100000
122 #define F_PROPERTY 0x200000
123
/* One entry of the regression table: a pattern, the subject string it is
   applied to, and the options for that run. */
124 struct regression_test_case {
/* PCRE option bits; the table fills this with MUA, CMA, ... or raw PCRE_*
   values.  The F_* control bits do NOT go here. */
125 int flags;
/* Start offset, OR'ed with the F_* control bits (e.g. "1 | F_NOMATCH"). */
126 int start_offset;
/* Regular expression source, as a C string. */
127 const char *pattern;
/* Subject string the pattern is run against. */
128 const char *input;
129 };
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { MA, 0, "\\Aa", "aaa" },
221 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222 { MA, 1, "\\Ga", "aaa" },
223 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224 { MA, 0, "a\\z", "aaa" },
225 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
226
227 /* Brackets. */
228 { MUA, 0, "(ab|bb|cd)", "bacde" },
229 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234
235 /* Greedy and non-greedy ? operators. */
236 { MUA, 0, "(?:a)?a", "laab" },
237 { CMUA, 0, "(A)?A", "llaab" },
238 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
239 { MUA, 0, "(a)?a", "manm" },
240 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243
244 /* Greedy and non-greedy + operators */
245 { MUA, 0, "(aa)+aa", "aaaaaaa" },
246 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
247 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252
253 /* Greedy and non-greedy * operators */
254 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261 { MA, 0, "((?:a|)*){0}a", "a" },
262
263 /* Combining ? + * operators */
264 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269
270 /* Single character iterators. */
271 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281 { MUA, 0, "(a?+[^b])+", "babaacacb" },
282 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284 { CMUA, 0, "[c-f]+k", "DemmFke" },
285 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303
304 /* Basic character sets. */
305 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311
312 /* Unicode properties. */
313 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
333
334 /* Possible empty brackets. */
335 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345
346 /* Start offset. */
347 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351
352 /* Newline. */
353 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356
357 /* Any character except newline or any newline. */
358 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372 { MUA, 0, "\\R+", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "ab\r\n\r" },
374 { MUA, 0, "\\R*", "\r\n\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381 { MUA, 0, "\\R*\\R\\R", "\n\r" },
382 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384
385 /* Atomic groups (no fallback from "next" direction). */
386 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389 "bababcdedefgheijijklmlmnop" },
390 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395 { MUA, 0, "(?>x|)*$", "aaa" },
396 { MUA, 0, "(?>(x)|)*$", "aaa" },
397 { MUA, 0, "(?>x|())*$", "aaa" },
398 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424
425 /* Possessive quantifiers. */
426 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461
462 /* Back references. */
463 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486
487 /* Assertions. */
488 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
501 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512
513 /* Not empty, ACCEPT, FAIL */
514 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518 { MUA, 0, "a(*ACCEPT)b", "ab" },
519 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
528 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533
534 /* Conditional blocks. */
535 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?=a)ab)", "a" },
548 { MUA, 0, "(?(?<!b)c)", "b" },
549 { MUA, 0, "(?(DEFINE)a(b))", "a" },
550 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569
570 /* Set start of match. */
571 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576
577 /* First line. */
578 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
587 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
594 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
595
596 /* Recurse. */
597 { MUA, 0, "(a)(?1)", "aa" },
598 { MUA, 0, "((a))(?1)", "aa" },
599 { MUA, 0, "(b|a)(?1)", "aa" },
600 { MUA, 0, "(b|(a))(?1)", "aa" },
601 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
602 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
603 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
604 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
605 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
606 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
607 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
608 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
609 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
610 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
611 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
612 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
613 { MUA, 0, "b|<(?R)*>", "<<b>" },
614 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
615 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
616 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
618 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
619 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
620 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
621 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
622
623 /* 16 bit specific tests. */
624 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
625 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
626 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
627 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
629 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
630 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
632 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
635 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
637 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
639 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
640 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
645 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
646 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
647 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
648 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
649 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
650 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
654 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
655
656 /* Partial matching. */
657 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
658 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
659 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
660 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
661 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
662 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
663 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
664 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
665
666 /* (*MARK) verb. */
667 { MUA, 0, "a(*MARK:aa)a", "ababaa" },
668 { MUA, 0 | F_NOMATCH, "a(*:aa)a", "abab" },
669 { MUA, 0, "a(*:aa)(b(*:bb)b|bc)", "abc" },
670 { MUA, 0 | F_NOMATCH, "a(*:1)x|b(*:2)y", "abc" },
671 { MUA, 0, "(?>a(*:aa))b|ac", "ac" },
672 { MUA, 0, "(?(DEFINE)(a(*:aa)))(?1)", "a" },
673 { MUA, 0 | F_NOMATCH, "(?(DEFINE)((a)(*:aa)))(?1)b", "aa" },
674 { MUA, 0, "(?(DEFINE)(a(*:aa)))a(?1)b|aac", "aac" },
675 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
676 { MUA, 0, "(a(*:aa)){0}(?:b(?1)b)+", "babba" },
677 { MUA, 0 | F_NOMATCH, "(a(*:aa)){0}(?:b(?1)b)+", "ba" },
678 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b|c)+c", "babbab cc" },
679 { MUA, 0, "(a\\K(*:aa)){0}(?:b(?1)b)+", "babba" },
680 { MUA, 0 | F_NOMATCH, "(a\\K(*:aa)){0}(?:b(?1)b)+", "ba" },
681
682 /* Deep recursion. */
683 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
684 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
685 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
686
687 /* Deep recursion: Stack limit reached. */
688 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
689 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
690 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
691 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
692 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
693
694 { 0, 0, NULL, NULL }
695 };
696
697 static const unsigned char *tables(int mode)
698 {
699 /* The purpose of this function to allow valgrind
700 for reporting invalid reads and writes. */
701 static unsigned char *tables_copy;
702 const char *errorptr;
703 int erroroffset;
704 unsigned char *default_tables;
705 #ifdef SUPPORT_PCRE8
706 pcre *regex;
707 char null_str[1] = { 0 };
708 #else
709 pcre16 *regex;
710 PCRE_UCHAR16 null_str[1] = { 0 };
711 #endif
712
713 if (mode) {
714 if (tables_copy)
715 free(tables_copy);
716 tables_copy = NULL;
717 return NULL;
718 }
719
720 if (tables_copy)
721 return tables_copy;
722
723 default_tables = NULL;
724 #ifdef SUPPORT_PCRE8
725 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
726 if (regex) {
727 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
728 pcre_free(regex);
729 }
730 #else
731 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
732 if (regex) {
733 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
734 pcre16_free(regex);
735 }
736 #endif
737 /* Shouldn't ever happen. */
738 if (!default_tables)
739 return NULL;
740
741 /* Unfortunately this value cannot get from pcre_fullinfo.
742 Since this is a test program, this is acceptable at the moment. */
743 tables_copy = (unsigned char *)malloc(1088);
744 if (!tables_copy)
745 return NULL;
746
747 memcpy(tables_copy, default_tables, 1088);
748 return tables_copy;
749 }
750
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the callback argument is
   itself the stack, so it is simply handed back. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
757
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the callback argument is
   itself the stack, so it is simply handed back. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
764
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack shared by all 8 bit test cases.

   extra != NULL: assigns the shared stack to extra, allocating the stack
                  first if there is none at the moment.
   extra == NULL: frees the shared stack, if any. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *shared_stack;

	if (extra != NULL) {
		if (shared_stack == NULL)
			shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack != NULL)
		pcre_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
783
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack shared by all 16 bit test cases.

   extra != NULL: assigns the shared stack to extra, allocating the stack
                  first if there is none at the moment.
   extra == NULL: frees the shared stack, if any. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *shared_stack;

	if (extra != NULL) {
		if (shared_stack == NULL)
			shared_stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack != NULL)
		pcre16_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
802
803 #ifdef SUPPORT_PCRE16
804
805 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
806 {
807 unsigned char *iptr = (unsigned char*)input;
808 unsigned short *optr = (unsigned short *)output;
809 unsigned int c;
810
811 if (max_length == 0)
812 return 0;
813
814 while (*iptr && max_length > 1) {
815 c = 0;
816 if (offsetmap)
817 *offsetmap++ = (int)(iptr - (unsigned char*)input);
818
819 if (!(*iptr & 0x80))
820 c = *iptr++;
821 else if (!(*iptr & 0x20)) {
822 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
823 iptr += 2;
824 } else if (!(*iptr & 0x10)) {
825 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
826 iptr += 3;
827 } else if (!(*iptr & 0x08)) {
828 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
829 iptr += 4;
830 }
831
832 if (c < 65536) {
833 *optr++ = c;
834 max_length--;
835 } else if (max_length <= 2) {
836 *optr = '\0';
837 return (int)(optr - (unsigned short *)output);
838 } else {
839 c -= 0x10000;
840 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
841 *optr++ = 0xdc00 | (c & 0x3ff);
842 max_length -= 2;
843 if (offsetmap)
844 offsetmap++;
845 }
846 }
847 if (offsetmap)
848 *offsetmap = (int)(iptr - (unsigned char*)input);
849 *optr = '\0';
850 return (int)(optr - (unsigned short *)output);
851 }
852
853 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
854 {
855 unsigned char *iptr = (unsigned char*)input;
856 unsigned short *optr = (unsigned short *)output;
857
858 if (max_length == 0)
859 return 0;
860
861 while (*iptr && max_length > 1) {
862 *optr++ = *iptr++;
863 max_length--;
864 }
865 *optr = '\0';
866 return (int)(optr - (unsigned short *)output);
867 }
868
869 #define REGTEST_MAX_LENGTH 4096
870 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
871 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
872
873 #endif /* SUPPORT_PCRE16 */
874
/* Returns 1 when every byte of the zero-terminated string 'input' is in
   the range 0..127 (an empty string qualifies), 0 otherwise. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != '\0'; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
885
886 static int regression_tests(void)
887 {
888 struct regression_test_case *current = regression_test_cases;
889 const char *error;
890 char *cpu_info;
891 int i, err_offs;
892 int is_successful, is_ascii_pattern, is_ascii_input;
893 int total = 0;
894 int successful = 0;
895 int successful_row = 0;
896 int counter = 0;
897 int study_mode;
898 #ifdef SUPPORT_PCRE8
899 pcre *re8;
900 pcre_extra *extra8;
901 pcre_extra dummy_extra8;
902 int ovector8_1[32];
903 int ovector8_2[32];
904 int return_value8_1, return_value8_2;
905 unsigned char *mark8_1, *mark8_2;
906 int utf8 = 0, ucp8 = 0;
907 int disabled_flags8 = 0;
908 #endif
909 #ifdef SUPPORT_PCRE16
910 pcre16 *re16;
911 pcre16_extra *extra16;
912 pcre16_extra dummy_extra16;
913 int ovector16_1[32];
914 int ovector16_2[32];
915 int return_value16_1, return_value16_2;
916 PCRE_UCHAR16 *mark16_1, *mark16_2;
917 int utf16 = 0, ucp16 = 0;
918 int disabled_flags16 = 0;
919 int length16;
920 #endif
921
922 /* This test compares the behaviour of interpreter and JIT. Although disabling
923 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
924 still considered successful from pcre_jit_test point of view. */
925
926 #ifdef SUPPORT_PCRE8
927 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
928 #else
929 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
930 #endif
931
932 printf("Running JIT regression tests\n");
933 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
934
935 #ifdef SUPPORT_PCRE8
936 pcre_config(PCRE_CONFIG_UTF8, &utf8);
937 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
938 if (!utf8)
939 disabled_flags8 |= PCRE_UTF8;
940 if (!ucp8)
941 disabled_flags8 |= PCRE_UCP;
942 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
943 #endif
944 #ifdef SUPPORT_PCRE16
945 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
946 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
947 if (!utf16)
948 disabled_flags16 |= PCRE_UTF8;
949 if (!ucp16)
950 disabled_flags16 |= PCRE_UCP;
951 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
952 #endif
953
954 while (current->pattern) {
955 /* printf("\nPattern: %s :\n", current->pattern); */
956 total++;
957 if (current->start_offset & F_PROPERTY) {
958 is_ascii_pattern = 0;
959 is_ascii_input = 0;
960 } else {
961 is_ascii_pattern = check_ascii(current->pattern);
962 is_ascii_input = check_ascii(current->input);
963 }
964
965 if (current->flags & PCRE_PARTIAL_SOFT)
966 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
967 else if (current->flags & PCRE_PARTIAL_HARD)
968 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
969 else
970 study_mode = PCRE_STUDY_JIT_COMPILE;
971 error = NULL;
972 #ifdef SUPPORT_PCRE8
973 re8 = NULL;
974 if (!(current->start_offset & F_NO8))
975 re8 = pcre_compile(current->pattern,
976 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8),
977 &error, &err_offs, tables(0));
978
979 extra8 = NULL;
980 if (re8) {
981 error = NULL;
982 extra8 = pcre_study(re8, study_mode, &error);
983 if (!extra8) {
984 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
985 pcre_free(re8);
986 re8 = NULL;
987 }
988 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
989 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
990 pcre_free_study(extra8);
991 pcre_free(re8);
992 re8 = NULL;
993 }
994 extra8->flags |= PCRE_EXTRA_MARK;
995 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
996 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
997 #endif
998 #ifdef SUPPORT_PCRE16
999 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1000 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
1001 else
1002 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
1003
1004 re16 = NULL;
1005 if (!(current->start_offset & F_NO16))
1006 re16 = pcre16_compile(regtest_buf,
1007 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16),
1008 &error, &err_offs, tables(0));
1009
1010 extra16 = NULL;
1011 if (re16) {
1012 error = NULL;
1013 extra16 = pcre16_study(re16, study_mode, &error);
1014 if (!extra16) {
1015 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
1016 pcre16_free(re16);
1017 re16 = NULL;
1018 }
1019 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
1020 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
1021 pcre16_free_study(extra16);
1022 pcre16_free(re16);
1023 re16 = NULL;
1024 }
1025 extra16->flags |= PCRE_EXTRA_MARK;
1026 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1027 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
1028 #endif
1029
1030 counter++;
1031 if ((counter & 0x3) != 0) {
1032 #ifdef SUPPORT_PCRE8
1033 setstack8(NULL);
1034 #endif
1035 #ifdef SUPPORT_PCRE16
1036 setstack16(NULL);
1037 #endif
1038 }
1039
1040 #ifdef SUPPORT_PCRE8
1041 return_value8_1 = -1000;
1042 return_value8_2 = -1000;
1043 for (i = 0; i < 32; ++i)
1044 ovector8_1[i] = -2;
1045 for (i = 0; i < 32; ++i)
1046 ovector8_2[i] = -2;
1047 if (re8) {
1048 mark8_1 = NULL;
1049 mark8_2 = NULL;
1050 setstack8(extra8);
1051 extra8->mark = &mark8_1;
1052 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1053 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1054 memset(&dummy_extra8, 0, sizeof(pcre_extra));
1055 dummy_extra8.flags = PCRE_EXTRA_MARK;
1056 dummy_extra8.mark = &mark8_2;
1057 return_value8_2 = pcre_exec(re8, &dummy_extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1058 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1059 }
1060 #endif
1061
1062 #ifdef SUPPORT_PCRE16
1063 return_value16_1 = -1000;
1064 return_value16_2 = -1000;
1065 for (i = 0; i < 32; ++i)
1066 ovector16_1[i] = -2;
1067 for (i = 0; i < 32; ++i)
1068 ovector16_2[i] = -2;
1069 if (re16) {
1070 mark16_1 = NULL;
1071 mark16_2 = NULL;
1072 setstack16(extra16);
1073 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1074 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1075 else
1076 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1077 extra16->mark = &mark16_1;
1078 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1079 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1080 memset(&dummy_extra16, 0, sizeof(pcre16_extra));
1081 dummy_extra16.flags = PCRE_EXTRA_MARK;
1082 dummy_extra16.mark = &mark16_2;
1083 return_value16_2 = pcre16_exec(re16, &dummy_extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1084 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1085 }
1086 #endif
1087
1088 /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1089
1090 /* If F_DIFF is set, just run the test, but do not compare the results.
1091 Segfaults can still be captured. */
1092
1093 is_successful = 1;
1094 if (!(current->start_offset & F_DIFF)) {
1095 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1096 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1097 /* All results must be the same. */
1098 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1099 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1100 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1101 total, current->pattern, current->input);
1102 is_successful = 0;
1103 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1104 if (return_value8_1 == PCRE_ERROR_PARTIAL) {
1105 return_value8_1 = 2;
1106 return_value16_1 = 2;
1107 } else {
1108 return_value8_1 *= 2;
1109 return_value16_1 *= 2;
1110 }
1111
1112 /* Transform back the results. */
1113 if (current->flags & PCRE_UTF8) {
1114 for (i = 0; i < return_value8_1; ++i) {
1115 if (ovector16_1[i] >= 0)
1116 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1117 if (ovector16_2[i] >= 0)
1118 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1119 }
1120 }
1121
1122 for (i = 0; i < return_value8_1; ++i)
1123 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1124 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1125 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1126 total, current->pattern, current->input);
1127 is_successful = 0;
1128 }
1129 }
1130 } else {
1131 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1132 /* Only the 8 bit and 16 bit results must be equal. */
1133 #ifdef SUPPORT_PCRE8
1134 if (return_value8_1 != return_value8_2) {
1135 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1136 return_value8_1, return_value8_2, total, current->pattern, current->input);
1137 is_successful = 0;
1138 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1139 if (return_value8_1 == PCRE_ERROR_PARTIAL)
1140 return_value8_1 = 2;
1141 else
1142 return_value8_1 *= 2;
1143
1144 for (i = 0; i < return_value8_1; ++i)
1145 if (ovector8_1[i] != ovector8_2[i]) {
1146 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1147 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1148 is_successful = 0;
1149 }
1150 }
1151 #endif
1152
1153 #ifdef SUPPORT_PCRE16
1154 if (return_value16_1 != return_value16_2) {
1155 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1156 return_value16_1, return_value16_2, total, current->pattern, current->input);
1157 is_successful = 0;
1158 } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) {
1159 if (return_value16_1 == PCRE_ERROR_PARTIAL)
1160 return_value16_1 = 2;
1161 else
1162 return_value16_1 *= 2;
1163
1164 for (i = 0; i < return_value16_1; ++i)
1165 if (ovector16_1[i] != ovector16_2[i]) {
1166 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1167 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1168 is_successful = 0;
1169 }
1170 }
1171 #endif
1172
1173 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1174 }
1175 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1176 }
1177
1178 if (is_successful) {
1179 #ifdef SUPPORT_PCRE8
1180 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1181 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1182 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1183 total, current->pattern, current->input);
1184 is_successful = 0;
1185 }
1186
1187 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1188 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1189 total, current->pattern, current->input);
1190 is_successful = 0;
1191 }
1192 }
1193 #endif
1194 #ifdef SUPPORT_PCRE16
1195 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1196 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1197 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1198 total, current->pattern, current->input);
1199 is_successful = 0;
1200 }
1201
1202 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1203 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1204 total, current->pattern, current->input);
1205 is_successful = 0;
1206 }
1207 }
1208 #endif
1209 }
1210
1211 if (is_successful) {
1212 #ifdef SUPPORT_PCRE8
1213 if (mark8_1 != mark8_2) {
1214 printf("8 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1215 total, current->pattern, current->input);
1216 is_successful = 0;
1217 }
1218 #endif
1219 #ifdef SUPPORT_PCRE16
1220 if (mark16_1 != mark16_2) {
1221 printf("16 bit: Mark value mismatch: [%d] '%s' @ '%s'\n",
1222 total, current->pattern, current->input);
1223 is_successful = 0;
1224 }
1225 #endif
1226 }
1227
1228 #ifdef SUPPORT_PCRE8
1229 if (re8) {
1230 pcre_free_study(extra8);
1231 pcre_free(re8);
1232 }
1233 #endif
1234 #ifdef SUPPORT_PCRE16
1235 if (re16) {
1236 pcre16_free_study(extra16);
1237 pcre16_free(re16);
1238 }
1239 #endif
1240
1241 if (is_successful) {
1242 successful++;
1243 successful_row++;
1244 printf(".");
1245 if (successful_row >= 60) {
1246 successful_row = 0;
1247 printf("\n");
1248 }
1249 } else
1250 successful_row = 0;
1251
1252 fflush(stdout);
1253 current++;
1254 }
1255 tables(1);
1256 #ifdef SUPPORT_PCRE8
1257 setstack8(NULL);
1258 #endif
1259 #ifdef SUPPORT_PCRE16
1260 setstack16(NULL);
1261 #endif
1262
1263 if (total == successful) {
1264 printf("\nAll JIT regression tests are successfully passed.\n");
1265 return 0;
1266 } else {
1267 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1268 return 1;
1269 }
1270 }
1271
1272 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12