/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory | Revision Log


Revision 884 - (show annotations) (download)
Tue Jan 17 11:52:43 2012 UTC (2 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 49921 byte(s)
JIT test prints cpu info
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/*
 * Shorthand PCRE option sets used in the first field of each test case.
 * The letters of each name spell out the options that are OR-ed together:
 *   C = PCRE_CASELESS          M = PCRE_MULTILINE
 *   U = PCRE_UTF8              A = PCRE_NEWLINE_ANYCRLF
 *   P = PCRE_UCP
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115
/*
 * Layout of the start_offset field of a test case.
 *
 * OFFSET_MASK covers the low 16 bits; each F_* flag is a single bit above
 * that range, so the table can write entries such as "1 | F_NOMATCH" and keep
 * both the offset and the flags in one int.
 *
 * NOTE(review): the code that interprets the flags is not visible in this
 * part of the file. Judging by the names and by the entries that use them:
 *   F_NO8 / F_NO16 - presumably exclude the case from the 8-bit / 16-bit run
 *   F_NOMATCH      - presumably the pattern is expected not to match
 *   F_DIFF, F_FORCECONV, F_PROPERTY - meaning to be confirmed against the
 *                    test driver
 */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
123
/*
 * One row of the regression test table.
 *
 * The table's initializers are positional, so the member order below must
 * not change.
 */
struct regression_test_case {
	/* PCRE option bits for this case (the table uses MUA, CMA,
	   PCRE_CASELESS, ... here). */
	int flags;
	/* Starting offset in the low bits (see OFFSET_MASK), optionally OR-ed
	   with F_* flags, e.g. "1 | F_NOMATCH". */
	int start_offset;
	/* Regular expression source, as a C string. */
	const char *pattern;
	/* Subject text the pattern is run against, as a C string. */
	const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219 { MA, 0, "\\Aa", "aaa" },
220 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 { MA, 1, "\\Ga", "aaa" },
222 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 { MA, 0, "a\\z", "aaa" },
224 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225
226 /* Brackets. */
227 { MUA, 0, "(ab|bb|cd)", "bacde" },
228 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233
234 /* Greedy and non-greedy ? operators. */
235 { MUA, 0, "(?:a)?a", "laab" },
236 { CMUA, 0, "(A)?A", "llaab" },
237 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238 { MUA, 0, "(a)?a", "manm" },
239 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242
243 /* Greedy and non-greedy + operators */
244 { MUA, 0, "(aa)+aa", "aaaaaaa" },
245 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251
252 /* Greedy and non-greedy * operators */
253 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260 { MA, 0, "((?:a|)*){0}a", "a" },
261
262 /* Combining ? + * operators */
263 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268
269 /* Single character iterators. */
270 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280 { MUA, 0, "(a?+[^b])+", "babaacacb" },
281 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283 { CMUA, 0, "[c-f]+k", "DemmFke" },
284 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302
303 /* Basic character sets. */
304 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310
311 /* Unicode properties. */
312 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332
333 /* Possible empty brackets. */
334 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344
345 /* Start offset. */
346 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350
351 /* Newline. */
352 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355
356 /* Any character except newline or any newline. */
357 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 { MUA, 0, "\\R+", "ab\r\n\r" },
372 { MUA, 0, "\\R*", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "\r\n\r" },
374 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380 { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383
384 /* Atomic groups (no fallback from "next" direction). */
385 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388 "bababcdedefgheijijklmlmnop" },
389 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394 { MUA, 0, "(?>x|)*$", "aaa" },
395 { MUA, 0, "(?>(x)|)*$", "aaa" },
396 { MUA, 0, "(?>x|())*$", "aaa" },
397 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423
424 /* Possessive quantifiers. */
425 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460
461 /* Back references. */
462 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485
486 /* Assertions. */
487 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511
512 /* Not empty, ACCEPT, FAIL */
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517 { MUA, 0, "a(*ACCEPT)b", "ab" },
518 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532
533 /* Conditional blocks. */
534 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=a)ab)", "a" },
547 { MUA, 0, "(?(?<!b)c)", "b" },
548 { MUA, 0, "(?(DEFINE)a(b))", "a" },
549 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568
569 /* Set start of match. */
570 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575
576 /* First line. */
577 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594
595 /* Recurse. */
596 { MUA, 0, "(a)(?1)", "aa" },
597 { MUA, 0, "((a))(?1)", "aa" },
598 { MUA, 0, "(b|a)(?1)", "aa" },
599 { MUA, 0, "(b|(a))(?1)", "aa" },
600 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 { MUA, 0, "b|<(?R)*>", "<<b>" },
613 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621
622 /* 16 bit specific tests. */
623 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654
655 /* Deep recursion. */
656 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659
660 /* Deep recursion: Stack limit reached. */
661 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666
667 { 0, 0, NULL, NULL }
668 };
669
670 static const unsigned char *tables(int mode)
671 {
672 /* The purpose of this function to allow valgrind
673 for reporting invalid reads and writes. */
674 static unsigned char *tables_copy;
675 const char *errorptr;
676 int erroroffset;
677 const unsigned char *default_tables;
678 #ifdef SUPPORT_PCRE8
679 pcre *regex;
680 char null_str[1] = { 0 };
681 #else
682 pcre16 *regex;
683 PCRE_UCHAR16 null_str[1] = { 0 };
684 #endif
685
686 if (mode) {
687 if (tables_copy)
688 free(tables_copy);
689 tables_copy = NULL;
690 return NULL;
691 }
692
693 if (tables_copy)
694 return tables_copy;
695
696 default_tables = NULL;
697 #ifdef SUPPORT_PCRE8
698 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
699 if (regex) {
700 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
701 pcre_free(regex);
702 }
703 #else
704 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
705 if (regex) {
706 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
707 pcre16_free(regex);
708 }
709 #endif
710 /* Shouldn't ever happen. */
711 if (!default_tables)
712 return NULL;
713
714 /* Unfortunately this value cannot get from pcre_fullinfo.
715 Since this is a test program, this is acceptable at the moment. */
716 tables_copy = (unsigned char *)malloc(1088);
717 if (!tables_copy)
718 return NULL;
719
720 memcpy(tables_copy, default_tables, 1088);
721 return tables_copy;
722 }
723
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument is the
   stack itself, so it is simply handed back. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
730
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument is the
   stack itself, so it is simply handed back. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
737
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack shared by the 8 bit tests.

   extra == NULL: free the cached stack (if any) and forget it.
   extra != NULL: allocate the stack on demand and assign it to extra
                  through callback8. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
756
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack shared by the 16 bit tests.

   extra == NULL: free the cached stack (if any) and forget it.
   extra != NULL: allocate the stack on demand and assign it to extra
                  through callback16. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre16_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
775
776 #ifdef SUPPORT_PCRE16
777
778 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
779 {
780 unsigned char *iptr = (unsigned char*)input;
781 unsigned short *optr = (unsigned short *)output;
782 unsigned int c;
783
784 if (max_length == 0)
785 return 0;
786
787 while (*iptr && max_length > 1) {
788 c = 0;
789 if (offsetmap)
790 *offsetmap++ = (int)(iptr - (unsigned char*)input);
791
792 if (!(*iptr & 0x80))
793 c = *iptr++;
794 else if (!(*iptr & 0x20)) {
795 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
796 iptr += 2;
797 } else if (!(*iptr & 0x10)) {
798 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
799 iptr += 3;
800 } else if (!(*iptr & 0x08)) {
801 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
802 iptr += 4;
803 }
804
805 if (c < 65536) {
806 *optr++ = c;
807 max_length--;
808 } else if (max_length <= 2) {
809 *optr = '\0';
810 return (int)(optr - (unsigned short *)output);
811 } else {
812 c -= 0x10000;
813 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
814 *optr++ = 0xdc00 | (c & 0x3ff);
815 max_length -= 2;
816 if (offsetmap)
817 offsetmap++;
818 }
819 }
820 if (offsetmap)
821 *offsetmap = (int)(iptr - (unsigned char*)input);
822 *optr = '\0';
823 return (int)(optr - (unsigned short *)output);
824 }
825
826 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
827 {
828 unsigned char *iptr = (unsigned char*)input;
829 unsigned short *optr = (unsigned short *)output;
830
831 if (max_length == 0)
832 return 0;
833
834 while (*iptr && max_length > 1) {
835 *optr++ = *iptr++;
836 max_length--;
837 }
838 *optr = '\0';
839 return (int)(optr - (unsigned short *)output);
840 }
841
/* Capacity, in elements, of the two scratch buffers below. */
#define REGTEST_MAX_LENGTH 4096
/* Receives the 16 bit form of the current pattern and, later, of the
   current input (see regression_tests). */
static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16: maps 16 bit unit offsets back to byte
   offsets of the original UTF-8 input. */
static int regtest_offsetmap[REGTEST_MAX_LENGTH];
845
846 #endif /* SUPPORT_PCRE16 */
847
/* Returns 1 when every byte of the NUL-terminated string is below 128,
   0 as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p & 0x80)
			return 0;
	}
	return 1;
}
858
859 static int regression_tests(void)
860 {
861 struct regression_test_case *current = regression_test_cases;
862 const char *error;
863 const char *cpu_info;
864 int i, err_offs;
865 int is_successful, is_ascii_pattern, is_ascii_input;
866 int total = 0;
867 int successful = 0;
868 int counter = 0;
869 #ifdef SUPPORT_PCRE8
870 pcre *re8;
871 pcre_extra *extra8;
872 int ovector8_1[32];
873 int ovector8_2[32];
874 int return_value8_1, return_value8_2;
875 int utf8 = 0, ucp8 = 0;
876 int disabled_flags8 = 0;
877 #endif
878 #ifdef SUPPORT_PCRE16
879 pcre16 *re16;
880 pcre16_extra *extra16;
881 int ovector16_1[32];
882 int ovector16_2[32];
883 int return_value16_1, return_value16_2;
884 int utf16 = 0, ucp16 = 0;
885 int disabled_flags16 = 0;
886 int length16;
887 #endif
888
889 /* This test compares the behaviour of interpreter and JIT. Although disabling
890 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
891 still considered successful from pcre_jit_test point of view. */
892
893 #ifdef SUPPORT_PCRE8
894 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
895 #else
896 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
897 #endif
898
899 printf("Running JIT regression tests\n");
900 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
901
902 #ifdef SUPPORT_PCRE8
903 pcre_config(PCRE_CONFIG_UTF8, &utf8);
904 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
905 if (!utf8)
906 disabled_flags8 |= PCRE_UTF8;
907 if (!ucp8)
908 disabled_flags8 |= PCRE_UCP;
909 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
910 #endif
911 #ifdef SUPPORT_PCRE16
912 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
913 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
914 if (!utf16)
915 disabled_flags16 |= PCRE_UTF8;
916 if (!ucp16)
917 disabled_flags16 |= PCRE_UCP;
918 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
919 #endif
920
921 while (current->pattern) {
922 /* printf("\nPattern: %s :\n", current->pattern); */
923 total++;
924 if (current->start_offset & F_PROPERTY) {
925 is_ascii_pattern = 0;
926 is_ascii_input = 0;
927 } else {
928 is_ascii_pattern = check_ascii(current->pattern);
929 is_ascii_input = check_ascii(current->input);
930 }
931
932 error = NULL;
933 #ifdef SUPPORT_PCRE8
934 re8 = NULL;
935 if (!(current->start_offset & F_NO8))
936 re8 = pcre_compile(current->pattern,
937 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
938 &error, &err_offs, tables(0));
939
940 extra8 = NULL;
941 if (re8) {
942 error = NULL;
943 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
944 if (!extra8) {
945 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
946 pcre_free(re8);
947 re8 = NULL;
948 }
949 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
950 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
951 pcre_free_study(extra8);
952 pcre_free(re8);
953 re8 = NULL;
954 }
955 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
956 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
957 #endif
958 #ifdef SUPPORT_PCRE16
959 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
960 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
961 else
962 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
963
964 re16 = NULL;
965 if (!(current->start_offset & F_NO16))
966 re16 = pcre16_compile(regtest_buf,
967 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
968 &error, &err_offs, tables(0));
969
970 extra16 = NULL;
971 if (re16) {
972 error = NULL;
973 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
974 if (!extra16) {
975 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
976 pcre16_free(re16);
977 re16 = NULL;
978 }
979 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
980 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
981 pcre16_free_study(extra16);
982 pcre16_free(re16);
983 re16 = NULL;
984 }
985 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
986 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
987 #endif
988
989 counter++;
990 if ((counter & 0x3) != 0) {
991 #ifdef SUPPORT_PCRE8
992 setstack8(NULL);
993 #endif
994 #ifdef SUPPORT_PCRE16
995 setstack16(NULL);
996 #endif
997 }
998
999 #ifdef SUPPORT_PCRE8
1000 return_value8_1 = -1000;
1001 return_value8_2 = -1000;
1002 for (i = 0; i < 32; ++i)
1003 ovector8_1[i] = -2;
1004 for (i = 0; i < 32; ++i)
1005 ovector8_2[i] = -2;
1006 if (re8) {
1007 setstack8(extra8);
1008 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1009 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
1010 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1011 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1012 }
1013 #endif
1014
1015 #ifdef SUPPORT_PCRE16
1016 return_value16_1 = -1000;
1017 return_value16_2 = -1000;
1018 for (i = 0; i < 32; ++i)
1019 ovector16_1[i] = -2;
1020 for (i = 0; i < 32; ++i)
1021 ovector16_2[i] = -2;
1022 if (re16) {
1023 setstack16(extra16);
1024 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1025 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1026 else
1027 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1028 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1029 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1030 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1031 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1032 }
1033 #endif
1034
1035 /* If F_DIFF is set, just run the test, but do not compare the results.
1036 Segfaults can still be captured. */
1037
1038 is_successful = 1;
1039 if (!(current->start_offset & F_DIFF)) {
1040 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1041 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1042 /* All results must be the same. */
1043 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1044 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1045 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1046 total, current->pattern, current->input);
1047 is_successful = 0;
1048 } else if (return_value8_1 >= 0) {
1049 return_value8_1 *= 2;
1050 /* Transform back the results. */
1051 if (current->flags & PCRE_UTF8) {
1052 for (i = 0; i < return_value8_1; ++i) {
1053 if (ovector16_1[i] >= 0)
1054 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1055 if (ovector16_2[i] >= 0)
1056 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1057 }
1058 }
1059
1060 for (i = 0; i < return_value8_1; ++i)
1061 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1062 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1063 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1064 total, current->pattern, current->input);
1065 is_successful = 0;
1066 }
1067 }
1068 } else {
1069 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1070 /* Only the 8 bit and 16 bit results must be equal. */
1071 #ifdef SUPPORT_PCRE8
1072 if (return_value8_1 != return_value8_2) {
1073 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1074 return_value8_1, return_value8_2, total, current->pattern, current->input);
1075 is_successful = 0;
1076 } else if (return_value8_1 >= 0) {
1077 return_value8_1 *= 2;
1078 for (i = 0; i < return_value8_1; ++i)
1079 if (ovector8_1[i] != ovector8_2[i]) {
1080 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1081 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1082 is_successful = 0;
1083 }
1084 }
1085 #endif
1086
1087 #ifdef SUPPORT_PCRE16
1088 if (return_value16_1 != return_value16_2) {
1089 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1090 return_value16_1, return_value16_2, total, current->pattern, current->input);
1091 is_successful = 0;
1092 } else if (return_value16_1 >= 0) {
1093 return_value16_1 *= 2;
1094 for (i = 0; i < return_value16_1; ++i)
1095 if (ovector16_1[i] != ovector16_2[i]) {
1096 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1097 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1098 is_successful = 0;
1099 }
1100 }
1101 #endif
1102
1103 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1104 }
1105 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1106 }
1107
1108 if (is_successful) {
1109 #ifdef SUPPORT_PCRE8
1110 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1111 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1112 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1113 total, current->pattern, current->input);
1114 is_successful = 0;
1115 }
1116
1117 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1118 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1119 total, current->pattern, current->input);
1120 is_successful = 0;
1121 }
1122 }
1123 #endif
1124 #ifdef SUPPORT_PCRE16
1125 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1126 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1127 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1128 total, current->pattern, current->input);
1129 is_successful = 0;
1130 }
1131
1132 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1133 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1134 total, current->pattern, current->input);
1135 is_successful = 0;
1136 }
1137 }
1138 #endif
1139 }
1140
1141 if (is_successful)
1142 successful++;
1143
1144 #ifdef SUPPORT_PCRE8
1145 if (re8) {
1146 pcre_free_study(extra8);
1147 pcre_free(re8);
1148 }
1149 #endif
1150 #ifdef SUPPORT_PCRE16
1151 if (re16) {
1152 pcre16_free_study(extra16);
1153 pcre16_free(re16);
1154 }
1155 #endif
1156
1157 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1158 printf(".");
1159 fflush(stdout);
1160 current++;
1161 }
1162 tables(1);
1163 #ifdef SUPPORT_PCRE8
1164 setstack8(NULL);
1165 #endif
1166 #ifdef SUPPORT_PCRE16
1167 setstack16(NULL);
1168 #endif
1169
1170 if (total == successful) {
1171 printf("\nAll JIT regression tests are successfully passed.\n");
1172 return 0;
1173 } else {
1174 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1175 return 1;
1176 }
1177 }
1178
1179 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12