/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 852 - (show annotations) (download)
Thu Jan 5 19:18:12 2012 UTC (2 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 49641 byte(s)
Add pcre16 prefix to 16 bit structs
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
/* Constant with only bit 31 set.
   NOTE(review): not referenced anywhere in the visible part of this file;
   presumably a marker for results affected by a known PCRE bug - confirm
   against the code that uses it. */
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
/* Stop the build unless SUPPORT_PCRE8 or SUPPORT_PCRE16 is defined. */
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Shorthand option sets used as the first field of the test cases.
   Each letter of a name stands for one option in its definition:
   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
108 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
109 #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
110 #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
111 #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
112 #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
113 #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
114 #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115
/* Values OR'ed into the second field (start_offset) of a test case.
   OFFSET_MASK covers the low 16 bits; each F_* constant is a single bit
   above that range.
   NOTE(review): the code that interprets these bits is outside this view.
   Judging by the names and the visible test cases: F_NOMATCH presumably
   marks a case that is expected not to match, F_NO8 / F_NO16 presumably
   exclude the 8-bit or 16-bit run, F_PROPERTY accompanies patterns that
   use \p, \P or \X, and F_FORCECONV is seen here only on the
   "16 bit specific tests". Confirm all of these, and the meaning of
   F_DIFF, against the test runner. */
116 #define OFFSET_MASK 0x00ffff
117 #define F_NO8 0x010000
118 #define F_NO16 0x020000
119 #define F_NOMATCH 0x040000
120 #define F_DIFF 0x080000
121 #define F_FORCECONV 0x100000
122 #define F_PROPERTY 0x200000
123
/* One entry of the regression test table: a pattern, the text to run it
   on, and the options for the run. The table initializes these fields
   positionally, so their order must not change. */
124 struct regression_test_case {
/* PCRE_* option bits for this case (for example MUA, PCRE_CASELESS, or 0). */
125 int flags;
/* Start offset combined with F_* flag bits (for example "1 | F_NOMATCH").
   NOTE(review): presumably separated with OFFSET_MASK by the test runner,
   which is outside this view - confirm. */
126 int start_offset;
/* Regular expression source text. */
127 const char *pattern;
/* Subject text for the pattern. */
128 const char *input;
129 };
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219 { MA, 0, "\\Aa", "aaa" },
220 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 { MA, 1, "\\Ga", "aaa" },
222 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 { MA, 0, "a\\z", "aaa" },
224 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225
226 /* Brackets. */
227 { MUA, 0, "(ab|bb|cd)", "bacde" },
228 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233
234 /* Greedy and non-greedy ? operators. */
235 { MUA, 0, "(?:a)?a", "laab" },
236 { CMUA, 0, "(A)?A", "llaab" },
237 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238 { MUA, 0, "(a)?a", "manm" },
239 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242
243 /* Greedy and non-greedy + operators */
244 { MUA, 0, "(aa)+aa", "aaaaaaa" },
245 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251
252 /* Greedy and non-greedy * operators */
253 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260 { MA, 0, "((?:a|)*){0}a", "a" },
261
262 /* Combining ? + * operators */
263 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268
269 /* Single character iterators. */
270 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280 { MUA, 0, "(a?+[^b])+", "babaacacb" },
281 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283 { CMUA, 0, "[c-f]+k", "DemmFke" },
284 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302
303 /* Basic character sets. */
304 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310
311 /* Unicode properties. */
312 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332
333 /* Possible empty brackets. */
334 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344
345 /* Start offset. */
346 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350
351 /* Newline. */
352 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355
356 /* Any character except newline or any newline. */
357 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 { MUA, 0, "\\R+", "ab\r\n\r" },
372 { MUA, 0, "\\R*", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "\r\n\r" },
374 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380 { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383
384 /* Atomic groups (no fallback from "next" direction). */
385 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388 "bababcdedefgheijijklmlmnop" },
389 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394 { MUA, 0, "(?>x|)*$", "aaa" },
395 { MUA, 0, "(?>(x)|)*$", "aaa" },
396 { MUA, 0, "(?>x|())*$", "aaa" },
397 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423
424 /* Possessive quantifiers. */
425 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460
461 /* Back references. */
462 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485
486 /* Assertions. */
487 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511
512 /* Not empty, ACCEPT, FAIL */
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517 { MUA, 0, "a(*ACCEPT)b", "ab" },
518 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532
533 /* Conditional blocks. */
534 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=a)ab)", "a" },
547 { MUA, 0, "(?(?<!b)c)", "b" },
548 { MUA, 0, "(?(DEFINE)a(b))", "a" },
549 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568
569 /* Set start of match. */
570 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575
576 /* First line. */
577 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594
595 /* Recurse. */
596 { MUA, 0, "(a)(?1)", "aa" },
597 { MUA, 0, "((a))(?1)", "aa" },
598 { MUA, 0, "(b|a)(?1)", "aa" },
599 { MUA, 0, "(b|(a))(?1)", "aa" },
600 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 { MUA, 0, "b|<(?R)*>", "<<b>" },
613 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621
622 /* 16 bit specific tests. */
623 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654
655 /* Deep recursion. */
656 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659
660 /* Deep recursion: Stack limit reached. */
661 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666
667 { 0, 0, NULL, NULL }
668 };
669
670 static const unsigned char *tables(int mode)
671 {
672 /* The purpose of this function to allow valgrind
673 for reporting invalid reads and writes. */
674 static unsigned char *tables_copy;
675 const char *errorptr;
676 int erroroffset;
677 const unsigned char *default_tables;
678 #ifdef SUPPORT_PCRE8
679 pcre *regex;
680 char null_str[1] = { 0 };
681 #else
682 pcre16 *regex;
683 PCRE_SCHAR16 null_str[1] = { 0 };
684 #endif
685
686 if (mode) {
687 if (tables_copy)
688 free(tables_copy);
689 tables_copy = NULL;
690 return NULL;
691 }
692
693 if (tables_copy)
694 return tables_copy;
695
696 default_tables = NULL;
697 #ifdef SUPPORT_PCRE8
698 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
699 if (regex) {
700 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
701 pcre_free(regex);
702 }
703 #else
704 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
705 if (regex) {
706 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
707 pcre16_free(regex);
708 }
709 #endif
710 /* Shouldn't ever happen. */
711 if (!default_tables)
712 return NULL;
713
714 /* Unfortunately this value cannot get from pcre_fullinfo.
715 Since this is a test program, this is acceptable at the moment. */
716 tables_copy = (unsigned char *)malloc(1088);
717 if (!tables_copy)
718 return NULL;
719
720 memcpy(tables_copy, default_tables, 1088);
721 return tables_copy;
722 }
723
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the user argument registered
with pcre_assign_jit_stack is the stack itself, so it is returned as is.
Only setstack8 uses it, hence the same SUPPORT_PCRE8 guard. */
static pcre_jit_stack* callback8(void *arg)
{
	return (pcre_jit_stack *)arg;
}
#endif /* SUPPORT_PCRE8 */
728
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the user argument registered
with pcre16_assign_jit_stack is the stack itself, so it is returned as is.
Only setstack16 uses it, hence the same SUPPORT_PCRE16 guard. */
static pcre16_jit_stack* callback16(void *arg)
{
	return (pcre16_jit_stack *)arg;
}
#endif /* SUPPORT_PCRE16 */
733
#ifdef SUPPORT_PCRE8
/* Manages one cached JIT stack for the 8 bit tests.

extra != NULL: allocate the stack on first use (pcre_jit_stack_alloc is
               called with the arguments 1 and 1024 * 1024) and assign it
               to extra through callback8.
extra == NULL: free the cached stack, if there is one. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *jit_stack;

	if (extra) {
		if (!jit_stack)
			jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		/* jit_stack is passed as the callback argument; callback8
		returns it unchanged. */
		pcre_assign_jit_stack(extra, callback8, jit_stack);
		return;
	}

	if (jit_stack) {
		pcre_jit_stack_free(jit_stack);
		jit_stack = NULL;
	}
}
#endif /* SUPPORT_PCRE8 */
752
#ifdef SUPPORT_PCRE16
/* Manages one cached JIT stack for the 16 bit tests.

extra != NULL: allocate the stack on first use (pcre16_jit_stack_alloc is
               called with the arguments 1 and 1024 * 1024) and assign it
               to extra through callback16.
extra == NULL: free the cached stack, if there is one. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *jit_stack;

	if (extra) {
		if (!jit_stack)
			jit_stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		/* jit_stack is passed as the callback argument; callback16
		returns it unchanged. */
		pcre16_assign_jit_stack(extra, callback16, jit_stack);
		return;
	}

	if (jit_stack) {
		pcre16_jit_stack_free(jit_stack);
		jit_stack = NULL;
	}
}
#endif /* SUPPORT_PCRE16 */
771
772 #ifdef SUPPORT_PCRE16
773
774 static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
775 {
776 unsigned char *iptr = (unsigned char*)input;
777 unsigned short *optr = (unsigned short *)output;
778 unsigned int c;
779
780 if (max_length == 0)
781 return 0;
782
783 while (*iptr && max_length > 1) {
784 c = 0;
785 if (offsetmap)
786 *offsetmap++ = (int)(iptr - (unsigned char*)input);
787
788 if (!(*iptr & 0x80))
789 c = *iptr++;
790 else if (!(*iptr & 0x20)) {
791 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
792 iptr += 2;
793 } else if (!(*iptr & 0x10)) {
794 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
795 iptr += 3;
796 } else if (!(*iptr & 0x08)) {
797 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
798 iptr += 4;
799 }
800
801 if (c < 65536) {
802 *optr++ = c;
803 max_length--;
804 } else if (max_length <= 2) {
805 *optr = '\0';
806 return (int)(optr - (unsigned short *)output);
807 } else {
808 c -= 0x10000;
809 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
810 *optr++ = 0xdc00 | (c & 0x3ff);
811 max_length -= 2;
812 if (offsetmap)
813 offsetmap++;
814 }
815 }
816 if (offsetmap)
817 *offsetmap = (int)(iptr - (unsigned char*)input);
818 *optr = '\0';
819 return (int)(optr - (unsigned short *)output);
820 }
821
822 static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
823 {
824 unsigned char *iptr = (unsigned char*)input;
825 unsigned short *optr = (unsigned short *)output;
826
827 if (max_length == 0)
828 return 0;
829
830 while (*iptr && max_length > 1) {
831 *optr++ = *iptr++;
832 max_length--;
833 }
834 *optr = '\0';
835 return (int)(optr - (unsigned short *)output);
836 }
837
838 #define REGTEST_MAX_LENGTH 4096
839 static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
840 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
841
842 #endif /* SUPPORT_PCRE16 */
843
/* Returns 1 when every byte of the zero terminated string is in the
0..127 range (an empty string qualifies), 0 otherwise. */
static int check_ascii(const char *input)
{
	/* Bytes are inspected as unsigned so that values above 127 are not
	seen as negative when char is signed. */
	const unsigned char *ptr = (const unsigned char *)input;

	for (; *ptr != '\0'; ptr++) {
		if (*ptr > 127)
			return 0;
	}
	return 1;
}
854
855 static int regression_tests(void)
856 {
857 struct regression_test_case *current = regression_test_cases;
858 const char *error;
859 int i, err_offs;
860 int is_successful, is_ascii_pattern, is_ascii_input;
861 int total = 0;
862 int successful = 0;
863 int counter = 0;
864 #ifdef SUPPORT_PCRE8
865 pcre *re8;
866 pcre_extra *extra8;
867 int ovector8_1[32];
868 int ovector8_2[32];
869 int return_value8_1, return_value8_2;
870 int utf8 = 0, ucp8 = 0;
871 int disabled_flags8 = 0;
872 #endif
873 #ifdef SUPPORT_PCRE16
874 pcre16 *re16;
875 pcre16_extra *extra16;
876 int ovector16_1[32];
877 int ovector16_2[32];
878 int return_value16_1, return_value16_2;
879 int utf16 = 0, ucp16 = 0;
880 int disabled_flags16 = 0;
881 int length16;
882 #endif
883
884 /* This test compares the behaviour of interpreter and JIT. Although disabling
885 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
886 still considered successful from pcre_jit_test point of view. */
887
888 printf("Running JIT regression\n");
889
890 #ifdef SUPPORT_PCRE8
891 pcre_config(PCRE_CONFIG_UTF8, &utf8);
892 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
893 if (!utf8)
894 disabled_flags8 |= PCRE_UTF8;
895 if (!ucp8)
896 disabled_flags8 |= PCRE_UCP;
897 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
898 #endif
899 #ifdef SUPPORT_PCRE16
900 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
901 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
902 if (!utf16)
903 disabled_flags16 |= PCRE_UTF8;
904 if (!ucp16)
905 disabled_flags16 |= PCRE_UCP;
906 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
907 #endif
908
909 while (current->pattern) {
910 /* printf("\nPattern: %s :\n", current->pattern); */
911 total++;
912 if (current->start_offset & F_PROPERTY) {
913 is_ascii_pattern = 0;
914 is_ascii_input = 0;
915 } else {
916 is_ascii_pattern = check_ascii(current->pattern);
917 is_ascii_input = check_ascii(current->input);
918 }
919
920 error = NULL;
921 #ifdef SUPPORT_PCRE8
922 re8 = NULL;
923 if (!(current->start_offset & F_NO8))
924 re8 = pcre_compile(current->pattern,
925 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
926 &error, &err_offs, tables(0));
927
928 extra8 = NULL;
929 if (re8) {
930 error = NULL;
931 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
932 if (!extra8) {
933 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
934 pcre_free(re8);
935 re8 = NULL;
936 }
937 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
938 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
939 pcre_free_study(extra8);
940 pcre_free(re8);
941 re8 = NULL;
942 }
943 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
944 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
945 #endif
946 #ifdef SUPPORT_PCRE16
947 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
948 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
949 else
950 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
951
952 re16 = NULL;
953 if (!(current->start_offset & F_NO16))
954 re16 = pcre16_compile(regtest_buf,
955 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
956 &error, &err_offs, tables(0));
957
958 extra16 = NULL;
959 if (re16) {
960 error = NULL;
961 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
962 if (!extra16) {
963 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
964 pcre16_free(re16);
965 re16 = NULL;
966 }
967 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
968 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
969 pcre16_free_study(extra16);
970 pcre16_free(re16);
971 re16 = NULL;
972 }
973 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
974 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
975 #endif
976
977 counter++;
978 if ((counter & 0x3) != 0) {
979 #ifdef SUPPORT_PCRE8
980 setstack8(NULL);
981 #endif
982 #ifdef SUPPORT_PCRE16
983 setstack16(NULL);
984 #endif
985 }
986
987 #ifdef SUPPORT_PCRE8
988 return_value8_1 = -1000;
989 return_value8_2 = -1000;
990 for (i = 0; i < 32; ++i)
991 ovector8_1[i] = -2;
992 for (i = 0; i < 32; ++i)
993 ovector8_2[i] = -2;
994 if (re8) {
995 setstack8(extra8);
996 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
997 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
998 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
999 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1000 }
1001 #endif
1002
1003 #ifdef SUPPORT_PCRE16
1004 return_value16_1 = -1000;
1005 return_value16_2 = -1000;
1006 for (i = 0; i < 32; ++i)
1007 ovector16_1[i] = -2;
1008 for (i = 0; i < 32; ++i)
1009 ovector16_2[i] = -2;
1010 if (re16) {
1011 setstack16(extra16);
1012 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1013 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1014 else
1015 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1016 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1017 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1018 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1019 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1020 }
1021 #endif
1022
1023 /* If F_DIFF is set, just run the test, but do not compare the results.
1024 Segfaults can still be captured. */
1025
1026 is_successful = 1;
1027 if (!(current->start_offset & F_DIFF)) {
1028 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1029 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1030 /* All results must be the same. */
1031 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1032 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1033 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1034 total, current->pattern, current->input);
1035 is_successful = 0;
1036 } else if (return_value8_1 >= 0) {
1037 return_value8_1 *= 2;
1038 /* Transform back the results. */
1039 if (current->flags & PCRE_UTF8) {
1040 for (i = 0; i < return_value8_1; ++i) {
1041 if (ovector16_1[i] >= 0)
1042 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1043 if (ovector16_2[i] >= 0)
1044 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1045 }
1046 }
1047
1048 for (i = 0; i < return_value8_1; ++i)
1049 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1050 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1051 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1052 total, current->pattern, current->input);
1053 is_successful = 0;
1054 }
1055 }
1056 } else {
1057 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1058 /* Only the 8 bit and 16 bit results must be equal. */
1059 #ifdef SUPPORT_PCRE8
1060 if (return_value8_1 != return_value8_2) {
1061 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1062 return_value8_1, return_value8_2, total, current->pattern, current->input);
1063 is_successful = 0;
1064 } else if (return_value8_1 >= 0) {
1065 return_value8_1 *= 2;
1066 for (i = 0; i < return_value8_1; ++i)
1067 if (ovector8_1[i] != ovector8_2[i]) {
1068 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1069 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1070 is_successful = 0;
1071 }
1072 }
1073 #endif
1074
1075 #ifdef SUPPORT_PCRE16
1076 if (return_value16_1 != return_value16_2) {
1077 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1078 return_value16_1, return_value16_2, total, current->pattern, current->input);
1079 is_successful = 0;
1080 } else if (return_value16_1 >= 0) {
1081 return_value16_1 *= 2;
1082 for (i = 0; i < return_value16_1; ++i)
1083 if (ovector16_1[i] != ovector16_2[i]) {
1084 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1085 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1086 is_successful = 0;
1087 }
1088 }
1089 #endif
1090
1091 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1092 }
1093 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1094 }
1095
1096 if (is_successful) {
1097 #ifdef SUPPORT_PCRE8
1098 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1099 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1100 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1101 total, current->pattern, current->input);
1102 is_successful = 0;
1103 }
1104
1105 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1106 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1107 total, current->pattern, current->input);
1108 is_successful = 0;
1109 }
1110 }
1111 #endif
1112 #ifdef SUPPORT_PCRE16
1113 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1114 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1115 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1116 total, current->pattern, current->input);
1117 is_successful = 0;
1118 }
1119
1120 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1121 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1122 total, current->pattern, current->input);
1123 is_successful = 0;
1124 }
1125 }
1126 #endif
1127 }
1128
1129 if (is_successful)
1130 successful++;
1131
1132 #ifdef SUPPORT_PCRE8
1133 if (re8) {
1134 pcre_free_study(extra8);
1135 pcre_free(re8);
1136 }
1137 #endif
1138 #ifdef SUPPORT_PCRE16
1139 if (re16) {
1140 pcre16_free_study(extra16);
1141 pcre16_free(re16);
1142 }
1143 #endif
1144
1145 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1146 printf(".");
1147 fflush(stdout);
1148 current++;
1149 }
1150 tables(1);
1151 #ifdef SUPPORT_PCRE8
1152 setstack8(NULL);
1153 #endif
1154 #ifdef SUPPORT_PCRE16
1155 setstack16(NULL);
1156 #endif
1157
1158 if (total == successful) {
1159 printf("\nAll JIT regression tests are successfully passed.\n");
1160 return 0;
1161 } else {
1162 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1163 return 1;
1164 }
1165 }
1166
1167 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12