/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 914 - (show annotations) (download)
Mon Feb 13 06:04:50 2012 UTC (2 years, 2 months ago) by zherczeg
File MIME type: text/plain
File size: 51353 byte(s)
Partial matching support is added to the JIT compiler
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (Next Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Option sets shared by many test cases. The names spell out the options
they combine: M = PCRE_MULTILINE, U = PCRE_UTF8, A = PCRE_NEWLINE_ANYCRLF,
P = PCRE_UCP and a leading C = PCRE_CASELESS. Every set is built on top of
MA, so the common part is written only once. */
#define MA	(PCRE_NEWLINE_ANYCRLF | PCRE_MULTILINE)
#define MAP	(MA | PCRE_UCP)
#define CMA	(MA | PCRE_CASELESS)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(MUA | PCRE_CASELESS)
#define CMUAP	(MUAP | PCRE_CASELESS)
115
/* Bits stored in the second member (start_offset) of a test case. The low
16 bits are covered by OFFSET_MASK; each F_* flag occupies one bit above
them, so a table entry can be written as "offset | F_xxx".
NOTE(review): the code that interprets these flags is outside this excerpt;
their meaning is presumably what the names suggest - confirm there. */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
123
/* One row of the regression test table. Rows are written as positional
initializers ({ options, offset, "pattern", "subject" }), so the order of
the members below must not change. */
struct regression_test_case {
	/* First initializer: PCRE_* option bits, often one of the MUA-style sets. */
	int flags;
	/* Second initializer: a small number, optionally or-ed with F_* flags. */
	int start_offset;
	/* Third initializer: the regular expression as a C string. */
	const char *pattern;
	/* Fourth initializer: the text the pattern is tried against. */
	const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192 { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220 { MA, 0, "\\Aa", "aaa" },
221 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222 { MA, 1, "\\Ga", "aaa" },
223 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224 { MA, 0, "a\\z", "aaa" },
225 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
226
227 /* Brackets. */
228 { MUA, 0, "(ab|bb|cd)", "bacde" },
229 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234
235 /* Greedy and non-greedy ? operators. */
236 { MUA, 0, "(?:a)?a", "laab" },
237 { CMUA, 0, "(A)?A", "llaab" },
238 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
239 { MUA, 0, "(a)?a", "manm" },
240 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243
244 /* Greedy and non-greedy + operators */
245 { MUA, 0, "(aa)+aa", "aaaaaaa" },
246 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
247 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252
253 /* Greedy and non-greedy * operators */
254 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261 { MA, 0, "((?:a|)*){0}a", "a" },
262
263 /* Combining ? + * operators */
264 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269
270 /* Single character iterators. */
271 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281 { MUA, 0, "(a?+[^b])+", "babaacacb" },
282 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284 { CMUA, 0, "[c-f]+k", "DemmFke" },
285 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303
304 /* Basic character sets. */
305 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311
312 /* Unicode properties. */
313 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
333
334 /* Possible empty brackets. */
335 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345
346 /* Start offset. */
347 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351
352 /* Newline. */
353 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356
357 /* Any character except newline or any newline. */
358 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372 { MUA, 0, "\\R+", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "ab\r\n\r" },
374 { MUA, 0, "\\R*", "\r\n\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381 { MUA, 0, "\\R*\\R\\R", "\n\r" },
382 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384
385 /* Atomic groups (no fallback from "next" direction). */
386 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389 "bababcdedefgheijijklmlmnop" },
390 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395 { MUA, 0, "(?>x|)*$", "aaa" },
396 { MUA, 0, "(?>(x)|)*$", "aaa" },
397 { MUA, 0, "(?>x|())*$", "aaa" },
398 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424
425 /* Possessive quantifiers. */
426 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461
462 /* Back references. */
463 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486
487 /* Assertions. */
488 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
501 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512
513 /* Not empty, ACCEPT, FAIL */
514 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518 { MUA, 0, "a(*ACCEPT)b", "ab" },
519 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
528 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533
534 /* Conditional blocks. */
535 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547 { MUA, 0, "(?(?=a)ab)", "a" },
548 { MUA, 0, "(?(?<!b)c)", "b" },
549 { MUA, 0, "(?(DEFINE)a(b))", "a" },
550 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569
570 /* Set start of match. */
571 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576
577 /* First line. */
578 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
587 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
594 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
595
596 /* Recurse. */
597 { MUA, 0, "(a)(?1)", "aa" },
598 { MUA, 0, "((a))(?1)", "aa" },
599 { MUA, 0, "(b|a)(?1)", "aa" },
600 { MUA, 0, "(b|(a))(?1)", "aa" },
601 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
602 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
603 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
604 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
605 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
606 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
607 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
608 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
609 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
610 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
611 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
612 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
613 { MUA, 0, "b|<(?R)*>", "<<b>" },
614 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
615 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
616 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
618 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
619 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
620 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
621 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
622
623 /* 16 bit specific tests. */
624 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
625 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
626 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
627 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
629 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
630 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
632 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
635 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
637 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
639 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
640 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
645 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
646 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
647 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
648 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
649 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
650 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
654 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
655
656 /* Partial matching. */
657 { MUA | PCRE_PARTIAL_SOFT, 0, "ab", "a" },
658 { MUA | PCRE_PARTIAL_SOFT, 0, "ab|a", "a" },
659 { MUA | PCRE_PARTIAL_HARD, 0, "ab|a", "a" },
660 { MUA | PCRE_PARTIAL_SOFT, 0, "\\b#", "a" },
661 { MUA | PCRE_PARTIAL_SOFT, 0, "(?<=a)b", "a" },
662 { MUA | PCRE_PARTIAL_SOFT, 0, "abc|(?<=xxa)bc", "xxab" },
663 { MUA | PCRE_PARTIAL_SOFT, 0, "a\\B", "a" },
664 { MUA | PCRE_PARTIAL_HARD, 0, "a\\b", "a" },
665
666 /* Deep recursion. */
667 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
668 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
669 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
670
671 /* Deep recursion: Stack limit reached. */
672 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
673 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
674 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
675 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
676 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
677
678 { 0, 0, NULL, NULL }
679 };
680
681 static const unsigned char *tables(int mode)
682 {
683 /* The purpose of this function to allow valgrind
684 for reporting invalid reads and writes. */
685 static unsigned char *tables_copy;
686 const char *errorptr;
687 int erroroffset;
688 unsigned char *default_tables;
689 #ifdef SUPPORT_PCRE8
690 pcre *regex;
691 char null_str[1] = { 0 };
692 #else
693 pcre16 *regex;
694 PCRE_UCHAR16 null_str[1] = { 0 };
695 #endif
696
697 if (mode) {
698 if (tables_copy)
699 free(tables_copy);
700 tables_copy = NULL;
701 return NULL;
702 }
703
704 if (tables_copy)
705 return tables_copy;
706
707 default_tables = NULL;
708 #ifdef SUPPORT_PCRE8
709 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
710 if (regex) {
711 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
712 pcre_free(regex);
713 }
714 #else
715 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
716 if (regex) {
717 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
718 pcre16_free(regex);
719 }
720 #endif
721 /* Shouldn't ever happen. */
722 if (!default_tables)
723 return NULL;
724
725 /* Unfortunately this value cannot get from pcre_fullinfo.
726 Since this is a test program, this is acceptable at the moment. */
727 tables_copy = (unsigned char *)malloc(1088);
728 if (!tables_copy)
729 return NULL;
730
731 memcpy(tables_copy, default_tables, 1088);
732 return tables_copy;
733 }
734
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the user argument
   is the stack itself, so it is simply handed back. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *jit_stack = (pcre_jit_stack *)arg;

	return jit_stack;
}
#endif
741
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the user argument
   is the stack itself, so it is simply handed back. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *jit_stack = (pcre16_jit_stack *)arg;

	return jit_stack;
}
#endif
748
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack shared by the 8 bit tests.

   extra != NULL: assigns the shared stack to extra through callback8,
                  allocating the stack first when none exists yet.
   extra == NULL: frees the shared stack (if any). */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *shared_stack;

	if (extra) {
		if (!shared_stack)
			shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack)
		pcre_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
767
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack shared by the 16 bit tests.

   extra != NULL: assigns the shared stack to extra through callback16,
                  allocating the stack first when none exists yet.
   extra == NULL: frees the shared stack (if any). */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *shared_stack;

	if (extra) {
		if (!shared_stack)
			shared_stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack)
		pcre16_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
786
787 #ifdef SUPPORT_PCRE16
788
789 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
790 {
791 unsigned char *iptr = (unsigned char*)input;
792 unsigned short *optr = (unsigned short *)output;
793 unsigned int c;
794
795 if (max_length == 0)
796 return 0;
797
798 while (*iptr && max_length > 1) {
799 c = 0;
800 if (offsetmap)
801 *offsetmap++ = (int)(iptr - (unsigned char*)input);
802
803 if (!(*iptr & 0x80))
804 c = *iptr++;
805 else if (!(*iptr & 0x20)) {
806 c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
807 iptr += 2;
808 } else if (!(*iptr & 0x10)) {
809 c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
810 iptr += 3;
811 } else if (!(*iptr & 0x08)) {
812 c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
813 iptr += 4;
814 }
815
816 if (c < 65536) {
817 *optr++ = c;
818 max_length--;
819 } else if (max_length <= 2) {
820 *optr = '\0';
821 return (int)(optr - (unsigned short *)output);
822 } else {
823 c -= 0x10000;
824 *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
825 *optr++ = 0xdc00 | (c & 0x3ff);
826 max_length -= 2;
827 if (offsetmap)
828 offsetmap++;
829 }
830 }
831 if (offsetmap)
832 *offsetmap = (int)(iptr - (unsigned char*)input);
833 *optr = '\0';
834 return (int)(optr - (unsigned short *)output);
835 }
836
837 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
838 {
839 unsigned char *iptr = (unsigned char*)input;
840 unsigned short *optr = (unsigned short *)output;
841
842 if (max_length == 0)
843 return 0;
844
845 while (*iptr && max_length > 1) {
846 *optr++ = *iptr++;
847 max_length--;
848 }
849 *optr = '\0';
850 return (int)(optr - (unsigned short *)output);
851 }
852
853 #define REGTEST_MAX_LENGTH 4096
854 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
855 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
856
857 #endif /* SUPPORT_PCRE16 */
858
/* Returns 1 when every byte of the zero terminated input is a
   7 bit ASCII character (an empty string qualifies), 0 otherwise. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != '\0'; cursor++) {
		/* Any byte with the top bit set is outside of ASCII. */
		if (*cursor & 0x80)
			return 0;
	}
	return 1;
}
869
870 static int regression_tests(void)
871 {
872 struct regression_test_case *current = regression_test_cases;
873 const char *error;
874 char *cpu_info;
875 int i, err_offs;
876 int is_successful, is_ascii_pattern, is_ascii_input;
877 int total = 0;
878 int successful = 0;
879 int counter = 0;
880 int study_mode;
881 #ifdef SUPPORT_PCRE8
882 pcre *re8;
883 pcre_extra *extra8;
884 int ovector8_1[32];
885 int ovector8_2[32];
886 int return_value8_1, return_value8_2;
887 int utf8 = 0, ucp8 = 0;
888 int disabled_flags8 = 0;
889 #endif
890 #ifdef SUPPORT_PCRE16
891 pcre16 *re16;
892 pcre16_extra *extra16;
893 int ovector16_1[32];
894 int ovector16_2[32];
895 int return_value16_1, return_value16_2;
896 int utf16 = 0, ucp16 = 0;
897 int disabled_flags16 = 0;
898 int length16;
899 #endif
900
901 /* This test compares the behaviour of interpreter and JIT. Although disabling
902 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
903 still considered successful from pcre_jit_test point of view. */
904
905 #ifdef SUPPORT_PCRE8
906 pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
907 #else
908 pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
909 #endif
910
911 printf("Running JIT regression tests\n");
912 printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
913
914 #ifdef SUPPORT_PCRE8
915 pcre_config(PCRE_CONFIG_UTF8, &utf8);
916 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
917 if (!utf8)
918 disabled_flags8 |= PCRE_UTF8;
919 if (!ucp8)
920 disabled_flags8 |= PCRE_UCP;
921 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
922 #endif
923 #ifdef SUPPORT_PCRE16
924 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
925 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
926 if (!utf16)
927 disabled_flags16 |= PCRE_UTF8;
928 if (!ucp16)
929 disabled_flags16 |= PCRE_UCP;
930 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
931 #endif
932
933 while (current->pattern) {
934 /* printf("\nPattern: %s :\n", current->pattern); */
935 total++;
936 if (current->start_offset & F_PROPERTY) {
937 is_ascii_pattern = 0;
938 is_ascii_input = 0;
939 } else {
940 is_ascii_pattern = check_ascii(current->pattern);
941 is_ascii_input = check_ascii(current->input);
942 }
943
944 if (current->flags & PCRE_PARTIAL_SOFT)
945 study_mode = PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE;
946 else if (current->flags & PCRE_PARTIAL_HARD)
947 study_mode = PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
948 else
949 study_mode = PCRE_STUDY_JIT_COMPILE;
950 error = NULL;
951 #ifdef SUPPORT_PCRE8
952 re8 = NULL;
953 if (!(current->start_offset & F_NO8))
954 re8 = pcre_compile(current->pattern,
955 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags8),
956 &error, &err_offs, tables(0));
957
958 extra8 = NULL;
959 if (re8) {
960 error = NULL;
961 extra8 = pcre_study(re8, study_mode, &error);
962 if (!extra8) {
963 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
964 pcre_free(re8);
965 re8 = NULL;
966 }
967 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
968 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
969 pcre_free_study(extra8);
970 pcre_free(re8);
971 re8 = NULL;
972 }
973 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
974 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
975 #endif
976 #ifdef SUPPORT_PCRE16
977 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
978 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
979 else
980 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
981
982 re16 = NULL;
983 if (!(current->start_offset & F_NO16))
984 re16 = pcre16_compile(regtest_buf,
985 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD | disabled_flags16),
986 &error, &err_offs, tables(0));
987
988 extra16 = NULL;
989 if (re16) {
990 error = NULL;
991 extra16 = pcre16_study(re16, study_mode, &error);
992 if (!extra16) {
993 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
994 pcre16_free(re16);
995 re16 = NULL;
996 }
997 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
998 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
999 pcre16_free_study(extra16);
1000 pcre16_free(re16);
1001 re16 = NULL;
1002 }
1003 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
1004 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
1005 #endif
1006
1007 counter++;
1008 if ((counter & 0x3) != 0) {
1009 #ifdef SUPPORT_PCRE8
1010 setstack8(NULL);
1011 #endif
1012 #ifdef SUPPORT_PCRE16
1013 setstack16(NULL);
1014 #endif
1015 }
1016
1017 #ifdef SUPPORT_PCRE8
1018 return_value8_1 = -1000;
1019 return_value8_2 = -1000;
1020 for (i = 0; i < 32; ++i)
1021 ovector8_1[i] = -2;
1022 for (i = 0; i < 32; ++i)
1023 ovector8_2[i] = -2;
1024 if (re8) {
1025 setstack8(extra8);
1026 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1027 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_1, 32);
1028 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1029 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector8_2, 32);
1030 }
1031 #endif
1032
1033 #ifdef SUPPORT_PCRE16
1034 return_value16_1 = -1000;
1035 return_value16_2 = -1000;
1036 for (i = 0; i < 32; ++i)
1037 ovector16_1[i] = -2;
1038 for (i = 0; i < 32; ++i)
1039 ovector16_2[i] = -2;
1040 if (re16) {
1041 setstack16(extra16);
1042 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1043 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1044 else
1045 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1046 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1047 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_1, 32);
1048 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1049 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | PCRE_PARTIAL_SOFT | PCRE_PARTIAL_HARD), ovector16_2, 32);
1050 }
1051 #endif
1052
1053 /* printf("[%d-%d|%d-%d|%d-%d]%s", return_value8_1, return_value16_1, ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1054
1055 /* If F_DIFF is set, just run the test, but do not compare the results.
1056 Segfaults can still be captured. */
1057
1058 is_successful = 1;
1059 if (!(current->start_offset & F_DIFF)) {
1060 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1061 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1062 /* All results must be the same. */
1063 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1064 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1065 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1066 total, current->pattern, current->input);
1067 is_successful = 0;
1068 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1069 if (return_value8_1 == PCRE_ERROR_PARTIAL) {
1070 return_value8_1 = 2;
1071 return_value16_1 = 2;
1072 } else {
1073 return_value8_1 *= 2;
1074 return_value16_1 *= 2;
1075 }
1076
1077 /* Transform back the results. */
1078 if (current->flags & PCRE_UTF8) {
1079 for (i = 0; i < return_value8_1; ++i) {
1080 if (ovector16_1[i] >= 0)
1081 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1082 if (ovector16_2[i] >= 0)
1083 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1084 }
1085 }
1086
1087 for (i = 0; i < return_value8_1; ++i)
1088 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1089 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1090 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1091 total, current->pattern, current->input);
1092 is_successful = 0;
1093 }
1094 }
1095 } else {
1096 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1097 /* Only the 8 bit and 16 bit results must be equal. */
1098 #ifdef SUPPORT_PCRE8
1099 if (return_value8_1 != return_value8_2) {
1100 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1101 return_value8_1, return_value8_2, total, current->pattern, current->input);
1102 is_successful = 0;
1103 } else if (return_value8_1 >= 0 || return_value8_1 == PCRE_ERROR_PARTIAL) {
1104 if (return_value8_1 == PCRE_ERROR_PARTIAL)
1105 return_value8_1 = 2;
1106 else
1107 return_value8_1 *= 2;
1108
1109 for (i = 0; i < return_value8_1; ++i)
1110 if (ovector8_1[i] != ovector8_2[i]) {
1111 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1112 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1113 is_successful = 0;
1114 }
1115 }
1116 #endif
1117
1118 #ifdef SUPPORT_PCRE16
1119 if (return_value16_1 != return_value16_2) {
1120 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1121 return_value16_1, return_value16_2, total, current->pattern, current->input);
1122 is_successful = 0;
1123 } else if (return_value16_1 >= 0 || return_value16_1 == PCRE_ERROR_PARTIAL) {
1124 if (return_value16_1 == PCRE_ERROR_PARTIAL)
1125 return_value16_1 = 2;
1126 else
1127 return_value16_1 *= 2;
1128
1129 for (i = 0; i < return_value16_1; ++i)
1130 if (ovector16_1[i] != ovector16_2[i]) {
1131 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1132 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1133 is_successful = 0;
1134 }
1135 }
1136 #endif
1137
1138 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1139 }
1140 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1141 }
1142
1143 if (is_successful) {
1144 #ifdef SUPPORT_PCRE8
1145 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1146 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1147 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1148 total, current->pattern, current->input);
1149 is_successful = 0;
1150 }
1151
1152 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1153 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1154 total, current->pattern, current->input);
1155 is_successful = 0;
1156 }
1157 }
1158 #endif
1159 #ifdef SUPPORT_PCRE16
1160 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1161 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1162 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1163 total, current->pattern, current->input);
1164 is_successful = 0;
1165 }
1166
1167 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1168 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1169 total, current->pattern, current->input);
1170 is_successful = 0;
1171 }
1172 }
1173 #endif
1174 }
1175
1176 if (is_successful)
1177 successful++;
1178
1179 #ifdef SUPPORT_PCRE8
1180 if (re8) {
1181 pcre_free_study(extra8);
1182 pcre_free(re8);
1183 }
1184 #endif
1185 #ifdef SUPPORT_PCRE16
1186 if (re16) {
1187 pcre16_free_study(extra16);
1188 pcre16_free(re16);
1189 }
1190 #endif
1191
1192 printf(".");
1193 fflush(stdout);
1194 current++;
1195 }
1196 tables(1);
1197 #ifdef SUPPORT_PCRE8
1198 setstack8(NULL);
1199 #endif
1200 #ifdef SUPPORT_PCRE16
1201 setstack16(NULL);
1202 #endif
1203
1204 if (total == successful) {
1205 printf("\nAll JIT regression tests are successfully passed.\n");
1206 return 0;
1207 } else {
1208 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1209 return 1;
1210 }
1211 }
1212
1213 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12