/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 881 - (show annotations) (download)
Sun Jan 15 18:07:05 2012 UTC (2 years, 8 months ago) by ph10
File MIME type: text/plain
File size: 49698 byte(s)
Get rid of a number of -Wunused-but-set-variable compiler warnings.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Main Library written by Philip Hazel
9 Copyright (c) 1997-2012 University of Cambridge
10
11 This JIT compiler regression test program was written by Zoltan Herczeg
12 Copyright (c) 2010-2012
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43 #ifdef HAVE_CONFIG_H
44 #include "config.h"
45 #endif
46
47 #include <stdio.h>
48 #include <string.h>
49 #include "pcre.h"
50
51 #define PCRE_BUG 0x80000000
52
53 /*
54 Letter characters:
55 \xe6\x92\xad = 0x64ad = 25773 (kanji)
56 Non-letter characters:
57 \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58 \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59 \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60 \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61 Newlines:
62 \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63 \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64 Othercase pairs:
65 \xc3\xa9 = 0xe9 = 233 (e')
66 \xc3\x89 = 0xc9 = 201 (E')
67 \xc3\xa1 = 0xe1 = 225 (a')
68 \xc3\x81 = 0xc1 = 193 (A')
69 \xc8\xba = 0x23a = 570
70 \xe2\xb1\xa5 = 0x2c65 = 11365
71 \xe1\xbd\xb8 = 0x1f78 = 8056
72 \xe1\xbf\xb8 = 0x1ff8 = 8184
73 \xf0\x90\x90\x80 = 0x10400 = 66560
74 \xf0\x90\x90\xa8 = 0x10428 = 66600
75 Mark property:
76 \xcc\x8d = 0x30d = 781
77 Special:
78 \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79 \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80 \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81 \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82 \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 */
84
85 static int regression_tests(void);
86
87 int main(void)
88 {
89 int jit = 0;
90 #ifdef SUPPORT_PCRE8
91 pcre_config(PCRE_CONFIG_JIT, &jit);
92 #else
93 pcre16_config(PCRE_CONFIG_JIT, &jit);
94 #endif
95 if (!jit) {
96 printf("JIT must be enabled to run pcre_jit_test\n");
97 return 1;
98 }
99 return regression_tests();
100 }
101
102 /* --------------------------------------------------------------------------------------- */
103
104 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105 #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106 #endif
107
/* Shorthand PCRE option sets used by the regression test table.  The letters
   of each name spell out the options that are OR-ed together:
     C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
     A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
   Every set is built on top of the common MA base. */
#define MA    (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP   (MA | PCRE_UCP)
#define CMA   (PCRE_CASELESS | MA)
#define MUA   (MA | PCRE_UTF8)
#define MUAP  (MUA | PCRE_UCP)
#define CMUA  (PCRE_CASELESS | MUA)
#define CMUAP (PCRE_CASELESS | MUAP)
115
/* Bit layout of the start_offset member of a test case.  The low 16 bits
   (OFFSET_MASK) leave room for a numeric offset; each F_* flag is a single
   bit above them, so table entries can write e.g. "1 | F_NOMATCH".

   NOTE(review): the per-flag meanings below are inferred from the names and
   from how the table uses them -- confirm against the code that reads them.
     F_NO8 / F_NO16  presumably exclude a case from the 8-bit / 16-bit run
     F_NOMATCH       presumably marks a case that is expected not to match
     F_DIFF          meaning not visible here
     F_FORCECONV     appears only on the "16 bit specific" cases
     F_PROPERTY      appears on patterns using \p, \P or \X */
#define OFFSET_MASK 0xffff
#define F_NO8       (1 << 16)
#define F_NO16      (1 << 17)
#define F_NOMATCH   (1 << 18)
#define F_DIFF      (1 << 19)
#define F_FORCECONV (1 << 20)
#define F_PROPERTY  (1 << 21)
123
/* One entry of the regression test table.  The table fills these in with
   positional initialisers, so the order of the members must not change. */
struct regression_test_case {
    /* PCRE option bits for this case (e.g. MUA, CMA, PCRE_CASELESS). */
    int flags;

    /* Start offset, optionally OR-ed with F_* flag bits
       (e.g. "1 | F_NOMATCH"); see OFFSET_MASK. */
    int start_offset;

    /* The regular expression under test. */
    const char *pattern;

    /* The input text for the pattern. */
    const char *input;
};
130
131 static struct regression_test_case regression_test_cases[] = {
132 /* Constant strings. */
133 { MUA, 0, "AbC", "AbAbC" },
134 { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135 { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136 { MA, 0, "[^a]", "aAbB" },
137 { CMA, 0, "[^m]", "mMnN" },
138 { MA, 0, "a[^b][^#]", "abacd" },
139 { CMA, 0, "A[^B][^E]", "abacd" },
140 { CMUA, 0, "[^x][^#]", "XxBll" },
141 { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142 { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143 { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144 { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145 { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146 { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147 { MUA, 0, "[axd]", "sAXd" },
148 { CMUA, 0, "[axd]", "sAXd" },
149 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151 { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152 { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153 { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156 { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157 { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158 { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159 { PCRE_CASELESS, 0, "a1", "Aa1" },
160 { MA, 0, "\\Ca", "cda" },
161 { CMA, 0, "\\Ca", "CDA" },
162 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163 { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164 { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165 { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166 { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167 { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168
169 /* Assertions. */
170 { MUA, 0, "\\b[^A]", "A_B#" },
171 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173 { MAP, 0, "\\B", "_\xa1" },
174 { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175 { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176 { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177 { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178 { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180 { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181 { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182 { MA, 1 | F_NOMATCH, "^", "\n" },
183 { 0, 0, "^ab", "ab" },
184 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190 { 0, 0, "ab$", "ab" },
191 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200 { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203 { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204 { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207 { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219 { MA, 0, "\\Aa", "aaa" },
220 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 { MA, 1, "\\Ga", "aaa" },
222 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 { MA, 0, "a\\z", "aaa" },
224 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225
226 /* Brackets. */
227 { MUA, 0, "(ab|bb|cd)", "bacde" },
228 { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230 { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231 { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232 { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233
234 /* Greedy and non-greedy ? operators. */
235 { MUA, 0, "(?:a)?a", "laab" },
236 { CMUA, 0, "(A)?A", "llaab" },
237 { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238 { MUA, 0, "(a)?a", "manm" },
239 { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240 { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241 { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242
243 /* Greedy and non-greedy + operators */
244 { MUA, 0, "(aa)+aa", "aaaaaaa" },
245 { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246 { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247 { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248 { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249 { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250 { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251
252 /* Greedy and non-greedy * operators */
253 { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254 { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255 { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256 { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257 { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258 { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259 { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260 { MA, 0, "((?:a|)*){0}a", "a" },
261
262 /* Combining ? + * operators */
263 { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264 { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265 { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266 { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267 { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268
269 /* Single character iterators. */
270 { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271 { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272 { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273 { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274 { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275 { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276 { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277 { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278 { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279 { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280 { MUA, 0, "(a?+[^b])+", "babaacacb" },
281 { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282 { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283 { CMUA, 0, "[c-f]+k", "DemmFke" },
284 { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285 { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286 { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287 { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288 { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289 { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290 { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291 { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292 { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293 { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294 { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295 { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296 { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298 { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299 { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300 { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301 { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302
303 /* Basic character sets. */
304 { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305 { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306 { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307 { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308 { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309 { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310
311 /* Unicode properties. */
312 { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321 { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322 { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324 { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325 { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327 { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330 { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332
333 /* Possible empty brackets. */
334 { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335 { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336 { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337 { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338 { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339 { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340 { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341 { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342 { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343 { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344
345 /* Start offset. */
346 { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348 { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350
351 /* Newline. */
352 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353 { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355
356 /* Any character except newline or any newline. */
357 { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358 { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359 { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364 { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365 { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366 { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368 { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369 { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 { MUA, 0, "\\R+", "ab\r\n\r" },
372 { MUA, 0, "\\R*", "ab\r\n\r" },
373 { MUA, 0, "\\R*", "\r\n\r" },
374 { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375 { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376 { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377 { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380 { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383
384 /* Atomic groups (no fallback from "next" direction). */
385 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386 { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388 "bababcdedefgheijijklmlmnop" },
389 { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390 { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391 { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392 { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393 { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394 { MUA, 0, "(?>x|)*$", "aaa" },
395 { MUA, 0, "(?>(x)|)*$", "aaa" },
396 { MUA, 0, "(?>x|())*$", "aaa" },
397 { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398 { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399 { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400 { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401 { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402 { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403 { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404 { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405 { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406 { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407 { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408 { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409 { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410 { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411 { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412 { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413 { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415 { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416 { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417 { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418 { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420 { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422 { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423
424 /* Possessive quantifiers. */
425 { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426 { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427 { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428 { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429 { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430 { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432 { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433 { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435 { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436 { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437 { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438 { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439 { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441 { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442 { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443 { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444 { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445 { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448 { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450 { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451 { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452 { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453 { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454 { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457 { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458 { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459 { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460
461 /* Back references. */
462 { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463 { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464 { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465 { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466 { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467 { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468 { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469 { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470 { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471 { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472 { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473 { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474 { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475 { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476 { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477 { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478 { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483 { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484 { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485
486 /* Assertions. */
487 { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488 { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489 { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490 { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491 { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492 { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493 { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494 { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495 { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496 { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497 { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498 { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499 { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501 { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502 { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504 { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505 { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506 { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507 { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508 { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509 { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510 { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511
512 /* Not empty, ACCEPT, FAIL */
513 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515 { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517 { MUA, 0, "a(*ACCEPT)b", "ab" },
518 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520 { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527 { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528 { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529 { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530 { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531 { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532
533 /* Conditional blocks. */
534 { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535 { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536 { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537 { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538 { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539 { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540 { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545 { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546 { MUA, 0, "(?(?=a)ab)", "a" },
547 { MUA, 0, "(?(?<!b)c)", "b" },
548 { MUA, 0, "(?(DEFINE)a(b))", "a" },
549 { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550 { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551 { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552 { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553 { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554 { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555 { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558 { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559 { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562 { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563 { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564 { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568
569 /* Set start of match. */
570 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571 { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572 { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575
576 /* First line. */
577 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586 { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594
595 /* Recurse. */
596 { MUA, 0, "(a)(?1)", "aa" },
597 { MUA, 0, "((a))(?1)", "aa" },
598 { MUA, 0, "(b|a)(?1)", "aa" },
599 { MUA, 0, "(b|(a))(?1)", "aa" },
600 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602 { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603 { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604 { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605 { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606 { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607 { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610 { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 { MUA, 0, "b|<(?R)*>", "<<b>" },
613 { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614 { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616 { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617 { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618 { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619 { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620 { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621
622 /* 16 bit specific tests. */
623 { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624 { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625 { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626 { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627 { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628 { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629 { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630 { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631 { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636 { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638 { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639 { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640 { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641 { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642 { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643 { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644 { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645 { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646 { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647 { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648 { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649 { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650 { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651 { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652 { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653 { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654
655 /* Deep recursion. */
656 { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659
660 /* Deep recursion: Stack limit reached. */
661 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662 { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663 { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664 { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665 { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666
667 { 0, 0, NULL, NULL }
668 };
669
670 static const unsigned char *tables(int mode)
671 {
672 /* The purpose of this function to allow valgrind
673 for reporting invalid reads and writes. */
674 static unsigned char *tables_copy;
675 const char *errorptr;
676 int erroroffset;
677 const unsigned char *default_tables;
678 #ifdef SUPPORT_PCRE8
679 pcre *regex;
680 char null_str[1] = { 0 };
681 #else
682 pcre16 *regex;
683 PCRE_UCHAR16 null_str[1] = { 0 };
684 #endif
685
686 if (mode) {
687 if (tables_copy)
688 free(tables_copy);
689 tables_copy = NULL;
690 return NULL;
691 }
692
693 if (tables_copy)
694 return tables_copy;
695
696 default_tables = NULL;
697 #ifdef SUPPORT_PCRE8
698 regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
699 if (regex) {
700 pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
701 pcre_free(regex);
702 }
703 #else
704 regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
705 if (regex) {
706 pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
707 pcre16_free(regex);
708 }
709 #endif
710 /* Shouldn't ever happen. */
711 if (!default_tables)
712 return NULL;
713
714 /* Unfortunately this value cannot get from pcre_fullinfo.
715 Since this is a test program, this is acceptable at the moment. */
716 tables_copy = (unsigned char *)malloc(1088);
717 if (!tables_copy)
718 return NULL;
719
720 memcpy(tables_copy, default_tables, 1088);
721 return tables_copy;
722 }
723
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library. The opaque argument is treated
as the JIT stack itself and is handed back unchanged. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *assigned_stack = (pcre_jit_stack *)arg;

	return assigned_stack;
}
#endif
730
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library. The opaque argument is treated
as the JIT stack itself and is handed back unchanged. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *assigned_stack = (pcre16_jit_stack *)arg;

	return assigned_stack;
}
#endif
737
#ifdef SUPPORT_PCRE8
/* Manages one cached JIT stack for the 8 bit tests.

extra != NULL: allocate the stack on first use (1 byte initial size, 1 MiB
               maximum) and assign it to the given study data.
extra == NULL: free the cached stack, if any, so the next call allocates a
               fresh one. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra) {
		if (!stack)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, stack);
		return;
	}

	/* Release request. */
	if (stack)
		pcre_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
756
#ifdef SUPPORT_PCRE16
/* Manages one cached JIT stack for the 16 bit tests.

extra != NULL: allocate the stack on first use (1 byte initial size, 1 MiB
               maximum) and assign it to the given study data.
extra == NULL: free the cached stack, if any, so the next call allocates a
               fresh one. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (extra) {
		if (!stack)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, stack);
		return;
	}

	/* Release request. */
	if (stack)
		pcre16_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
775
776 #ifdef SUPPORT_PCRE16
777
/* Converts a NUL-terminated UTF-8 style byte string to 16 bit units.

The decoder is deliberately lenient: it only looks at the lead byte to decide
the sequence length and does not reject surrogates or overlong forms, because
the test table contains such sequences on purpose.

input       NUL-terminated source bytes.
output      Destination buffer of at least max_length 16 bit units.
offsetmap   Optional (may be NULL). On return, offsetmap[i] holds the byte
            offset in input of the character that produced output unit i
            (both halves of a surrogate pair map to the same offset), and
            offsetmap[length] holds the offset where conversion stopped.
            Needs room for max_length entries.
max_length  Capacity of output in units, including the terminating zero.

Returns the number of units written, not counting the terminating zero.
Conversion stops early when the buffer is full or when a multi-byte sequence
is cut short by the end of the string. Nothing is written if max_length <= 0. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *iptr = (const unsigned char *)input;
	unsigned short *optr = (unsigned short *)output;
	unsigned int c;
	int seq_length, start, i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		/* Sequence length is derived from the lead byte only. */
		if (!(*iptr & 0x80))
			seq_length = 1;
		else if (!(*iptr & 0x20))
			seq_length = 2;
		else if (!(*iptr & 0x10))
			seq_length = 3;
		else if (!(*iptr & 0x08))
			seq_length = 4;
		else
			seq_length = 0; /* Not a usable lead byte. */

		/* Never read beyond the terminating NUL: a sequence cut short by
		the end of the string ends the conversion. */
		for (i = 1; i < seq_length; i++)
			if (iptr[i] == 0)
				break;
		if (i < seq_length)
			break;

		start = (int)(iptr - (const unsigned char *)input);
		if (offsetmap)
			*offsetmap++ = start;

		switch (seq_length) {
		case 1:
			c = iptr[0];
			break;
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		case 4:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		default:
			/* Pass an unusable lead byte through as a single unit so the
			input pointer always makes progress. */
			c = iptr[0];
			seq_length = 1;
			break;
		}
		iptr += seq_length;

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. The map
			entry written above already records where conversion stopped. */
			*optr = '\0';
			return (int)(optr - (unsigned short *)output);
		} else {
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			/* The low surrogate maps to the same source character. */
			if (offsetmap)
				*offsetmap++ = start;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - (const unsigned char *)input);
	*optr = '\0';
	return (int)(optr - (unsigned short *)output);
}
825
/* Widens a NUL-terminated byte string to 16 bit units, one unit per byte,
without any decoding. At most max_length - 1 units are copied, followed by a
terminating zero. Returns the number of units copied; nothing is written
when max_length is 0. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	unsigned short *dst = (unsigned short *)output;
	int copied = 0;

	if (max_length == 0)
		return 0;

	/* Always keep one unit in reserve for the terminator. */
	for (; src[copied] != 0 && max_length - copied > 1; copied++)
		dst[copied] = src[copied];

	dst[copied] = '\0';
	return copied;
}
841
/* Capacity, in 16 bit units, of the shared conversion buffer below. */
#define REGTEST_MAX_LENGTH 4096
/* Scratch buffer that receives the 16 bit form of the current pattern and,
later, of the current subject string. */
static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16() for the subject string: entry i is the
byte offset in the 8 bit subject corresponding to unit i of regtest_buf. Used
to translate 16 bit match offsets back to 8 bit offsets for comparison. */
static int regtest_offsetmap[REGTEST_MAX_LENGTH];
845
846 #endif /* SUPPORT_PCRE16 */
847
/* Returns 1 when every byte of the NUL-terminated string is 7 bit ASCII
(an empty string qualifies), 0 as soon as a byte above 127 is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor & 0x80)
			return 0;
	}
	return 1;
}
858
859 static int regression_tests(void)
860 {
861 struct regression_test_case *current = regression_test_cases;
862 const char *error;
863 int i, err_offs;
864 int is_successful, is_ascii_pattern, is_ascii_input;
865 int total = 0;
866 int successful = 0;
867 int counter = 0;
868 #ifdef SUPPORT_PCRE8
869 pcre *re8;
870 pcre_extra *extra8;
871 int ovector8_1[32];
872 int ovector8_2[32];
873 int return_value8_1, return_value8_2;
874 int utf8 = 0, ucp8 = 0;
875 int disabled_flags8 = 0;
876 #endif
877 #ifdef SUPPORT_PCRE16
878 pcre16 *re16;
879 pcre16_extra *extra16;
880 int ovector16_1[32];
881 int ovector16_2[32];
882 int return_value16_1, return_value16_2;
883 int utf16 = 0, ucp16 = 0;
884 int disabled_flags16 = 0;
885 int length16;
886 #endif
887
888 /* This test compares the behaviour of interpreter and JIT. Although disabling
889 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
890 still considered successful from pcre_jit_test point of view. */
891
892 printf("Running JIT regression\n");
893
894 #ifdef SUPPORT_PCRE8
895 pcre_config(PCRE_CONFIG_UTF8, &utf8);
896 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
897 if (!utf8)
898 disabled_flags8 |= PCRE_UTF8;
899 if (!ucp8)
900 disabled_flags8 |= PCRE_UCP;
901 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
902 #endif
903 #ifdef SUPPORT_PCRE16
904 pcre16_config(PCRE_CONFIG_UTF16, &utf16);
905 pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
906 if (!utf16)
907 disabled_flags16 |= PCRE_UTF8;
908 if (!ucp16)
909 disabled_flags16 |= PCRE_UCP;
910 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
911 #endif
912
913 while (current->pattern) {
914 /* printf("\nPattern: %s :\n", current->pattern); */
915 total++;
916 if (current->start_offset & F_PROPERTY) {
917 is_ascii_pattern = 0;
918 is_ascii_input = 0;
919 } else {
920 is_ascii_pattern = check_ascii(current->pattern);
921 is_ascii_input = check_ascii(current->input);
922 }
923
924 error = NULL;
925 #ifdef SUPPORT_PCRE8
926 re8 = NULL;
927 if (!(current->start_offset & F_NO8))
928 re8 = pcre_compile(current->pattern,
929 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
930 &error, &err_offs, tables(0));
931
932 extra8 = NULL;
933 if (re8) {
934 error = NULL;
935 extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
936 if (!extra8) {
937 printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
938 pcre_free(re8);
939 re8 = NULL;
940 }
941 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
942 printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
943 pcre_free_study(extra8);
944 pcre_free(re8);
945 re8 = NULL;
946 }
947 } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
948 printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
949 #endif
950 #ifdef SUPPORT_PCRE16
951 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
952 convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
953 else
954 copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
955
956 re16 = NULL;
957 if (!(current->start_offset & F_NO16))
958 re16 = pcre16_compile(regtest_buf,
959 current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
960 &error, &err_offs, tables(0));
961
962 extra16 = NULL;
963 if (re16) {
964 error = NULL;
965 extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
966 if (!extra16) {
967 printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
968 pcre16_free(re16);
969 re16 = NULL;
970 }
971 if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
972 printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
973 pcre16_free_study(extra16);
974 pcre16_free(re16);
975 re16 = NULL;
976 }
977 } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
978 printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
979 #endif
980
981 counter++;
982 if ((counter & 0x3) != 0) {
983 #ifdef SUPPORT_PCRE8
984 setstack8(NULL);
985 #endif
986 #ifdef SUPPORT_PCRE16
987 setstack16(NULL);
988 #endif
989 }
990
991 #ifdef SUPPORT_PCRE8
992 return_value8_1 = -1000;
993 return_value8_2 = -1000;
994 for (i = 0; i < 32; ++i)
995 ovector8_1[i] = -2;
996 for (i = 0; i < 32; ++i)
997 ovector8_2[i] = -2;
998 if (re8) {
999 setstack8(extra8);
1000 return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1001 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
1002 return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1003 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1004 }
1005 #endif
1006
1007 #ifdef SUPPORT_PCRE16
1008 return_value16_1 = -1000;
1009 return_value16_2 = -1000;
1010 for (i = 0; i < 32; ++i)
1011 ovector16_1[i] = -2;
1012 for (i = 0; i < 32; ++i)
1013 ovector16_2[i] = -2;
1014 if (re16) {
1015 setstack16(extra16);
1016 if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1017 length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1018 else
1019 length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1020 return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1021 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1022 return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1023 current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1024 }
1025 #endif
1026
1027 /* If F_DIFF is set, just run the test, but do not compare the results.
1028 Segfaults can still be captured. */
1029
1030 is_successful = 1;
1031 if (!(current->start_offset & F_DIFF)) {
1032 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1033 if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1034 /* All results must be the same. */
1035 if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1036 printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1037 return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1038 total, current->pattern, current->input);
1039 is_successful = 0;
1040 } else if (return_value8_1 >= 0) {
1041 return_value8_1 *= 2;
1042 /* Transform back the results. */
1043 if (current->flags & PCRE_UTF8) {
1044 for (i = 0; i < return_value8_1; ++i) {
1045 if (ovector16_1[i] >= 0)
1046 ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1047 if (ovector16_2[i] >= 0)
1048 ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1049 }
1050 }
1051
1052 for (i = 0; i < return_value8_1; ++i)
1053 if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1054 printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1055 i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1056 total, current->pattern, current->input);
1057 is_successful = 0;
1058 }
1059 }
1060 } else {
1061 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1062 /* Only the 8 bit and 16 bit results must be equal. */
1063 #ifdef SUPPORT_PCRE8
1064 if (return_value8_1 != return_value8_2) {
1065 printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1066 return_value8_1, return_value8_2, total, current->pattern, current->input);
1067 is_successful = 0;
1068 } else if (return_value8_1 >= 0) {
1069 return_value8_1 *= 2;
1070 for (i = 0; i < return_value8_1; ++i)
1071 if (ovector8_1[i] != ovector8_2[i]) {
1072 printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1073 i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1074 is_successful = 0;
1075 }
1076 }
1077 #endif
1078
1079 #ifdef SUPPORT_PCRE16
1080 if (return_value16_1 != return_value16_2) {
1081 printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1082 return_value16_1, return_value16_2, total, current->pattern, current->input);
1083 is_successful = 0;
1084 } else if (return_value16_1 >= 0) {
1085 return_value16_1 *= 2;
1086 for (i = 0; i < return_value16_1; ++i)
1087 if (ovector16_1[i] != ovector16_2[i]) {
1088 printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1089 i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1090 is_successful = 0;
1091 }
1092 }
1093 #endif
1094
1095 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1096 }
1097 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1098 }
1099
1100 if (is_successful) {
1101 #ifdef SUPPORT_PCRE8
1102 if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1103 if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1104 printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1105 total, current->pattern, current->input);
1106 is_successful = 0;
1107 }
1108
1109 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1110 printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1111 total, current->pattern, current->input);
1112 is_successful = 0;
1113 }
1114 }
1115 #endif
1116 #ifdef SUPPORT_PCRE16
1117 if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1118 if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1119 printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1120 total, current->pattern, current->input);
1121 is_successful = 0;
1122 }
1123
1124 if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1125 printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1126 total, current->pattern, current->input);
1127 is_successful = 0;
1128 }
1129 }
1130 #endif
1131 }
1132
1133 if (is_successful)
1134 successful++;
1135
1136 #ifdef SUPPORT_PCRE8
1137 if (re8) {
1138 pcre_free_study(extra8);
1139 pcre_free(re8);
1140 }
1141 #endif
1142 #ifdef SUPPORT_PCRE16
1143 if (re16) {
1144 pcre16_free_study(extra16);
1145 pcre16_free(re16);
1146 }
1147 #endif
1148
1149 /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1150 printf(".");
1151 fflush(stdout);
1152 current++;
1153 }
1154 tables(1);
1155 #ifdef SUPPORT_PCRE8
1156 setstack8(NULL);
1157 #endif
1158 #ifdef SUPPORT_PCRE16
1159 setstack16(NULL);
1160 #endif
1161
1162 if (total == successful) {
1163 printf("\nAll JIT regression tests are successfully passed.\n");
1164 return 0;
1165 } else {
1166 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1167 return 1;
1168 }
1169 }
1170
1171 /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12