/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 836 - (hide annotations) (download)
Wed Dec 28 17:16:11 2011 UTC (16 months, 3 weeks ago) by ph10
File MIME type: text/plain
File size: 49528 byte(s)
Merging all the changes from the pcre16 branch into the trunk.

1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 667
11     This JIT compiler regression test program was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 667
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
51     #define PCRE_BUG 0x80000000
52    
53     /*
54 ph10 836 Letter characters:
55     \xe6\x92\xad = 0x64ad = 25773 (kanji)
56     Non-letter characters:
57     \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61     Newlines:
62     \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63     \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64     Othercase pairs:
65     \xc3\xa9 = 0xe9 = 233 (e')
66     \xc3\x89 = 0xc9 = 201 (E')
67     \xc3\xa1 = 0xe1 = 225 (a')
68     \xc3\x81 = 0xc1 = 193 (A')
69     \xc8\xba = 0x23a = 570
70     \xe2\xb1\xa5 = 0x2c65 = 11365
71     \xe1\xbd\xb8 = 0x1f78 = 8056
72     \xe1\xbf\xb8 = 0x1ff8 = 8184
73     \xf0\x90\x90\x80 = 0x10400 = 66560
74     \xf0\x90\x90\xa8 = 0x10428 = 66600
75     Mark property:
76     \xcc\x8d = 0x30d = 781
77     Special:
78     \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79     \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80     \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81     \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82     \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 ph10 691 */
84 ph10 667
85 ph10 677 static int regression_tests(void);
86 ph10 667
87     int main(void)
88     {
89 ph10 698 int jit = 0;
90 ph10 836 #ifdef SUPPORT_PCRE8
91 ph10 698 pcre_config(PCRE_CONFIG_JIT, &jit);
92 ph10 836 #else
93     pcre16_config(PCRE_CONFIG_JIT, &jit);
94     #endif
95 ph10 698 if (!jit) {
96     printf("JIT must be enabled to run pcre_jit_test\n");
97     return 1;
98     }
99     return regression_tests();
100 ph10 667 }
101    
102 ph10 836 /* --------------------------------------------------------------------------------------- */
103 ph10 667
104 ph10 836 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105     #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106     #endif
107 ph10 667
108 ph10 836 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
109     #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
110     #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
111     #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
112     #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
113     #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
114     #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115 ph10 667
116 ph10 836 #define OFFSET_MASK 0x00ffff
117     #define F_NO8 0x010000
118     #define F_NO16 0x020000
119     #define F_NOMATCH 0x040000
120     #define F_DIFF 0x080000
121     #define F_FORCECONV 0x100000
122     #define F_PROPERTY 0x200000
123 ph10 667
124     struct regression_test_case {
125     int flags;
126     int start_offset;
127     const char *pattern;
128     const char *input;
129     };
130    
131     static struct regression_test_case regression_test_cases[] = {
132     /* Constant strings. */
133     { MUA, 0, "AbC", "AbAbC" },
134     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136     { MA, 0, "[^a]", "aAbB" },
137     { CMA, 0, "[^m]", "mMnN" },
138     { MA, 0, "a[^b][^#]", "abacd" },
139     { CMA, 0, "A[^B][^E]", "abacd" },
140     { CMUA, 0, "[^x][^#]", "XxBll" },
141     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147     { MUA, 0, "[axd]", "sAXd" },
148     { CMUA, 0, "[axd]", "sAXd" },
149 ph10 836 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 ph10 667 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159     { PCRE_CASELESS, 0, "a1", "Aa1" },
160 zherczeg 736 { MA, 0, "\\Ca", "cda" },
161     { CMA, 0, "\\Ca", "CDA" },
162 ph10 836 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163     { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164     { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165     { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166     { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167     { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168 ph10 667
169     /* Assertions. */
170     { MUA, 0, "\\b[^A]", "A_B#" },
171 ph10 836 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 ph10 667 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173     { MAP, 0, "\\B", "_\xa1" },
174     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 ph10 836 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180     { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181     { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182     { MA, 1 | F_NOMATCH, "^", "\n" },
183 ph10 667 { 0, 0, "^ab", "ab" },
184 ph10 836 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190     { 0, 0, "ab$", "ab" },
191 ph10 836 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 ph10 836 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 ph10 667 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 ph10 836 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 ph10 667 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219     { MA, 0, "\\Aa", "aaa" },
220 ph10 836 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 ph10 667 { MA, 1, "\\Ga", "aaa" },
222 ph10 836 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 ph10 667 { MA, 0, "a\\z", "aaa" },
224 ph10 836 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225 ph10 667
226     /* Brackets. */
227     { MUA, 0, "(ab|bb|cd)", "bacde" },
228     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233    
234     /* Greedy and non-greedy ? operators. */
235     { MUA, 0, "(?:a)?a", "laab" },
236     { CMUA, 0, "(A)?A", "llaab" },
237     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
238     { MUA, 0, "(a)?a", "manm" },
239     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242    
243     /* Greedy and non-greedy + operators */
244     { MUA, 0, "(aa)+aa", "aaaaaaa" },
245     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251    
252     /* Greedy and non-greedy * operators */
253     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260     { MA, 0, "((?:a|)*){0}a", "a" },
261    
262     /* Combining ? + * operators */
263     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268    
269     /* Single character iterators. */
270     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280     { MUA, 0, "(a?+[^b])+", "babaacacb" },
281     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283     { CMUA, 0, "[c-f]+k", "DemmFke" },
284     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 ph10 836 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298     { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299     { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300     { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301     { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302 ph10 667
303     /* Basic character sets. */
304     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310    
311     /* Unicode properties. */
312     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 ph10 667 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 ph10 836 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321     { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322     { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 ph10 667 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 ph10 836 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327     { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 ph10 667 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330     { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 ph10 667 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332    
333     /* Possible empty brackets. */
334     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344    
345     /* Start offset. */
346     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 ph10 836 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348     { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 ph10 667 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350    
351     /* Newline. */
352     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355    
356     /* Any character except newline or any newline. */
357     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 ph10 836 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 ph10 667 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 ph10 667 { MUA, 0, "\\R+", "ab\r\n\r" },
372     { MUA, 0, "\\R*", "ab\r\n\r" },
373     { MUA, 0, "\\R*", "\r\n\r" },
374     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 ph10 667 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380     { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 ph10 836 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 ph10 667 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383    
384     /* Atomic groups (no fallback from "next" direction). */
385 ph10 836 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386     { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 ph10 667 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388     "bababcdedefgheijijklmlmnop" },
389     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394     { MUA, 0, "(?>x|)*$", "aaa" },
395     { MUA, 0, "(?>(x)|)*$", "aaa" },
396     { MUA, 0, "(?>x|())*$", "aaa" },
397     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 ph10 836 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415     { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416     { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417     { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418     { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419     { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420     { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 ph10 667 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423    
424 ph10 698 /* Possessive quantifiers. */
425     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 ph10 836 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 ph10 667 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460    
461     /* Back references. */
462     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 ph10 836 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483     { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484     { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485 ph10 667
486     /* Assertions. */
487     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 ph10 836 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501     { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502     { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 ph10 667 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511    
512     /* Not empty, ACCEPT, FAIL */
513 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517     { MUA, 0, "a(*ACCEPT)b", "ab" },
518 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 ph10 836 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 ph10 667 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532    
533     /* Conditional blocks. */
534     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 ph10 836 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 ph10 667 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546     { MUA, 0, "(?(?=a)ab)", "a" },
547     { MUA, 0, "(?(?<!b)c)", "b" },
548     { MUA, 0, "(?(DEFINE)a(b))", "a" },
549     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 ph10 667 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 ph10 667 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 zherczeg 741 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568 ph10 667
569 ph10 698 /* Set start of match. */
570 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 ph10 667 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575    
576     /* First line. */
577 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 ph10 667 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 ph10 836 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594    
595     /* Recurse. */
596     { MUA, 0, "(a)(?1)", "aa" },
597     { MUA, 0, "((a))(?1)", "aa" },
598     { MUA, 0, "(b|a)(?1)", "aa" },
599     { MUA, 0, "(b|(a))(?1)", "aa" },
600 ph10 836 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 ph10 667 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 ph10 836 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 ph10 667 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 ph10 836 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 ph10 667 { MUA, 0, "b|<(?R)*>", "<<b>" },
613     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 zherczeg 741 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616     { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617     { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618     { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619     { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620     { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621 ph10 667
622 ph10 836 /* 16 bit specific tests. */
623     { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624     { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625     { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626     { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627     { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628     { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629     { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630     { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631     { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639     { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640     { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641     { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642     { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643     { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644     { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645     { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646     { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647     { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649     { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650     { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651     { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652     { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653     { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654    
655 ph10 667 /* Deep recursion. */
656     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659 ph10 691
660 ph10 677 /* Deep recursion: Stack limit reached. */
661 ph10 836 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662     { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663     { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664     { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665     { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666 ph10 667
667     { 0, 0, NULL, NULL }
668     };
669    
670 ph10 836 static const unsigned char *tables(int mode)
671     {
672     /* The purpose of this function to allow valgrind
673     for reporting invalid reads and writes. */
674     static unsigned char *tables_copy;
675     pcre *regex;
676     const char *errorptr;
677     int erroroffset;
678     const unsigned char *default_tables;
679     #ifdef SUPPORT_PCRE8
680     char null_str[1] = { 0 };
681     #else
682     PCRE_SCHAR16 null_str[1] = { 0 };
683     #endif
684    
685     if (mode) {
686     if (tables_copy)
687     free(tables_copy);
688     tables_copy = NULL;
689     return NULL;
690     }
691    
692     if (tables_copy)
693     return tables_copy;
694    
695     default_tables = NULL;
696     #ifdef SUPPORT_PCRE8
697     regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
698     if (regex) {
699     pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
700     pcre_free(regex);
701     }
702     #else
703     regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
704     if (regex) {
705     pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
706     pcre16_free(regex);
707     }
708     #endif
709     /* Shouldn't ever happen. */
710     if (!default_tables)
711     return NULL;
712    
713     /* Unfortunately this value cannot get from pcre_fullinfo.
714     Since this is a test program, this is acceptable at the moment. */
715     tables_copy = (unsigned char *)malloc(1088);
716     if (!tables_copy)
717     return NULL;
718    
719     memcpy(tables_copy, default_tables, 1088);
720     return tables_copy;
721     }
722    
723     static pcre_jit_stack* callback(void *arg)
724     {
725     return (pcre_jit_stack *)arg;
726     }
727    
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack shared by the 8 bit tests.

extra != NULL: allocate the stack on first use and assign it to extra.
extra == NULL: free the stack (if any), so that the next non-NULL call
               allocates a fresh one. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra) {
		if (!stack)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback, stack);
		return;
	}

	/* Release request. */
	if (stack) {
		pcre_jit_stack_free(stack);
		stack = NULL;
	}
}
#endif /* SUPPORT_PCRE8 */
746    
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack shared by the 16 bit tests.

extra != NULL: allocate the stack on first use and assign it to extra.
extra == NULL: free the stack (if any), so that the next non-NULL call
               allocates a fresh one. */
static void setstack16(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra) {
		if (!stack)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback, stack);
		return;
	}

	/* Release request. */
	if (stack) {
		pcre16_jit_stack_free(stack);
		stack = NULL;
	}
}
#endif /* SUPPORT_PCRE16 */
765    
766     #ifdef SUPPORT_PCRE16
767    
768     static int convert_utf8_to_utf16(const char *input, PCRE_SCHAR16 *output, int *offsetmap, int max_length)
769     {
770     unsigned char *iptr = (unsigned char*)input;
771     unsigned short *optr = (unsigned short *)output;
772     unsigned int c;
773    
774     if (max_length == 0)
775     return 0;
776    
777     while (*iptr && max_length > 1) {
778     c = 0;
779     if (offsetmap)
780     *offsetmap++ = (int)(iptr - (unsigned char*)input);
781    
782     if (!(*iptr & 0x80))
783     c = *iptr++;
784     else if (!(*iptr & 0x20)) {
785     c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
786     iptr += 2;
787     } else if (!(*iptr & 0x10)) {
788     c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
789     iptr += 3;
790     } else if (!(*iptr & 0x08)) {
791     c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
792     iptr += 4;
793     }
794    
795     if (c < 65536) {
796     *optr++ = c;
797     max_length--;
798     } else if (max_length <= 2) {
799     *optr = '\0';
800     return (int)(optr - (unsigned short *)output);
801     } else {
802     c -= 0x10000;
803     *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
804     *optr++ = 0xdc00 | (c & 0x3ff);
805     max_length -= 2;
806     if (offsetmap)
807     offsetmap++;
808     }
809     }
810     if (offsetmap)
811     *offsetmap = (int)(iptr - (unsigned char*)input);
812     *optr = '\0';
813     return (int)(optr - (unsigned short *)output);
814     }
815    
816     static int copy_char8_to_char16(const char *input, PCRE_SCHAR16 *output, int max_length)
817     {
818     unsigned char *iptr = (unsigned char*)input;
819     unsigned short *optr = (unsigned short *)output;
820    
821     if (max_length == 0)
822     return 0;
823    
824     while (*iptr && max_length > 1) {
825     *optr++ = *iptr++;
826     max_length--;
827     }
828     *optr = '\0';
829     return (int)(optr - (unsigned short *)output);
830     }
831    
832     #define REGTEST_MAX_LENGTH 4096
833     static PCRE_SCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
834     static int regtest_offsetmap[REGTEST_MAX_LENGTH];
835    
836     #endif /* SUPPORT_PCRE16 */
837    
/* Returns 1 if every byte of the NUL-terminated string is in the range
0..127, and 0 as soon as a byte with the top bit set is found. The empty
string yields 1. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != '\0'; cursor++) {
		if (*cursor & 0x80)
			return 0;
	}
	return 1;
}
848    
849 ph10 677 static int regression_tests(void)
850 ph10 667 {
851     struct regression_test_case *current = regression_test_cases;
852     const char *error;
853     int i, err_offs;
854 ph10 836 int is_successful, is_ascii_pattern, is_ascii_input;
855     int total = 0;
856     int successful = 0;
857 ph10 667 int counter = 0;
858 ph10 836 #ifdef SUPPORT_PCRE8
859     pcre *re8;
860     pcre_extra *extra8;
861     int ovector8_1[32];
862     int ovector8_2[32];
863     int return_value8_1, return_value8_2;
864     int utf8 = 0, ucp8 = 0;
865     int disabled_flags8 = 0;
866     #endif
867     #ifdef SUPPORT_PCRE16
868     pcre *re16;
869     pcre_extra *extra16;
870     int ovector16_1[32];
871     int ovector16_2[32];
872     int return_value16_1, return_value16_2;
873     int utf16 = 0, ucp16 = 0;
874     int disabled_flags16 = 0;
875     int length16;
876     #endif
877 ph10 667
878 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
879 ph10 836 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
880 ph10 698 still considered successful from pcre_jit_test point of view. */
881    
882 ph10 836 printf("Running JIT regression\n");
883    
884     #ifdef SUPPORT_PCRE8
885 ph10 698 pcre_config(PCRE_CONFIG_UTF8, &utf8);
886 ph10 836 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
887 ph10 698 if (!utf8)
888 ph10 836 disabled_flags8 |= PCRE_UTF8;
889     if (!ucp8)
890     disabled_flags8 |= PCRE_UCP;
891     printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
892     #endif
893     #ifdef SUPPORT_PCRE16
894     pcre16_config(PCRE_CONFIG_UTF16, &utf16);
895     pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
896     if (!utf16)
897     disabled_flags16 |= PCRE_UTF8;
898     if (!ucp16)
899     disabled_flags16 |= PCRE_UCP;
900     printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
901     #endif
902 ph10 698
903 ph10 667 while (current->pattern) {
904 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
905 ph10 667 total++;
906 ph10 836 if (current->start_offset & F_PROPERTY) {
907     is_ascii_pattern = 0;
908     is_ascii_input = 0;
909     } else {
910     is_ascii_pattern = check_ascii(current->pattern);
911     is_ascii_input = check_ascii(current->input);
912     }
913 ph10 667
914     error = NULL;
915 ph10 836 #ifdef SUPPORT_PCRE8
916     re8 = NULL;
917     if (!(current->start_offset & F_NO8))
918     re8 = pcre_compile(current->pattern,
919     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
920     &error, &err_offs, tables(0));
921 ph10 667
922 ph10 836 extra8 = NULL;
923     if (re8) {
924     error = NULL;
925     extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
926     if (!extra8) {
927     printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
928     pcre_free(re8);
929     re8 = NULL;
930 ph10 698 }
931 ph10 836 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
932     printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
933     pcre_free_study(extra8);
934     pcre_free(re8);
935     re8 = NULL;
936     }
937     } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
938     printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
939     #endif
940     #ifdef SUPPORT_PCRE16
941     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
942     convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
943     else
944     copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
945 ph10 667
946 ph10 836 re16 = NULL;
947     if (!(current->start_offset & F_NO16))
948     re16 = pcre16_compile(regtest_buf,
949     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
950     &error, &err_offs, tables(0));
951 ph10 667
952 ph10 836 extra16 = NULL;
953     if (re16) {
954     error = NULL;
955     extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
956     if (!extra16) {
957     printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
958     pcre16_free(re16);
959     re16 = NULL;
960     }
961     if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
962     printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
963     pcre16_free_study(extra16);
964     pcre16_free(re16);
965     re16 = NULL;
966     }
967     } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
968     printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
969     #endif
970 ph10 667
971     counter++;
972 ph10 836 if ((counter & 0x3) != 0) {
973     #ifdef SUPPORT_PCRE8
974     setstack8(NULL);
975     #endif
976     #ifdef SUPPORT_PCRE16
977     setstack16(NULL);
978     #endif
979     }
980 ph10 667
981 ph10 836 #ifdef SUPPORT_PCRE8
982     return_value8_1 = -1000;
983     return_value8_2 = -1000;
984 ph10 667 for (i = 0; i < 32; ++i)
985 ph10 836 ovector8_1[i] = -2;
986     for (i = 0; i < 32; ++i)
987     ovector8_2[i] = -2;
988     if (re8) {
989     setstack8(extra8);
990     return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
991     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
992     return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
993     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
994     }
995     #endif
996 ph10 667
997 ph10 836 #ifdef SUPPORT_PCRE16
998     return_value16_1 = -1000;
999     return_value16_2 = -1000;
1000 ph10 667 for (i = 0; i < 32; ++i)
1001 ph10 836 ovector16_1[i] = -2;
1002     for (i = 0; i < 32; ++i)
1003     ovector16_2[i] = -2;
1004     if (re16) {
1005     setstack16(extra16);
1006     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1007     length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1008     else
1009     length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1010     return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1011     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1012     return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1013     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1014     }
1015     #endif
1016 ph10 667
1017 ph10 836 /* If F_DIFF is set, just run the test, but do not compare the results.
1018 ph10 667 Segfaults can still be captured. */
1019    
1020 ph10 836 is_successful = 1;
1021     if (!(current->start_offset & F_DIFF)) {
1022     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1023     if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1024     /* All results must be the same. */
1025     if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1026     printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1027     return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1028     total, current->pattern, current->input);
1029     is_successful = 0;
1030     } else if (return_value8_1 >= 0) {
1031     return_value8_1 *= 2;
1032     /* Transform back the results. */
1033     if (current->flags & PCRE_UTF8) {
1034     for (i = 0; i < return_value8_1; ++i) {
1035     if (ovector16_1[i] >= 0)
1036     ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1037     if (ovector16_2[i] >= 0)
1038     ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1039     }
1040 ph10 667 }
1041 ph10 836
1042     for (i = 0; i < return_value8_1; ++i)
1043     if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1044     printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1045     i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1046     total, current->pattern, current->input);
1047     is_successful = 0;
1048     }
1049 ph10 667 }
1050 ph10 836 } else {
1051     #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1052     /* Only the 8 bit and 16 bit results must be equal. */
1053     #ifdef SUPPORT_PCRE8
1054     if (return_value8_1 != return_value8_2) {
1055     printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1056     return_value8_1, return_value8_2, total, current->pattern, current->input);
1057     is_successful = 0;
1058     } else if (return_value8_1 >= 0) {
1059     return_value8_1 *= 2;
1060     for (i = 0; i < return_value8_1; ++i)
1061     if (ovector8_1[i] != ovector8_2[i]) {
1062     printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1063     i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1064     is_successful = 0;
1065     }
1066     }
1067     #endif
1068    
1069     #ifdef SUPPORT_PCRE16
1070     if (return_value16_1 != return_value16_2) {
1071     printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1072     return_value16_1, return_value16_2, total, current->pattern, current->input);
1073     is_successful = 0;
1074     } else if (return_value16_1 >= 0) {
1075     return_value16_1 *= 2;
1076     for (i = 0; i < return_value16_1; ++i)
1077     if (ovector16_1[i] != ovector16_2[i]) {
1078     printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1079     i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1080     is_successful = 0;
1081     }
1082     }
1083     #endif
1084    
1085     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1086 ph10 667 }
1087 ph10 836 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1088 ph10 667 }
1089    
1090 ph10 836 if (is_successful) {
1091     #ifdef SUPPORT_PCRE8
1092     if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1093     if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1094     printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1095     total, current->pattern, current->input);
1096     is_successful = 0;
1097     }
1098 ph10 667
1099 ph10 836 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1100     printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1101     total, current->pattern, current->input);
1102     is_successful = 0;
1103     }
1104     }
1105     #endif
1106     #ifdef SUPPORT_PCRE16
1107     if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1108     if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1109     printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1110     total, current->pattern, current->input);
1111     is_successful = 0;
1112     }
1113    
1114     if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1115     printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1116     total, current->pattern, current->input);
1117     is_successful = 0;
1118     }
1119     }
1120     #endif
1121     }
1122    
1123     if (is_successful)
1124     successful++;
1125    
1126     #ifdef SUPPORT_PCRE8
1127     if (re8) {
1128     pcre_free_study(extra8);
1129     pcre_free(re8);
1130     }
1131     #endif
1132     #ifdef SUPPORT_PCRE16
1133     if (re16) {
1134     pcre16_free_study(extra16);
1135     pcre16_free(re16);
1136     }
1137     #endif
1138    
1139     /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1140 ph10 667 printf(".");
1141     fflush(stdout);
1142     current++;
1143     }
1144 ph10 836 tables(1);
1145     #ifdef SUPPORT_PCRE8
1146     setstack8(NULL);
1147     #endif
1148     #ifdef SUPPORT_PCRE16
1149     setstack16(NULL);
1150     #endif
1151 ph10 667
1152 ph10 836 if (total == successful) {
1153 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
1154 ph10 677 return 0;
1155 ph10 698 } else {
1156 ph10 836 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1157 ph10 698 return 1;
1158     }
1159 ph10 667 }
1160    
1161     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12