/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 884 - (hide annotations) (download)
Tue Jan 17 11:52:43 2012 UTC (16 months ago) by zherczeg
File MIME type: text/plain
File size: 49921 byte(s)
JIT test prints cpu info
1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 667
11     This JIT compiler regression test program was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 667
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
51     #define PCRE_BUG 0x80000000
52    
53     /*
54 ph10 836 Letter characters:
55     \xe6\x92\xad = 0x64ad = 25773 (kanji)
56     Non-letter characters:
57     \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61     Newlines:
62     \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63     \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64     Othercase pairs:
65     \xc3\xa9 = 0xe9 = 233 (e')
66     \xc3\x89 = 0xc9 = 201 (E')
67     \xc3\xa1 = 0xe1 = 225 (a')
68     \xc3\x81 = 0xc1 = 193 (A')
69     \xc8\xba = 0x23a = 570
70     \xe2\xb1\xa5 = 0x2c65 = 11365
71     \xe1\xbd\xb8 = 0x1f78 = 8056
72     \xe1\xbf\xb8 = 0x1ff8 = 8184
73     \xf0\x90\x90\x80 = 0x10400 = 66560
74     \xf0\x90\x90\xa8 = 0x10428 = 66600
75     Mark property:
76     \xcc\x8d = 0x30d = 781
77     Special:
78     \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79     \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80     \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81     \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82     \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 ph10 691 */
84 ph10 667
85 ph10 677 static int regression_tests(void);
86 ph10 667
87     int main(void)
88     {
89 ph10 698 int jit = 0;
90 ph10 836 #ifdef SUPPORT_PCRE8
91 ph10 698 pcre_config(PCRE_CONFIG_JIT, &jit);
92 ph10 836 #else
93     pcre16_config(PCRE_CONFIG_JIT, &jit);
94     #endif
95 ph10 698 if (!jit) {
96     printf("JIT must be enabled to run pcre_jit_test\n");
97     return 1;
98     }
99     return regression_tests();
100 ph10 667 }
101    
102 ph10 836 /* --------------------------------------------------------------------------------------- */
103 ph10 667
/* Stop the build unless SUPPORT_PCRE8 or SUPPORT_PCRE16 is defined; main()
   picks pcre_config() or pcre16_config() based on SUPPORT_PCRE8. */
104 ph10 836 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105     #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106     #endif
107 ph10 667
/*
 * Shorthand option sets used in the first column of the test table.
 * Each letter of a name stands for one PCRE option that the set combines:
 *   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
 *   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115 ph10 667
/*
 * Bit layout of the second column of the test table (start_offset).
 *
 * The low 16 bits (OFFSET_MASK) hold the start offset itself; the F_* bits
 * above them are per-case test flags OR-ed into the same field, for example
 * "1 | F_NOMATCH".  In the table, F_NOMATCH appears on cases whose subject
 * is not expected to match and F_PROPERTY on patterns that use \p, \P or \X.
 * NOTE(review): the code that consumes these flags is outside this view;
 * confirm the exact meaning of F_NO8, F_NO16, F_DIFF and F_FORCECONV there.
 */
#define OFFSET_MASK	0x00ffff
#define F_NO8		0x010000
#define F_NO16		0x020000
#define F_NOMATCH	0x040000
#define F_DIFF		0x080000
#define F_FORCECONV	0x100000
#define F_PROPERTY	0x200000
123 ph10 667
/*
 * One entry of the regression test table.
 *
 * The table is filled with positional initializers, so the member order
 * must not change.
 */
struct regression_test_case {
	/* PCRE option bits for this case (MUA, CMA, ... or raw PCRE_* values). */
	int flags;
	/* Start offset in the low bits, OR-ed with F_* test flags. */
	int start_offset;
	/* Regular expression under test. */
	const char *pattern;
	/* Subject string the pattern is run against. */
	const char *input;
};
130    
131     static struct regression_test_case regression_test_cases[] = {
132     /* Constant strings. */
133     { MUA, 0, "AbC", "AbAbC" },
134     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136     { MA, 0, "[^a]", "aAbB" },
137     { CMA, 0, "[^m]", "mMnN" },
138     { MA, 0, "a[^b][^#]", "abacd" },
139     { CMA, 0, "A[^B][^E]", "abacd" },
140     { CMUA, 0, "[^x][^#]", "XxBll" },
141     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147     { MUA, 0, "[axd]", "sAXd" },
148     { CMUA, 0, "[axd]", "sAXd" },
149 ph10 836 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 ph10 667 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159     { PCRE_CASELESS, 0, "a1", "Aa1" },
160 zherczeg 736 { MA, 0, "\\Ca", "cda" },
161     { CMA, 0, "\\Ca", "CDA" },
162 ph10 836 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163     { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164     { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165     { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166     { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167     { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168 ph10 667
169     /* Assertions. */
170     { MUA, 0, "\\b[^A]", "A_B#" },
171 ph10 836 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 ph10 667 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173     { MAP, 0, "\\B", "_\xa1" },
174     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 ph10 836 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180     { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181     { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182     { MA, 1 | F_NOMATCH, "^", "\n" },
183 ph10 667 { 0, 0, "^ab", "ab" },
184 ph10 836 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190     { 0, 0, "ab$", "ab" },
191 ph10 836 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 ph10 836 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 ph10 667 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 ph10 836 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 ph10 667 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219     { MA, 0, "\\Aa", "aaa" },
220 ph10 836 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 ph10 667 { MA, 1, "\\Ga", "aaa" },
222 ph10 836 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 ph10 667 { MA, 0, "a\\z", "aaa" },
224 ph10 836 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225 ph10 667
226     /* Brackets. */
227     { MUA, 0, "(ab|bb|cd)", "bacde" },
228     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233    
234     /* Greedy and non-greedy ? operators. */
235     { MUA, 0, "(?:a)?a", "laab" },
236     { CMUA, 0, "(A)?A", "llaab" },
237     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238     { MUA, 0, "(a)?a", "manm" },
239     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242    
243     /* Greedy and non-greedy + operators */
244     { MUA, 0, "(aa)+aa", "aaaaaaa" },
245     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251    
252     /* Greedy and non-greedy * operators */
253     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260     { MA, 0, "((?:a|)*){0}a", "a" },
261    
262     /* Combining ? + * operators */
263     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268    
269     /* Single character iterators. */
270     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280     { MUA, 0, "(a?+[^b])+", "babaacacb" },
281     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283     { CMUA, 0, "[c-f]+k", "DemmFke" },
284     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 ph10 836 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298     { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299     { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300     { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301     { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302 ph10 667
303     /* Basic character sets. */
304     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310    
311     /* Unicode properties. */
312     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 ph10 667 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 ph10 836 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321     { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322     { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 ph10 667 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 ph10 836 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327     { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 ph10 667 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330     { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 ph10 667 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332    
333     /* Possible empty brackets. */
334     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344    
345     /* Start offset. */
346     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 ph10 836 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348     { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 ph10 667 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350    
351     /* Newline. */
352     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355    
356     /* Any character except newline or any newline. */
357     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 ph10 836 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 ph10 667 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 ph10 667 { MUA, 0, "\\R+", "ab\r\n\r" },
372     { MUA, 0, "\\R*", "ab\r\n\r" },
373     { MUA, 0, "\\R*", "\r\n\r" },
374     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 ph10 667 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380     { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 ph10 836 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 ph10 667 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383    
384     /* Atomic groups (no fallback from "next" direction). */
385 ph10 836 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386     { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 ph10 667 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388     "bababcdedefgheijijklmlmnop" },
389     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394     { MUA, 0, "(?>x|)*$", "aaa" },
395     { MUA, 0, "(?>(x)|)*$", "aaa" },
396     { MUA, 0, "(?>x|())*$", "aaa" },
397     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 ph10 836 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415     { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416     { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417     { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418     { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419     { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420     { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 ph10 667 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423    
424 ph10 698 /* Possessive quantifiers. */
425     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 ph10 836 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 ph10 667 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460    
461     /* Back references. */
462     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 ph10 836 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483     { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484     { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485 ph10 667
486     /* Assertions. */
487     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 ph10 836 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501     { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502     { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 ph10 667 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511    
512     /* Not empty, ACCEPT, FAIL */
513 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517     { MUA, 0, "a(*ACCEPT)b", "ab" },
518 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 ph10 836 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 ph10 667 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532    
533     /* Conditional blocks. */
534     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 ph10 836 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 ph10 667 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546     { MUA, 0, "(?(?=a)ab)", "a" },
547     { MUA, 0, "(?(?<!b)c)", "b" },
548     { MUA, 0, "(?(DEFINE)a(b))", "a" },
549     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 ph10 667 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 ph10 667 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 zherczeg 741 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568 ph10 667
569 ph10 698 /* Set start of match. */
570 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 ph10 667 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575    
576     /* First line. */
577 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 ph10 667 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 ph10 836 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594    
595     /* Recurse. */
596     { MUA, 0, "(a)(?1)", "aa" },
597     { MUA, 0, "((a))(?1)", "aa" },
598     { MUA, 0, "(b|a)(?1)", "aa" },
599     { MUA, 0, "(b|(a))(?1)", "aa" },
600 ph10 836 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 ph10 667 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 ph10 836 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 ph10 667 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 ph10 836 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 ph10 667 { MUA, 0, "b|<(?R)*>", "<<b>" },
613     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 zherczeg 741 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616     { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617     { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618     { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619     { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620     { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621 ph10 667
622 ph10 836 /* 16 bit specific tests. */
623     { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624     { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625     { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626     { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627     { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628     { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629     { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630     { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631     { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639     { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640     { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641     { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642     { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643     { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644     { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645     { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646     { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647     { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649     { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650     { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651     { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652     { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653     { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654    
655 ph10 667 /* Deep recursion. */
656     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659 ph10 691
660 ph10 677 /* Deep recursion: Stack limit reached. */
661 ph10 836 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662     { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663     { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664     { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665     { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666 ph10 667
667     { 0, 0, NULL, NULL }
668     };
669    
670 ph10 836 static const unsigned char *tables(int mode)
671     {
672     /* The purpose of this function to allow valgrind
673     for reporting invalid reads and writes. */
674     static unsigned char *tables_copy;
675     const char *errorptr;
676     int erroroffset;
677     const unsigned char *default_tables;
678     #ifdef SUPPORT_PCRE8
679 zherczeg 852 pcre *regex;
680 ph10 836 char null_str[1] = { 0 };
681     #else
682 zherczeg 852 pcre16 *regex;
683 zherczeg 860 PCRE_UCHAR16 null_str[1] = { 0 };
684 ph10 836 #endif
685    
686     if (mode) {
687     if (tables_copy)
688     free(tables_copy);
689     tables_copy = NULL;
690     return NULL;
691     }
692    
693     if (tables_copy)
694     return tables_copy;
695    
696     default_tables = NULL;
697     #ifdef SUPPORT_PCRE8
698     regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
699     if (regex) {
700     pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
701     pcre_free(regex);
702     }
703     #else
704     regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
705     if (regex) {
706     pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
707     pcre16_free(regex);
708     }
709     #endif
710     /* Shouldn't ever happen. */
711     if (!default_tables)
712     return NULL;
713    
714     /* Unfortunately this value cannot get from pcre_fullinfo.
715     Since this is a test program, this is acceptable at the moment. */
716     tables_copy = (unsigned char *)malloc(1088);
717     if (!tables_copy)
718     return NULL;
719    
720     memcpy(tables_copy, default_tables, 1088);
721     return tables_copy;
722     }
723    
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library: the opaque argument is the
   stack itself, so it is simply handed back with the proper type. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *stack = (pcre_jit_stack *)arg;

	return stack;
}
#endif
730 ph10 836
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library: the opaque argument is the
   stack itself, so it is simply handed back with the proper type. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *stack = (pcre16_jit_stack *)arg;

	return stack;
}
#endif
737 zherczeg 852
#ifdef SUPPORT_PCRE8
/* Manages the JIT stack shared by the 8 bit tests.

   extra == NULL: free the cached stack (if there is one) and forget it.
   extra != NULL: allocate the stack on first use and assign it to extra
                  through callback8. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
756    
#ifdef SUPPORT_PCRE16
/* Manages the JIT stack shared by the 16 bit tests.

   extra == NULL: free the cached stack (if there is one) and forget it.
   extra != NULL: allocate the stack on first use and assign it to extra
                  through callback16. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre16_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
775    
776     #ifdef SUPPORT_PCRE16
777    
778 zherczeg 860 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
779 ph10 836 {
780     unsigned char *iptr = (unsigned char*)input;
781     unsigned short *optr = (unsigned short *)output;
782     unsigned int c;
783    
784     if (max_length == 0)
785     return 0;
786    
787     while (*iptr && max_length > 1) {
788     c = 0;
789     if (offsetmap)
790     *offsetmap++ = (int)(iptr - (unsigned char*)input);
791    
792     if (!(*iptr & 0x80))
793     c = *iptr++;
794     else if (!(*iptr & 0x20)) {
795     c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
796     iptr += 2;
797     } else if (!(*iptr & 0x10)) {
798     c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
799     iptr += 3;
800     } else if (!(*iptr & 0x08)) {
801     c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
802     iptr += 4;
803     }
804    
805     if (c < 65536) {
806     *optr++ = c;
807     max_length--;
808     } else if (max_length <= 2) {
809     *optr = '\0';
810     return (int)(optr - (unsigned short *)output);
811     } else {
812     c -= 0x10000;
813     *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
814     *optr++ = 0xdc00 | (c & 0x3ff);
815     max_length -= 2;
816     if (offsetmap)
817     offsetmap++;
818     }
819     }
820     if (offsetmap)
821     *offsetmap = (int)(iptr - (unsigned char*)input);
822     *optr = '\0';
823     return (int)(optr - (unsigned short *)output);
824     }
825    
826 zherczeg 860 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
827 ph10 836 {
828     unsigned char *iptr = (unsigned char*)input;
829     unsigned short *optr = (unsigned short *)output;
830    
831     if (max_length == 0)
832     return 0;
833    
834     while (*iptr && max_length > 1) {
835     *optr++ = *iptr++;
836     max_length--;
837     }
838     *optr = '\0';
839     return (int)(optr - (unsigned short *)output);
840     }
841    
842     #define REGTEST_MAX_LENGTH 4096
843 zherczeg 860 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
844 ph10 836 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
845    
846     #endif /* SUPPORT_PCRE16 */
847    
/* Tells whether a zero terminated string is pure 7 bit ASCII.
   Returns 1 when every byte is below 128 (also for the empty string),
   0 as soon as a byte with a larger value is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (unsigned char *)input; *cursor != 0; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
858    
859 ph10 677 static int regression_tests(void)
860 ph10 667 {
861     struct regression_test_case *current = regression_test_cases;
862     const char *error;
863 zherczeg 884 const char *cpu_info;
864 ph10 667 int i, err_offs;
865 ph10 836 int is_successful, is_ascii_pattern, is_ascii_input;
866     int total = 0;
867     int successful = 0;
868 ph10 667 int counter = 0;
869 ph10 836 #ifdef SUPPORT_PCRE8
870     pcre *re8;
871     pcre_extra *extra8;
872     int ovector8_1[32];
873     int ovector8_2[32];
874     int return_value8_1, return_value8_2;
875     int utf8 = 0, ucp8 = 0;
876     int disabled_flags8 = 0;
877     #endif
878     #ifdef SUPPORT_PCRE16
879 zherczeg 852 pcre16 *re16;
880 zherczeg 850 pcre16_extra *extra16;
881 ph10 836 int ovector16_1[32];
882     int ovector16_2[32];
883     int return_value16_1, return_value16_2;
884     int utf16 = 0, ucp16 = 0;
885     int disabled_flags16 = 0;
886     int length16;
887     #endif
888 ph10 667
889 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
890 ph10 836 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
891 ph10 698 still considered successful from pcre_jit_test point of view. */
892    
893 zherczeg 884 #ifdef SUPPORT_PCRE8
894     pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
895     #else
896     pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
897     #endif
898 ph10 836
899 zherczeg 884 printf("Running JIT regression tests\n");
900     printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
901    
902 ph10 836 #ifdef SUPPORT_PCRE8
903 ph10 698 pcre_config(PCRE_CONFIG_UTF8, &utf8);
904 ph10 836 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
905 ph10 698 if (!utf8)
906 ph10 836 disabled_flags8 |= PCRE_UTF8;
907     if (!ucp8)
908     disabled_flags8 |= PCRE_UCP;
909 zherczeg 884 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
910 ph10 836 #endif
911     #ifdef SUPPORT_PCRE16
912     pcre16_config(PCRE_CONFIG_UTF16, &utf16);
913     pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
914     if (!utf16)
915     disabled_flags16 |= PCRE_UTF8;
916     if (!ucp16)
917     disabled_flags16 |= PCRE_UCP;
918 zherczeg 884 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
919 ph10 836 #endif
920 ph10 698
921 ph10 667 while (current->pattern) {
922 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
923 ph10 667 total++;
924 ph10 836 if (current->start_offset & F_PROPERTY) {
925     is_ascii_pattern = 0;
926     is_ascii_input = 0;
927     } else {
928     is_ascii_pattern = check_ascii(current->pattern);
929     is_ascii_input = check_ascii(current->input);
930     }
931 ph10 667
932     error = NULL;
933 ph10 836 #ifdef SUPPORT_PCRE8
934     re8 = NULL;
935     if (!(current->start_offset & F_NO8))
936     re8 = pcre_compile(current->pattern,
937     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
938     &error, &err_offs, tables(0));
939 ph10 667
940 ph10 836 extra8 = NULL;
941     if (re8) {
942     error = NULL;
943     extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
944     if (!extra8) {
945     printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
946     pcre_free(re8);
947     re8 = NULL;
948 ph10 698 }
949 ph10 836 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
950     printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
951     pcre_free_study(extra8);
952     pcre_free(re8);
953     re8 = NULL;
954     }
955     } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
956     printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
957     #endif
958     #ifdef SUPPORT_PCRE16
959     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
960     convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
961     else
962     copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
963 ph10 667
964 ph10 836 re16 = NULL;
965     if (!(current->start_offset & F_NO16))
966     re16 = pcre16_compile(regtest_buf,
967     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
968     &error, &err_offs, tables(0));
969 ph10 667
970 ph10 836 extra16 = NULL;
971     if (re16) {
972     error = NULL;
973     extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
974     if (!extra16) {
975     printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
976     pcre16_free(re16);
977     re16 = NULL;
978     }
979     if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
980     printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
981     pcre16_free_study(extra16);
982     pcre16_free(re16);
983     re16 = NULL;
984     }
985     } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
986     printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
987     #endif
988 ph10 667
989     counter++;
990 ph10 836 if ((counter & 0x3) != 0) {
991     #ifdef SUPPORT_PCRE8
992     setstack8(NULL);
993     #endif
994     #ifdef SUPPORT_PCRE16
995     setstack16(NULL);
996     #endif
997     }
998 ph10 667
999 ph10 836 #ifdef SUPPORT_PCRE8
1000     return_value8_1 = -1000;
1001     return_value8_2 = -1000;
1002 ph10 667 for (i = 0; i < 32; ++i)
1003 ph10 836 ovector8_1[i] = -2;
1004     for (i = 0; i < 32; ++i)
1005     ovector8_2[i] = -2;
1006     if (re8) {
1007     setstack8(extra8);
1008     return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1009     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
1010     return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1011     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1012     }
1013     #endif
1014 ph10 667
1015 ph10 836 #ifdef SUPPORT_PCRE16
1016     return_value16_1 = -1000;
1017     return_value16_2 = -1000;
1018 ph10 667 for (i = 0; i < 32; ++i)
1019 ph10 836 ovector16_1[i] = -2;
1020     for (i = 0; i < 32; ++i)
1021     ovector16_2[i] = -2;
1022     if (re16) {
1023     setstack16(extra16);
1024     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1025     length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1026     else
1027     length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1028     return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1029     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1030     return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1031     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1032     }
1033     #endif
1034 ph10 667
1035 ph10 836 /* If F_DIFF is set, just run the test, but do not compare the results.
1036 ph10 667 Segfaults can still be captured. */
1037    
1038 ph10 836 is_successful = 1;
1039     if (!(current->start_offset & F_DIFF)) {
1040     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1041     if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1042     /* All results must be the same. */
1043     if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1044     printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1045     return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1046     total, current->pattern, current->input);
1047     is_successful = 0;
1048     } else if (return_value8_1 >= 0) {
1049     return_value8_1 *= 2;
1050     /* Transform back the results. */
1051     if (current->flags & PCRE_UTF8) {
1052     for (i = 0; i < return_value8_1; ++i) {
1053     if (ovector16_1[i] >= 0)
1054     ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1055     if (ovector16_2[i] >= 0)
1056     ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1057     }
1058 ph10 667 }
1059 ph10 836
1060     for (i = 0; i < return_value8_1; ++i)
1061     if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1062     printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1063     i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1064     total, current->pattern, current->input);
1065     is_successful = 0;
1066     }
1067 ph10 667 }
1068 ph10 836 } else {
1069     #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1070     /* Only the 8 bit and 16 bit results must be equal. */
1071     #ifdef SUPPORT_PCRE8
1072     if (return_value8_1 != return_value8_2) {
1073     printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1074     return_value8_1, return_value8_2, total, current->pattern, current->input);
1075     is_successful = 0;
1076     } else if (return_value8_1 >= 0) {
1077     return_value8_1 *= 2;
1078     for (i = 0; i < return_value8_1; ++i)
1079     if (ovector8_1[i] != ovector8_2[i]) {
1080     printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1081     i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1082     is_successful = 0;
1083     }
1084     }
1085     #endif
1086    
1087     #ifdef SUPPORT_PCRE16
1088     if (return_value16_1 != return_value16_2) {
1089     printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1090     return_value16_1, return_value16_2, total, current->pattern, current->input);
1091     is_successful = 0;
1092     } else if (return_value16_1 >= 0) {
1093     return_value16_1 *= 2;
1094     for (i = 0; i < return_value16_1; ++i)
1095     if (ovector16_1[i] != ovector16_2[i]) {
1096     printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1097     i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1098     is_successful = 0;
1099     }
1100     }
1101     #endif
1102    
1103     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1104 ph10 667 }
1105 ph10 836 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1106 ph10 667 }
1107    
1108 ph10 836 if (is_successful) {
1109     #ifdef SUPPORT_PCRE8
1110     if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1111     if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1112     printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1113     total, current->pattern, current->input);
1114     is_successful = 0;
1115     }
1116 ph10 667
1117 ph10 836 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1118     printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1119     total, current->pattern, current->input);
1120     is_successful = 0;
1121     }
1122     }
1123     #endif
1124     #ifdef SUPPORT_PCRE16
1125     if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1126     if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1127     printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1128     total, current->pattern, current->input);
1129     is_successful = 0;
1130     }
1131    
1132     if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1133     printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1134     total, current->pattern, current->input);
1135     is_successful = 0;
1136     }
1137     }
1138     #endif
1139     }
1140    
1141     if (is_successful)
1142     successful++;
1143    
1144     #ifdef SUPPORT_PCRE8
1145     if (re8) {
1146     pcre_free_study(extra8);
1147     pcre_free(re8);
1148     }
1149     #endif
1150     #ifdef SUPPORT_PCRE16
1151     if (re16) {
1152     pcre16_free_study(extra16);
1153     pcre16_free(re16);
1154     }
1155     #endif
1156    
1157     /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1158 ph10 667 printf(".");
1159     fflush(stdout);
1160     current++;
1161     }
1162 ph10 836 tables(1);
1163     #ifdef SUPPORT_PCRE8
1164     setstack8(NULL);
1165     #endif
1166     #ifdef SUPPORT_PCRE16
1167     setstack16(NULL);
1168     #endif
1169 ph10 667
1170 ph10 836 if (total == successful) {
1171 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
1172 ph10 677 return 0;
1173 ph10 698 } else {
1174 ph10 836 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1175 ph10 698 return 1;
1176     }
1177 ph10 667 }
1178    
1179     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12