/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 911 - (hide annotations) (download)
Fri Feb 10 08:05:30 2012 UTC (15 months, 1 week ago) by zherczeg
File MIME type: text/plain
File size: 49971 byte(s)
Fixing a wrong JIT test case
1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 667
11     This JIT compiler regression test program was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 667
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
51     #define PCRE_BUG 0x80000000
52    
53     /*
54 ph10 836 Letter characters:
55     \xe6\x92\xad = 0x64ad = 25773 (kanji)
56     Non-letter characters:
57     \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61     Newlines:
62     \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63     \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64     Othercase pairs:
65     \xc3\xa9 = 0xe9 = 233 (e')
66     \xc3\x89 = 0xc9 = 201 (E')
67     \xc3\xa1 = 0xe1 = 225 (a')
68     \xc3\x81 = 0xc1 = 193 (A')
69     \xc8\xba = 0x23a = 570
70     \xe2\xb1\xa5 = 0x2c65 = 11365
71     \xe1\xbd\xb8 = 0x1f78 = 8056
72     \xe1\xbf\xb8 = 0x1ff8 = 8184
73     \xf0\x90\x90\x80 = 0x10400 = 66560
74     \xf0\x90\x90\xa8 = 0x10428 = 66600
75     Mark property:
76     \xcc\x8d = 0x30d = 781
77     Special:
78     \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79     \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80     \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81     \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82     \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 ph10 691 */
84 ph10 667
85 ph10 677 static int regression_tests(void);
86 ph10 667
87     int main(void)
88     {
89 ph10 698 int jit = 0;
90 ph10 836 #ifdef SUPPORT_PCRE8
91 ph10 698 pcre_config(PCRE_CONFIG_JIT, &jit);
92 ph10 836 #else
93     pcre16_config(PCRE_CONFIG_JIT, &jit);
94     #endif
95 ph10 698 if (!jit) {
96     printf("JIT must be enabled to run pcre_jit_test\n");
97     return 1;
98     }
99     return regression_tests();
100 ph10 667 }
101    
102 ph10 836 /* --------------------------------------------------------------------------------------- */
103 ph10 667
104 ph10 836 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105     #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106     #endif
107 ph10 667
/* Shorthands for the option combinations used in the test table.
Every combination contains PCRE_MULTILINE and PCRE_NEWLINE_ANYCRLF (MA);
the extra letters add: C = PCRE_CASELESS, U = PCRE_UTF8, P = PCRE_UCP. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(MA | PCRE_CASELESS)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(MUA | PCRE_CASELESS)
#define CMUAP	(CMUA | PCRE_UCP)
115 ph10 667
/* Bit layout of the second (start_offset) field of a test case.

The low 16 bits are covered by OFFSET_MASK; the F_* flags occupy the six bits
directly above it and are OR'ed onto the offset in the test table
(e.g. "1 | F_NOMATCH").

NOTE(review): the code that interprets the individual flags is not visible
here. In the visible table F_NOMATCH marks cases that presumably are expected
not to match, and F_PROPERTY is set on cases whose pattern uses \p, \P or \X;
confirm all flag meanings against the test runner. */
#define OFFSET_MASK	0xffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
123 ph10 667
/* One entry of the regression test table.

The field order must not change: the table is written with positional
initialisers ({ flags, start_offset, pattern, input }). */
struct regression_test_case {
	/* PCRE_* option bits for this case (e.g. MUA, PCRE_NOTBOL | ...). */
	int flags;

	/* Start offset, with optional F_* flag bits OR'ed in above it
	   (e.g. "1 | F_NOMATCH"). */
	int start_offset;

	/* The regular expression, as a C string. */
	const char *pattern;

	/* The text the pattern is run against, as a C string. */
	const char *input;
};
130    
131     static struct regression_test_case regression_test_cases[] = {
132     /* Constant strings. */
133     { MUA, 0, "AbC", "AbAbC" },
134     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136     { MA, 0, "[^a]", "aAbB" },
137     { CMA, 0, "[^m]", "mMnN" },
138     { MA, 0, "a[^b][^#]", "abacd" },
139     { CMA, 0, "A[^B][^E]", "abacd" },
140     { CMUA, 0, "[^x][^#]", "XxBll" },
141     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147     { MUA, 0, "[axd]", "sAXd" },
148     { CMUA, 0, "[axd]", "sAXd" },
149 ph10 836 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 ph10 667 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159     { PCRE_CASELESS, 0, "a1", "Aa1" },
160 zherczeg 736 { MA, 0, "\\Ca", "cda" },
161     { CMA, 0, "\\Ca", "CDA" },
162 ph10 836 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163     { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164     { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165     { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166     { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167     { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168 ph10 667
169     /* Assertions. */
170     { MUA, 0, "\\b[^A]", "A_B#" },
171 ph10 836 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 ph10 667 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173     { MAP, 0, "\\B", "_\xa1" },
174     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 ph10 836 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180     { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181     { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182     { MA, 1 | F_NOMATCH, "^", "\n" },
183 ph10 667 { 0, 0, "^ab", "ab" },
184 ph10 836 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190     { 0, 0, "ab$", "ab" },
191 zherczeg 911 { 0, 0 | F_NOMATCH, "ab$", "abab\n\n" },
192     { PCRE_DOLLAR_ENDONLY, 0 | F_NOMATCH, "ab$", "abab\r\n" },
193 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
194     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
195     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
196     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
197 ph10 836 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
198 ph10 667 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
199 ph10 836 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
200     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
201     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
202 ph10 667 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
203     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
204     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
205     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
206     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
207     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
208     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
209     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
210     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
211     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
212     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
213     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
214     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
215     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
216     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
217     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
218     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
219     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
220     { MA, 0, "\\Aa", "aaa" },
221 ph10 836 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
222 ph10 667 { MA, 1, "\\Ga", "aaa" },
223 ph10 836 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
224 ph10 667 { MA, 0, "a\\z", "aaa" },
225 ph10 836 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
226 ph10 667
227     /* Brackets. */
228     { MUA, 0, "(ab|bb|cd)", "bacde" },
229     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
230     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
231     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
232     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
233     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
234    
235     /* Greedy and non-greedy ? operators. */
236     { MUA, 0, "(?:a)?a", "laab" },
237     { CMUA, 0, "(A)?A", "llaab" },
238     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
239     { MUA, 0, "(a)?a", "manm" },
240     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
241     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
242     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
243    
244     /* Greedy and non-greedy + operators */
245     { MUA, 0, "(aa)+aa", "aaaaaaa" },
246     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
247     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
248     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
249     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
251     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
252    
253     /* Greedy and non-greedy * operators */
254     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
255     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
256     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
257     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
258     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
259     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
260     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
261     { MA, 0, "((?:a|)*){0}a", "a" },
262    
263     /* Combining ? + * operators */
264     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
265     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
267     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
268     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
269    
270     /* Single character iterators. */
271     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
272     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
273     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
274     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
275     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
276     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
277     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
278     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
279     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
280     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
281     { MUA, 0, "(a?+[^b])+", "babaacacb" },
282     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
283     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
284     { CMUA, 0, "[c-f]+k", "DemmFke" },
285     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
286     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
287     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
288     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
289     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
290     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
291     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
292     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
293     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
294     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
295     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
296     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
297     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
298 ph10 836 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
299     { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
300     { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301     { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
302     { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
303 ph10 667
304     /* Basic character sets. */
305     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
306     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
307     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
308     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
309     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
310     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
311    
312     /* Unicode properties. */
313     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
314 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
315 ph10 667 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
316 ph10 836 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
317     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
318     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
320     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
321     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322     { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
323     { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
324 ph10 667 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
325     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
326     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
327 ph10 836 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
328     { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
329 ph10 667 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
330 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
331     { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
332 ph10 667 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
333    
334     /* Possible empty brackets. */
335     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
336     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
337     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
338     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
339     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
340     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
341     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
342     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
343     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
344     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
345    
346     /* Start offset. */
347     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
348 ph10 836 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349     { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
350 ph10 667 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
351    
352     /* Newline. */
353     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
355     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
356    
357     /* Any character except newline or any newline. */
358     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
359     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
360     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
362     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
363 ph10 836 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
364 ph10 667 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
365     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
366     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
367     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
368     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
369     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
370     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
371 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
372 ph10 667 { MUA, 0, "\\R+", "ab\r\n\r" },
373     { MUA, 0, "\\R*", "ab\r\n\r" },
374     { MUA, 0, "\\R*", "\r\n\r" },
375     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
376     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
377     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
378     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
379 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
380 ph10 667 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
381     { MUA, 0, "\\R*\\R\\R", "\n\r" },
382 ph10 836 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
383 ph10 667 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
384    
385     /* Atomic groups (no fallback from "next" direction). */
386 ph10 836 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
387     { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
388 ph10 667 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
389     "bababcdedefgheijijklmlmnop" },
390     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
391     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
392     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
393     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
394     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
395     { MUA, 0, "(?>x|)*$", "aaa" },
396     { MUA, 0, "(?>(x)|)*$", "aaa" },
397     { MUA, 0, "(?>x|())*$", "aaa" },
398     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
399     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
400     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
401     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
402     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
403     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
404     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
405     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
406     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
407     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
408     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
409     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
410     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
411     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
412     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
413     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
415 ph10 836 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
416     { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
417     { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
418     { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
419     { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
420     { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
421     { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
422 ph10 667 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
423     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
424    
425 ph10 698 /* Possessive quantifiers. */
426     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
427     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
428     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
429     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
430     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
431     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
432 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
433     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
434     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
435     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
436     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
437     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
438     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
439     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
440     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
441 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
442     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
443     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
444     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
445     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
446     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
447 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
448     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
449     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
450     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
451     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
452     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
453     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
454     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
455     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
456 ph10 836 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
457 ph10 667 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
458     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
459     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
461    
462     /* Back references. */
463     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
464     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
465     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
466     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
467     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
468     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
470     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
471     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
472     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
473     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
474     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
475     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
477     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
478     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
479     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
480 ph10 836 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
481     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
482     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
483     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
484     { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
485     { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
486 ph10 667
487     /* Assertions. */
488     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
489     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
490     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
491     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
492     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
493     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
494     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
495     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
496     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
497     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
498     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
499     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
500     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
501 ph10 836 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
502     { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
503     { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
504 ph10 667 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
505     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
506     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
508     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
509     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
510     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
511     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
512    
513     /* Not empty, ACCEPT, FAIL */
514 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
515 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
516     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
517     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
518     { MUA, 0, "a(*ACCEPT)b", "ab" },
519 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
520 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
521     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
522 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
523 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
524     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
525     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
526 ph10 836 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
527 ph10 667 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
528     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
529     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
530     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
531     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
532     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
533    
534     /* Conditional blocks. */
535     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
536     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
537     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
538     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
539     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
540     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
541     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
543 ph10 836 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544 ph10 667 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
545     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
546     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
547     { MUA, 0, "(?(?=a)ab)", "a" },
548     { MUA, 0, "(?(?<!b)c)", "b" },
549     { MUA, 0, "(?(DEFINE)a(b))", "a" },
550     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
551     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
552     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
553     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
554     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
555     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
556     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
557 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
558 ph10 667 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
559     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
560     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
561 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
562 ph10 667 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
563     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
564     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
565     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
566 zherczeg 741 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
567     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
568     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
569 ph10 667
570 ph10 698 /* Set start of match. */
571 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
572     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
573     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
574 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
575 ph10 667 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
576    
577     /* First line. */
578 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
579     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
580 ph10 667 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
581 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
582     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
583     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
584     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
585     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
586     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
587     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
588     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
589     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
590 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
591     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
592 ph10 836 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
593     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
594 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
595    
596     /* Recurse. */
597     { MUA, 0, "(a)(?1)", "aa" },
598     { MUA, 0, "((a))(?1)", "aa" },
599     { MUA, 0, "(b|a)(?1)", "aa" },
600     { MUA, 0, "(b|(a))(?1)", "aa" },
601 ph10 836 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
602 ph10 667 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
603     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
604     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
605     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
606     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
607     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
608     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
609 ph10 836 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
610 ph10 667 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
611     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
612 ph10 836 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
613 ph10 667 { MUA, 0, "b|<(?R)*>", "<<b>" },
614     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
615     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
616 zherczeg 741 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
617     { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
618     { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
619     { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
620     { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
621     { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
622 ph10 667
623 ph10 836 /* 16 bit specific tests. */
624     { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
625     { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
626     { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
627     { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
628     { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
629     { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
630     { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
631     { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
632     { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
633     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
634     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
635     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
636     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
637     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
638     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
639     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
640     { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641     { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
642     { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
643     { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
644     { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
645     { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
646     { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
647     { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
648     { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
649     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
650     { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
651     { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
652     { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
653     { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
654     { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
655    
656 ph10 667 /* Deep recursion. */
657     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
658 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
659 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
660 ph10 691
661 ph10 677 /* Deep recursion: Stack limit reached. */
662 ph10 836 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
663     { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664     { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665     { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666     { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
667 ph10 667
668     { 0, 0, NULL, NULL }
669     };
670    
671 ph10 836 static const unsigned char *tables(int mode)
672     {
673     /* The purpose of this function to allow valgrind
674     for reporting invalid reads and writes. */
675     static unsigned char *tables_copy;
676     const char *errorptr;
677     int erroroffset;
678 zherczeg 911 unsigned char *default_tables;
679 ph10 836 #ifdef SUPPORT_PCRE8
680 zherczeg 852 pcre *regex;
681 ph10 836 char null_str[1] = { 0 };
682     #else
683 zherczeg 852 pcre16 *regex;
684 zherczeg 860 PCRE_UCHAR16 null_str[1] = { 0 };
685 ph10 836 #endif
686    
687     if (mode) {
688     if (tables_copy)
689     free(tables_copy);
690     tables_copy = NULL;
691     return NULL;
692     }
693    
694     if (tables_copy)
695     return tables_copy;
696    
697     default_tables = NULL;
698     #ifdef SUPPORT_PCRE8
699     regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
700     if (regex) {
701     pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
702     pcre_free(regex);
703     }
704     #else
705     regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
706     if (regex) {
707     pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
708     pcre16_free(regex);
709     }
710     #endif
711     /* Shouldn't ever happen. */
712     if (!default_tables)
713     return NULL;
714    
715     /* Unfortunately this value cannot get from pcre_fullinfo.
716     Since this is a test program, this is acceptable at the moment. */
717     tables_copy = (unsigned char *)malloc(1088);
718     if (!tables_copy)
719     return NULL;
720    
721     memcpy(tables_copy, default_tables, 1088);
722     return tables_copy;
723     }
724    
725 ph10 881 #ifdef SUPPORT_PCRE8
726 zherczeg 852 static pcre_jit_stack* callback8(void *arg)
727 ph10 836 {
728     return (pcre_jit_stack *)arg;
729     }
730 ph10 881 #endif
731 ph10 836
732 ph10 881 #ifdef SUPPORT_PCRE16
733 zherczeg 852 static pcre16_jit_stack* callback16(void *arg)
734     {
735     return (pcre16_jit_stack *)arg;
736     }
737 ph10 881 #endif
738 zherczeg 852
739 ph10 836 #ifdef SUPPORT_PCRE8
740     static void setstack8(pcre_extra *extra)
741     {
742     static pcre_jit_stack *stack;
743    
744     if (!extra) {
745     if (stack)
746     pcre_jit_stack_free(stack);
747     stack = NULL;
748     return;
749     }
750    
751     if (!stack)
752     stack = pcre_jit_stack_alloc(1, 1024 * 1024);
753     /* Extra can be NULL. */
754 zherczeg 852 pcre_assign_jit_stack(extra, callback8, stack);
755 ph10 836 }
756     #endif /* SUPPORT_PCRE8 */
757    
#ifdef SUPPORT_PCRE16
/* Manages the single JIT stack shared by all 16 bit tests.

extra != NULL: allocate the stack if it does not exist yet and attach it to
               the study data through callback16.
extra == NULL: free the stack (if any). */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (!extra) {
		if (stack)
			pcre16_jit_stack_free(stack);
		stack = NULL;
		return;
	}

	if (!stack)
		stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
	/* extra is known to be non-NULL here; stack stays NULL only if the
	allocation above failed, and is then passed on unchanged. */
	pcre16_assign_jit_stack(extra, callback16, stack);
}
#endif /* SUPPORT_PCRE16 */
776    
777     #ifdef SUPPORT_PCRE16
778    
/* Converts a zero terminated UTF-8 string to a zero terminated UTF-16 string.

input       UTF-8 source. Decoding is deliberately lenient: encoded
            surrogates and stray continuation bytes are decoded by their bit
            pattern, because the test cases rely on such input.
output      destination with room for max_length code units (terminator
            included).
offsetmap   optional (may be NULL). Entry N receives the byte offset in input
            of the character that produced output unit N, and the entry at
            index <return value> receives the offset where conversion stopped.
            The entry of a low surrogate is left untouched.
max_length  capacity of output in code units.

Returns the number of code units written, not counting the terminator.
Nothing is written when max_length <= 0.

A byte which cannot start a sequence (0xf8..0xff), or a sequence cut short by
the end of the string, is copied as a single code unit holding the lead byte.
This guarantees that the read pointer always advances and never moves past
the terminating zero. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *start = (const unsigned char *)input;
	const unsigned char *iptr = start;
	unsigned short *optr = (unsigned short *)output;
	unsigned int c;
	int seq_len, i;

	if (max_length <= 0)
		return 0;

	while (*iptr && max_length > 1) {
		if (offsetmap)
			*offsetmap++ = (int)(iptr - start);

		/* Sequence length and payload bits of the lead byte. */
		if (!(*iptr & 0x80)) {
			seq_len = 1;
			c = *iptr;
		} else if (!(*iptr & 0x20)) {
			seq_len = 2;
			c = *iptr & 0x1f;
		} else if (!(*iptr & 0x10)) {
			seq_len = 3;
			c = *iptr & 0x0f;
		} else if (!(*iptr & 0x08)) {
			seq_len = 4;
			c = *iptr & 0x07;
		} else {
			/* Not a valid lead byte: pass it through unchanged. */
			seq_len = 1;
			c = *iptr;
		}

		/* Collect the continuation bytes, but never read past the terminator. */
		for (i = 1; i < seq_len; i++) {
			if (iptr[i] == 0)
				break;
			c = (c << 6) | (iptr[i] & 0x3f);
		}
		if (i < seq_len) {
			/* Truncated sequence: consume only the lead byte. */
			c = *iptr;
			seq_len = 1;
		}
		iptr += seq_len;

		if (c < 65536) {
			*optr++ = (unsigned short)c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room left for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - (unsigned short *)output);
		} else {
			c -= 0x10000;
			*optr++ = (unsigned short)(0xd800 | ((c >> 10) & 0x3ff));
			*optr++ = (unsigned short)(0xdc00 | (c & 0x3ff));
			max_length -= 2;
			/* Skip the map entry which belongs to the low surrogate. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - start);
	*optr = '\0';
	return (int)(optr - (unsigned short *)output);
}
826    
/* Widens a zero terminated byte string to 16 bit units, one unit per byte
(no UTF decoding). At most max_length - 1 units are copied and the result is
always zero terminated, except that nothing is written when max_length is 0.
Returns the number of units copied, not counting the terminator. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	unsigned short *dst = (unsigned short *)output;
	int count = 0;

	if (max_length == 0)
		return 0;

	/* Keep one slot free for the terminator. */
	for (; src[count] != 0 && count + 1 < max_length; count++)
		dst[count] = src[count];

	dst[count] = '\0';
	return count;
}
842    
/* Scratch storage for the 16 bit tests, in code units (terminator included). */
#define REGTEST_MAX_LENGTH 4096
/* Filled while converting the subject; used to translate 16 bit match
offsets back to byte offsets of the original 8 bit subject. */
static int regtest_offsetmap[REGTEST_MAX_LENGTH];
/* Receives the 16 bit form of the pattern, and later of the subject. */
static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
846    
847     #endif /* SUPPORT_PCRE16 */
848    
/* Returns 1 when every byte of the zero terminated string is below 128
(an empty string qualifies), 0 as soon as a byte of 128 or above is found. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if (*p >= 128)
			return 0;
	}
	return 1;
}
859    
860 ph10 677 static int regression_tests(void)
861 ph10 667 {
862     struct regression_test_case *current = regression_test_cases;
863     const char *error;
864 zherczeg 911 char *cpu_info;
865 ph10 667 int i, err_offs;
866 ph10 836 int is_successful, is_ascii_pattern, is_ascii_input;
867     int total = 0;
868     int successful = 0;
869 ph10 667 int counter = 0;
870 ph10 836 #ifdef SUPPORT_PCRE8
871     pcre *re8;
872     pcre_extra *extra8;
873     int ovector8_1[32];
874     int ovector8_2[32];
875     int return_value8_1, return_value8_2;
876     int utf8 = 0, ucp8 = 0;
877     int disabled_flags8 = 0;
878     #endif
879     #ifdef SUPPORT_PCRE16
880 zherczeg 852 pcre16 *re16;
881 zherczeg 850 pcre16_extra *extra16;
882 ph10 836 int ovector16_1[32];
883     int ovector16_2[32];
884     int return_value16_1, return_value16_2;
885     int utf16 = 0, ucp16 = 0;
886     int disabled_flags16 = 0;
887     int length16;
888     #endif
889 ph10 667
890 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
891 ph10 836 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
892 ph10 698 still considered successful from pcre_jit_test point of view. */
893    
894 zherczeg 884 #ifdef SUPPORT_PCRE8
895     pcre_config(PCRE_CONFIG_JITTARGET, &cpu_info);
896     #else
897     pcre16_config(PCRE_CONFIG_JITTARGET, &cpu_info);
898     #endif
899 ph10 836
900 zherczeg 884 printf("Running JIT regression tests\n");
901     printf(" target CPU of SLJIT compiler: %s\n", cpu_info);
902    
903 ph10 836 #ifdef SUPPORT_PCRE8
904 ph10 698 pcre_config(PCRE_CONFIG_UTF8, &utf8);
905 ph10 836 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
906 ph10 698 if (!utf8)
907 ph10 836 disabled_flags8 |= PCRE_UTF8;
908     if (!ucp8)
909     disabled_flags8 |= PCRE_UCP;
910 zherczeg 884 printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
911 ph10 836 #endif
912     #ifdef SUPPORT_PCRE16
913     pcre16_config(PCRE_CONFIG_UTF16, &utf16);
914     pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
915     if (!utf16)
916     disabled_flags16 |= PCRE_UTF8;
917     if (!ucp16)
918     disabled_flags16 |= PCRE_UCP;
919 zherczeg 884 printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
920 ph10 836 #endif
921 ph10 698
922 ph10 667 while (current->pattern) {
923 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
924 ph10 667 total++;
925 ph10 836 if (current->start_offset & F_PROPERTY) {
926     is_ascii_pattern = 0;
927     is_ascii_input = 0;
928     } else {
929     is_ascii_pattern = check_ascii(current->pattern);
930     is_ascii_input = check_ascii(current->input);
931     }
932 ph10 667
933     error = NULL;
934 ph10 836 #ifdef SUPPORT_PCRE8
935     re8 = NULL;
936     if (!(current->start_offset & F_NO8))
937     re8 = pcre_compile(current->pattern,
938     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
939     &error, &err_offs, tables(0));
940 ph10 667
941 ph10 836 extra8 = NULL;
942     if (re8) {
943     error = NULL;
944     extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
945     if (!extra8) {
946     printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
947     pcre_free(re8);
948     re8 = NULL;
949 ph10 698 }
950 ph10 836 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
951     printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
952     pcre_free_study(extra8);
953     pcre_free(re8);
954     re8 = NULL;
955     }
956     } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
957     printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
958     #endif
959     #ifdef SUPPORT_PCRE16
960     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
961     convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
962     else
963     copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
964 ph10 667
965 ph10 836 re16 = NULL;
966     if (!(current->start_offset & F_NO16))
967     re16 = pcre16_compile(regtest_buf,
968     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
969     &error, &err_offs, tables(0));
970 ph10 667
971 ph10 836 extra16 = NULL;
972     if (re16) {
973     error = NULL;
974     extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
975     if (!extra16) {
976     printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
977     pcre16_free(re16);
978     re16 = NULL;
979     }
980     if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
981     printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
982     pcre16_free_study(extra16);
983     pcre16_free(re16);
984     re16 = NULL;
985     }
986     } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
987     printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
988     #endif
989 ph10 667
990     counter++;
991 ph10 836 if ((counter & 0x3) != 0) {
992     #ifdef SUPPORT_PCRE8
993     setstack8(NULL);
994     #endif
995     #ifdef SUPPORT_PCRE16
996     setstack16(NULL);
997     #endif
998     }
999 ph10 667
1000 ph10 836 #ifdef SUPPORT_PCRE8
1001     return_value8_1 = -1000;
1002     return_value8_2 = -1000;
1003 ph10 667 for (i = 0; i < 32; ++i)
1004 ph10 836 ovector8_1[i] = -2;
1005     for (i = 0; i < 32; ++i)
1006     ovector8_2[i] = -2;
1007     if (re8) {
1008     setstack8(extra8);
1009     return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1010     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
1011     return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1012     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1013     }
1014     #endif
1015 ph10 667
1016 ph10 836 #ifdef SUPPORT_PCRE16
1017     return_value16_1 = -1000;
1018     return_value16_2 = -1000;
1019 ph10 667 for (i = 0; i < 32; ++i)
1020 ph10 836 ovector16_1[i] = -2;
1021     for (i = 0; i < 32; ++i)
1022     ovector16_2[i] = -2;
1023     if (re16) {
1024     setstack16(extra16);
1025     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1026     length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1027     else
1028     length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1029     return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1030     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1031     return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1032     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1033     }
1034     #endif
1035 ph10 667
1036 ph10 836 /* If F_DIFF is set, just run the test, but do not compare the results.
1037 ph10 667 Segfaults can still be captured. */
1038    
1039 ph10 836 is_successful = 1;
1040     if (!(current->start_offset & F_DIFF)) {
1041     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1042     if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1043     /* All results must be the same. */
1044     if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1045     printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1046     return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1047     total, current->pattern, current->input);
1048     is_successful = 0;
1049     } else if (return_value8_1 >= 0) {
1050     return_value8_1 *= 2;
1051     /* Transform back the results. */
1052     if (current->flags & PCRE_UTF8) {
1053     for (i = 0; i < return_value8_1; ++i) {
1054     if (ovector16_1[i] >= 0)
1055     ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1056     if (ovector16_2[i] >= 0)
1057     ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1058     }
1059 ph10 667 }
1060 ph10 836
1061     for (i = 0; i < return_value8_1; ++i)
1062     if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1063     printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1064     i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1065     total, current->pattern, current->input);
1066     is_successful = 0;
1067     }
1068 ph10 667 }
1069 ph10 836 } else {
1070     #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1071     /* Only the 8 bit and 16 bit results must be equal. */
1072     #ifdef SUPPORT_PCRE8
1073     if (return_value8_1 != return_value8_2) {
1074     printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1075     return_value8_1, return_value8_2, total, current->pattern, current->input);
1076     is_successful = 0;
1077     } else if (return_value8_1 >= 0) {
1078     return_value8_1 *= 2;
1079     for (i = 0; i < return_value8_1; ++i)
1080     if (ovector8_1[i] != ovector8_2[i]) {
1081     printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1082     i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1083     is_successful = 0;
1084     }
1085     }
1086     #endif
1087    
1088     #ifdef SUPPORT_PCRE16
1089     if (return_value16_1 != return_value16_2) {
1090     printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1091     return_value16_1, return_value16_2, total, current->pattern, current->input);
1092     is_successful = 0;
1093     } else if (return_value16_1 >= 0) {
1094     return_value16_1 *= 2;
1095     for (i = 0; i < return_value16_1; ++i)
1096     if (ovector16_1[i] != ovector16_2[i]) {
1097     printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1098     i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1099     is_successful = 0;
1100     }
1101     }
1102     #endif
1103    
1104     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1105 ph10 667 }
1106 ph10 836 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1107 ph10 667 }
1108    
1109 ph10 836 if (is_successful) {
1110     #ifdef SUPPORT_PCRE8
1111     if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1112     if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1113     printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1114     total, current->pattern, current->input);
1115     is_successful = 0;
1116     }
1117 ph10 667
1118 ph10 836 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1119     printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1120     total, current->pattern, current->input);
1121     is_successful = 0;
1122     }
1123     }
1124     #endif
1125     #ifdef SUPPORT_PCRE16
1126     if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1127     if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1128     printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1129     total, current->pattern, current->input);
1130     is_successful = 0;
1131     }
1132    
1133     if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1134     printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1135     total, current->pattern, current->input);
1136     is_successful = 0;
1137     }
1138     }
1139     #endif
1140     }
1141    
1142     if (is_successful)
1143     successful++;
1144    
1145     #ifdef SUPPORT_PCRE8
1146     if (re8) {
1147     pcre_free_study(extra8);
1148     pcre_free(re8);
1149     }
1150     #endif
1151     #ifdef SUPPORT_PCRE16
1152     if (re16) {
1153     pcre16_free_study(extra16);
1154     pcre16_free(re16);
1155     }
1156     #endif
1157    
1158     /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1159 ph10 667 printf(".");
1160     fflush(stdout);
1161     current++;
1162     }
1163 ph10 836 tables(1);
1164     #ifdef SUPPORT_PCRE8
1165     setstack8(NULL);
1166     #endif
1167     #ifdef SUPPORT_PCRE16
1168     setstack16(NULL);
1169     #endif
1170 ph10 667
1171 ph10 836 if (total == successful) {
1172 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
1173 ph10 677 return 0;
1174 ph10 698 } else {
1175 ph10 836 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1176 ph10 698 return 1;
1177     }
1178 ph10 667 }
1179    
1180     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12