/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 860 - (hide annotations) (download)
Mon Jan 9 20:12:58 2012 UTC (2 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 49641 byte(s)
rename PCRE_SCHAR16 to PCRE_UCHAR16 and JIT compiler update
1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 667
11     This JIT compiler regression test program was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 667
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
51     #define PCRE_BUG 0x80000000
52    
53     /*
54 ph10 836 Letter characters:
55     \xe6\x92\xad = 0x64ad = 25773 (kanji)
56     Non-letter characters:
57     \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61     Newlines:
62     \xc2\x85 = 0x85 = 133 (NExt Line = NEL)
63     \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64     Othercase pairs:
65     \xc3\xa9 = 0xe9 = 233 (e')
66     \xc3\x89 = 0xc9 = 201 (E')
67     \xc3\xa1 = 0xe1 = 225 (a')
68     \xc3\x81 = 0xc1 = 193 (A')
69     \xc8\xba = 0x23a = 570
70     \xe2\xb1\xa5 = 0x2c65 = 11365
71     \xe1\xbd\xb8 = 0x1f78 = 8056
72     \xe1\xbf\xb8 = 0x1ff8 = 8184
73     \xf0\x90\x90\x80 = 0x10400 = 66560
74     \xf0\x90\x90\xa8 = 0x10428 = 66600
75     Mark property:
76     \xcc\x8d = 0x30d = 781
77     Special:
78     \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79     \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80     \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81     \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82     \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 ph10 691 */
84 ph10 667
85 ph10 677 static int regression_tests(void);
86 ph10 667
87     int main(void)
88     {
89 ph10 698 int jit = 0;
90 ph10 836 #ifdef SUPPORT_PCRE8
91 ph10 698 pcre_config(PCRE_CONFIG_JIT, &jit);
92 ph10 836 #else
93     pcre16_config(PCRE_CONFIG_JIT, &jit);
94     #endif
95 ph10 698 if (!jit) {
96     printf("JIT must be enabled to run pcre_jit_test\n");
97     return 1;
98     }
99     return regression_tests();
100 ph10 667 }
101    
102 ph10 836 /* --------------------------------------------------------------------------------------- */
103 ph10 667
104 ph10 836 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105     #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106     #endif
107 ph10 667
/* Option bundles used in the first column of the test table. Each name is
   built from letters: C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(MA | PCRE_UCP)
#define CMA	(PCRE_CASELESS | MA)
#define MUA	(MA | PCRE_UTF8)
#define MUAP	(MUA | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | MUA)
#define CMUAP	(CMUA | PCRE_UCP)

/* The second column of the test table is a start offset ORed with F_* bits.
   OFFSET_MASK selects the low 16 bits; every F_* flag lies above that mask.
   NOTE(review): the exact meaning of each flag is decided by code outside
   this excerpt - confirm there before relying on the names alone. */
#define OFFSET_MASK	0x00ffff
#define F_NO8		(1 << 16)
#define F_NO16		(1 << 17)
#define F_NOMATCH	(1 << 18)
#define F_DIFF		(1 << 19)
#define F_FORCECONV	(1 << 20)
#define F_PROPERTY	(1 << 21)
123 ph10 667
/* One row of the regression table. Rows are written as positional
   initializers, so the order of the members must not change. */
struct regression_test_case {
	/* PCRE option bits for this case (for example one of the MUA-style bundles). */
	int flags;
	/* Start offset, optionally ORed with F_* test flags. */
	int start_offset;
	/* Regular expression pattern. */
	const char *pattern;
	/* Input string for the pattern. */
	const char *input;
};
130    
131     static struct regression_test_case regression_test_cases[] = {
132     /* Constant strings. */
133     { MUA, 0, "AbC", "AbAbC" },
134     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136     { MA, 0, "[^a]", "aAbB" },
137     { CMA, 0, "[^m]", "mMnN" },
138     { MA, 0, "a[^b][^#]", "abacd" },
139     { CMA, 0, "A[^B][^E]", "abacd" },
140     { CMUA, 0, "[^x][^#]", "XxBll" },
141     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147     { MUA, 0, "[axd]", "sAXd" },
148     { CMUA, 0, "[axd]", "sAXd" },
149 ph10 836 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 ph10 667 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159     { PCRE_CASELESS, 0, "a1", "Aa1" },
160 zherczeg 736 { MA, 0, "\\Ca", "cda" },
161     { CMA, 0, "\\Ca", "CDA" },
162 ph10 836 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163     { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164     { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165     { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166     { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167     { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168 ph10 667
169     /* Assertions. */
170     { MUA, 0, "\\b[^A]", "A_B#" },
171 ph10 836 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 ph10 667 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173     { MAP, 0, "\\B", "_\xa1" },
174     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 ph10 836 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180     { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181     { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182     { MA, 1 | F_NOMATCH, "^", "\n" },
183 ph10 667 { 0, 0, "^ab", "ab" },
184 ph10 836 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190     { 0, 0, "ab$", "ab" },
191 ph10 836 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 ph10 836 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 ph10 667 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 ph10 836 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 ph10 667 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219     { MA, 0, "\\Aa", "aaa" },
220 ph10 836 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 ph10 667 { MA, 1, "\\Ga", "aaa" },
222 ph10 836 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 ph10 667 { MA, 0, "a\\z", "aaa" },
224 ph10 836 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225 ph10 667
226     /* Brackets. */
227     { MUA, 0, "(ab|bb|cd)", "bacde" },
228     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233    
234     /* Greedy and non-greedy ? operators. */
235     { MUA, 0, "(?:a)?a", "laab" },
236     { CMUA, 0, "(A)?A", "llaab" },
237     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238     { MUA, 0, "(a)?a", "manm" },
239     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242    
243     /* Greedy and non-greedy + operators */
244     { MUA, 0, "(aa)+aa", "aaaaaaa" },
245     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251    
252     /* Greedy and non-greedy * operators */
253     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260     { MA, 0, "((?:a|)*){0}a", "a" },
261    
262     /* Combining ? + * operators */
263     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268    
269     /* Single character iterators. */
270     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280     { MUA, 0, "(a?+[^b])+", "babaacacb" },
281     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283     { CMUA, 0, "[c-f]+k", "DemmFke" },
284     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 ph10 836 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298     { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299     { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300     { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301     { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302 ph10 667
303     /* Basic character sets. */
304     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310    
311     /* Unicode properties. */
312     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 ph10 667 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 ph10 836 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321     { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322     { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 ph10 667 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 ph10 836 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327     { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 ph10 667 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330     { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 ph10 667 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332    
333     /* Possible empty brackets. */
334     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344    
345     /* Start offset. */
346     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 ph10 836 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348     { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 ph10 667 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350    
351     /* Newline. */
352     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355    
356     /* Any character except newline or any newline. */
357     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 ph10 836 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 ph10 667 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 ph10 667 { MUA, 0, "\\R+", "ab\r\n\r" },
372     { MUA, 0, "\\R*", "ab\r\n\r" },
373     { MUA, 0, "\\R*", "\r\n\r" },
374     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 ph10 667 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380     { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 ph10 836 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 ph10 667 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383    
384     /* Atomic groups (no fallback from "next" direction). */
385 ph10 836 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386     { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 ph10 667 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388     "bababcdedefgheijijklmlmnop" },
389     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394     { MUA, 0, "(?>x|)*$", "aaa" },
395     { MUA, 0, "(?>(x)|)*$", "aaa" },
396     { MUA, 0, "(?>x|())*$", "aaa" },
397     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 ph10 836 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415     { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416     { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417     { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418     { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419     { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420     { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 ph10 667 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423    
424 ph10 698 /* Possessive quantifiers. */
425     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 ph10 836 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 ph10 667 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460    
461     /* Back references. */
462     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 ph10 836 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483     { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484     { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485 ph10 667
486     /* Assertions. */
487     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 ph10 836 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501     { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502     { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 ph10 667 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511    
512     /* Not empty, ACCEPT, FAIL */
513 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517     { MUA, 0, "a(*ACCEPT)b", "ab" },
518 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 ph10 836 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 ph10 667 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532    
533     /* Conditional blocks. */
534     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 ph10 836 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 ph10 667 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546     { MUA, 0, "(?(?=a)ab)", "a" },
547     { MUA, 0, "(?(?<!b)c)", "b" },
548     { MUA, 0, "(?(DEFINE)a(b))", "a" },
549     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 ph10 667 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 ph10 667 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 zherczeg 741 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568 ph10 667
569 ph10 698 /* Set start of match. */
570 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 ph10 667 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575    
576     /* First line. */
577 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 ph10 667 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 ph10 836 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594    
595     /* Recurse. */
596     { MUA, 0, "(a)(?1)", "aa" },
597     { MUA, 0, "((a))(?1)", "aa" },
598     { MUA, 0, "(b|a)(?1)", "aa" },
599     { MUA, 0, "(b|(a))(?1)", "aa" },
600 ph10 836 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 ph10 667 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 ph10 836 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 ph10 667 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 ph10 836 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 ph10 667 { MUA, 0, "b|<(?R)*>", "<<b>" },
613     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 zherczeg 741 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616     { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617     { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618     { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619     { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620     { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621 ph10 667
622 ph10 836 /* 16 bit specific tests. */
623     { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624     { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625     { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626     { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627     { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628     { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629     { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630     { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631     { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639     { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640     { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641     { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642     { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643     { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644     { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645     { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646     { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647     { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649     { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650     { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651     { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652     { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653     { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654    
655 ph10 667 /* Deep recursion. */
656     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659 ph10 691
660 ph10 677 /* Deep recursion: Stack limit reached. */
661 ph10 836 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662     { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663     { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664     { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665     { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666 ph10 667
667     { 0, 0, NULL, NULL }
668     };
669    
670 ph10 836 static const unsigned char *tables(int mode)
671     {
672     /* The purpose of this function to allow valgrind
673     for reporting invalid reads and writes. */
674     static unsigned char *tables_copy;
675     const char *errorptr;
676     int erroroffset;
677     const unsigned char *default_tables;
678     #ifdef SUPPORT_PCRE8
679 zherczeg 852 pcre *regex;
680 ph10 836 char null_str[1] = { 0 };
681     #else
682 zherczeg 852 pcre16 *regex;
683 zherczeg 860 PCRE_UCHAR16 null_str[1] = { 0 };
684 ph10 836 #endif
685    
686     if (mode) {
687     if (tables_copy)
688     free(tables_copy);
689     tables_copy = NULL;
690     return NULL;
691     }
692    
693     if (tables_copy)
694     return tables_copy;
695    
696     default_tables = NULL;
697     #ifdef SUPPORT_PCRE8
698     regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
699     if (regex) {
700     pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
701     pcre_free(regex);
702     }
703     #else
704     regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
705     if (regex) {
706     pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
707     pcre16_free(regex);
708     }
709     #endif
710     /* Shouldn't ever happen. */
711     if (!default_tables)
712     return NULL;
713    
714     /* Unfortunately this value cannot get from pcre_fullinfo.
715     Since this is a test program, this is acceptable at the moment. */
716     tables_copy = (unsigned char *)malloc(1088);
717     if (!tables_copy)
718     return NULL;
719    
720     memcpy(tables_copy, default_tables, 1088);
721     return tables_copy;
722     }
723    
724 zherczeg 852 static pcre_jit_stack* callback8(void *arg)
725 ph10 836 {
726     return (pcre_jit_stack *)arg;
727     }
728    
729 zherczeg 852 static pcre16_jit_stack* callback16(void *arg)
730     {
731     return (pcre16_jit_stack *)arg;
732     }
733    
#ifdef SUPPORT_PCRE8
/* Manages the single JIT stack shared by all 8 bit test runs.

extra != NULL: make sure the shared stack exists (it is allocated lazily)
               and attach it to 'extra' through callback8.
extra == NULL: release the shared stack, if there is one. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
752    
#ifdef SUPPORT_PCRE16
/* Manages the single JIT stack shared by all 16 bit test runs.

extra != NULL: make sure the shared stack exists (it is allocated lazily)
               and attach it to 'extra' through callback16.
extra == NULL: release the shared stack, if there is one. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *stack;

	if (extra != NULL) {
		if (stack == NULL)
			stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, stack);
		return;
	}

	/* Release request. */
	if (stack != NULL)
		pcre16_jit_stack_free(stack);
	stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
771    
772     #ifdef SUPPORT_PCRE16
773    
/* Converts the NUL-terminated UTF-8 string 'input' to UTF-16.

input      - source bytes. The sequence length is taken from the lead byte
             only; continuation bytes are not validated beyond being
             non-NUL, so encoded surrogates are passed through as they are.
output     - receives the 16 bit code units followed by a 0 terminator.
offsetmap  - optional (may be NULL). For each code unit that starts a
             character, receives the byte offset of that character in
             'input'; the entry for a low surrogate is skipped (left
             untouched). One extra entry holding the byte offset where the
             conversion stopped is written after the last character.
max_length - capacity of 'output' in code units, terminator included.
             Nothing at all is written when it is 0.

Returns the number of code units stored, not counting the terminator.

The conversion stops early, with 'output' properly terminated, when the
buffer is full, when a multi-byte sequence is cut short by the end of the
string, or when a byte in the range 0xf8..0xff appears in lead position. */
static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
{
	const unsigned char *iptr = (const unsigned char *)input;
	unsigned short *optr = (unsigned short *)output;
	unsigned int c;
	int seq_len, i;

	if (max_length == 0)
		return 0;

	while (*iptr && max_length > 1) {
		/* Sequence length according to the lead byte. */
		if (!(*iptr & 0x80))
			seq_len = 1;
		else if (!(*iptr & 0x20))
			seq_len = 2;
		else if (!(*iptr & 0x10))
			seq_len = 3;
		else if (!(*iptr & 0x08))
			seq_len = 4;
		else
			break; /* 0xf8..0xff cannot start a sequence. */

		/* Never step over the terminating NUL: a truncated sequence
		ends the conversion. The scan stops at the first NUL, so no
		byte behind the terminator is read. */
		for (i = 1; i < seq_len; i++)
			if (!iptr[i])
				break;
		if (i < seq_len)
			break;

		if (offsetmap)
			*offsetmap++ = (int)(iptr - (const unsigned char *)input);

		switch (seq_len) {
		case 1:
			c = iptr[0];
			break;
		case 2:
			c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
			break;
		case 3:
			c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
			break;
		default:
			c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
			break;
		}
		iptr += seq_len;

		if (c < 65536) {
			*optr++ = c;
			max_length--;
		} else if (max_length <= 2) {
			/* No room for a surrogate pair plus the terminator. */
			*optr = '\0';
			return (int)(optr - (unsigned short *)output);
		} else {
			c -= 0x10000;
			*optr++ = 0xd800 | ((c >> 10) & 0x3ff);
			*optr++ = 0xdc00 | (c & 0x3ff);
			max_length -= 2;
			/* The low surrogate has no offset map entry of its own. */
			if (offsetmap)
				offsetmap++;
		}
	}
	if (offsetmap)
		*offsetmap = (int)(iptr - (const unsigned char *)input);
	*optr = '\0';
	return (int)(optr - (unsigned short *)output);
}
821    
/* Widens the NUL-terminated byte string 'input' into 16 bit units, one unit
per byte and without any decoding.

At most max_length - 1 units are copied and a 0 terminator is appended;
nothing at all is written when max_length is 0. Returns the number of units
copied, terminator excluded. */
static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
{
	const unsigned char *src = (const unsigned char *)input;
	unsigned short *dst = (unsigned short *)output;
	int copied = 0;

	if (max_length == 0)
		return 0;

	/* One unit is always kept in reserve for the terminator. */
	for (; src[copied] != 0 && max_length - copied > 1; copied++)
		dst[copied] = src[copied];

	dst[copied] = '\0';
	return copied;
}
837    
/* Capacity, in 16 bit units, of the shared conversion buffer below; also the
number of entries in the offset map. */
838     #define REGTEST_MAX_LENGTH 4096
/* Scratch buffer that receives the 16 bit form of the current pattern and,
later, of the current input string. */
839 zherczeg 860 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
/* Filled by convert_utf8_to_utf16 for the input string; used to translate
16 bit match offsets back to byte offsets of the 8 bit input. */
840 ph10 836 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
841    
842     #endif /* SUPPORT_PCRE16 */
843    
/* Tells whether the NUL-terminated string 'input' is pure 7 bit ASCII.
Returns 1 when every byte is below 128 (an empty string qualifies),
0 as soon as a byte with the top bit set is found. */
static int check_ascii(const char *input)
{
	const unsigned char *cursor;

	for (cursor = (const unsigned char *)input; *cursor != '\0'; cursor++) {
		if (*cursor >= 128)
			return 0;
	}
	return 1;
}
854    
855 ph10 677 static int regression_tests(void)
856 ph10 667 {
857     struct regression_test_case *current = regression_test_cases;
858     const char *error;
859     int i, err_offs;
860 ph10 836 int is_successful, is_ascii_pattern, is_ascii_input;
861     int total = 0;
862     int successful = 0;
863 ph10 667 int counter = 0;
864 ph10 836 #ifdef SUPPORT_PCRE8
865     pcre *re8;
866     pcre_extra *extra8;
867     int ovector8_1[32];
868     int ovector8_2[32];
869     int return_value8_1, return_value8_2;
870     int utf8 = 0, ucp8 = 0;
871     int disabled_flags8 = 0;
872     #endif
873     #ifdef SUPPORT_PCRE16
874 zherczeg 852 pcre16 *re16;
875 zherczeg 850 pcre16_extra *extra16;
876 ph10 836 int ovector16_1[32];
877     int ovector16_2[32];
878     int return_value16_1, return_value16_2;
879     int utf16 = 0, ucp16 = 0;
880     int disabled_flags16 = 0;
881     int length16;
882     #endif
883 ph10 667
884 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
885 ph10 836 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
886 ph10 698 still considered successful from pcre_jit_test point of view. */
887    
888 ph10 836 printf("Running JIT regression\n");
889    
890     #ifdef SUPPORT_PCRE8
891 ph10 698 pcre_config(PCRE_CONFIG_UTF8, &utf8);
892 ph10 836 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
893 ph10 698 if (!utf8)
894 ph10 836 disabled_flags8 |= PCRE_UTF8;
895     if (!ucp8)
896     disabled_flags8 |= PCRE_UCP;
897     printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
898     #endif
899     #ifdef SUPPORT_PCRE16
900     pcre16_config(PCRE_CONFIG_UTF16, &utf16);
901     pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
902     if (!utf16)
903     disabled_flags16 |= PCRE_UTF8;
904     if (!ucp16)
905     disabled_flags16 |= PCRE_UCP;
906     printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
907     #endif
908 ph10 698
909 ph10 667 while (current->pattern) {
910 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
911 ph10 667 total++;
912 ph10 836 if (current->start_offset & F_PROPERTY) {
913     is_ascii_pattern = 0;
914     is_ascii_input = 0;
915     } else {
916     is_ascii_pattern = check_ascii(current->pattern);
917     is_ascii_input = check_ascii(current->input);
918     }
919 ph10 667
920     error = NULL;
921 ph10 836 #ifdef SUPPORT_PCRE8
922     re8 = NULL;
923     if (!(current->start_offset & F_NO8))
924     re8 = pcre_compile(current->pattern,
925     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
926     &error, &err_offs, tables(0));
927 ph10 667
928 ph10 836 extra8 = NULL;
929     if (re8) {
930     error = NULL;
931     extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
932     if (!extra8) {
933     printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
934     pcre_free(re8);
935     re8 = NULL;
936 ph10 698 }
937 ph10 836 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
938     printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
939     pcre_free_study(extra8);
940     pcre_free(re8);
941     re8 = NULL;
942     }
943     } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
944     printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
945     #endif
946     #ifdef SUPPORT_PCRE16
947     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
948     convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
949     else
950     copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
951 ph10 667
952 ph10 836 re16 = NULL;
953     if (!(current->start_offset & F_NO16))
954     re16 = pcre16_compile(regtest_buf,
955     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
956     &error, &err_offs, tables(0));
957 ph10 667
958 ph10 836 extra16 = NULL;
959     if (re16) {
960     error = NULL;
961     extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
962     if (!extra16) {
963     printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
964     pcre16_free(re16);
965     re16 = NULL;
966     }
967     if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
968     printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
969     pcre16_free_study(extra16);
970     pcre16_free(re16);
971     re16 = NULL;
972     }
973     } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
974     printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
975     #endif
976 ph10 667
977     counter++;
978 ph10 836 if ((counter & 0x3) != 0) {
979     #ifdef SUPPORT_PCRE8
980     setstack8(NULL);
981     #endif
982     #ifdef SUPPORT_PCRE16
983     setstack16(NULL);
984     #endif
985     }
986 ph10 667
987 ph10 836 #ifdef SUPPORT_PCRE8
988     return_value8_1 = -1000;
989     return_value8_2 = -1000;
990 ph10 667 for (i = 0; i < 32; ++i)
991 ph10 836 ovector8_1[i] = -2;
992     for (i = 0; i < 32; ++i)
993     ovector8_2[i] = -2;
994     if (re8) {
995     setstack8(extra8);
996     return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
997     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
998     return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
999     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1000     }
1001     #endif
1002 ph10 667
1003 ph10 836 #ifdef SUPPORT_PCRE16
1004     return_value16_1 = -1000;
1005     return_value16_2 = -1000;
1006 ph10 667 for (i = 0; i < 32; ++i)
1007 ph10 836 ovector16_1[i] = -2;
1008     for (i = 0; i < 32; ++i)
1009     ovector16_2[i] = -2;
1010     if (re16) {
1011     setstack16(extra16);
1012     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1013     length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1014     else
1015     length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1016     return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1017     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1018     return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1019     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1020     }
1021     #endif
1022 ph10 667
1023 ph10 836 /* If F_DIFF is set, just run the test, but do not compare the results.
1024 ph10 667 Segfaults can still be captured. */
1025    
1026 ph10 836 is_successful = 1;
1027     if (!(current->start_offset & F_DIFF)) {
1028     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1029     if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1030     /* All results must be the same. */
1031     if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1032     printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1033     return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1034     total, current->pattern, current->input);
1035     is_successful = 0;
1036     } else if (return_value8_1 >= 0) {
1037     return_value8_1 *= 2;
1038     /* Transform back the results. */
1039     if (current->flags & PCRE_UTF8) {
1040     for (i = 0; i < return_value8_1; ++i) {
1041     if (ovector16_1[i] >= 0)
1042     ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1043     if (ovector16_2[i] >= 0)
1044     ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1045     }
1046 ph10 667 }
1047 ph10 836
1048     for (i = 0; i < return_value8_1; ++i)
1049     if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1050     printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1051     i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1052     total, current->pattern, current->input);
1053     is_successful = 0;
1054     }
1055 ph10 667 }
1056 ph10 836 } else {
1057     #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1058     /* Only the 8 bit and 16 bit results must be equal. */
1059     #ifdef SUPPORT_PCRE8
1060     if (return_value8_1 != return_value8_2) {
1061     printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1062     return_value8_1, return_value8_2, total, current->pattern, current->input);
1063     is_successful = 0;
1064     } else if (return_value8_1 >= 0) {
1065     return_value8_1 *= 2;
1066     for (i = 0; i < return_value8_1; ++i)
1067     if (ovector8_1[i] != ovector8_2[i]) {
1068     printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1069     i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1070     is_successful = 0;
1071     }
1072     }
1073     #endif
1074    
1075     #ifdef SUPPORT_PCRE16
1076     if (return_value16_1 != return_value16_2) {
1077     printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1078     return_value16_1, return_value16_2, total, current->pattern, current->input);
1079     is_successful = 0;
1080     } else if (return_value16_1 >= 0) {
1081     return_value16_1 *= 2;
1082     for (i = 0; i < return_value16_1; ++i)
1083     if (ovector16_1[i] != ovector16_2[i]) {
1084     printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1085     i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1086     is_successful = 0;
1087     }
1088     }
1089     #endif
1090    
1091     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1092 ph10 667 }
1093 ph10 836 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1094 ph10 667 }
1095    
1096 ph10 836 if (is_successful) {
1097     #ifdef SUPPORT_PCRE8
1098     if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1099     if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1100     printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1101     total, current->pattern, current->input);
1102     is_successful = 0;
1103     }
1104 ph10 667
1105 ph10 836 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1106     printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1107     total, current->pattern, current->input);
1108     is_successful = 0;
1109     }
1110     }
1111     #endif
1112     #ifdef SUPPORT_PCRE16
1113     if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1114     if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1115     printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1116     total, current->pattern, current->input);
1117     is_successful = 0;
1118     }
1119    
1120     if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1121     printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1122     total, current->pattern, current->input);
1123     is_successful = 0;
1124     }
1125     }
1126     #endif
1127     }
1128    
1129     if (is_successful)
1130     successful++;
1131    
1132     #ifdef SUPPORT_PCRE8
1133     if (re8) {
1134     pcre_free_study(extra8);
1135     pcre_free(re8);
1136     }
1137     #endif
1138     #ifdef SUPPORT_PCRE16
1139     if (re16) {
1140     pcre16_free_study(extra16);
1141     pcre16_free(re16);
1142     }
1143     #endif
1144    
1145     /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1146 ph10 667 printf(".");
1147     fflush(stdout);
1148     current++;
1149     }
1150 ph10 836 tables(1);
1151     #ifdef SUPPORT_PCRE8
1152     setstack8(NULL);
1153     #endif
1154     #ifdef SUPPORT_PCRE16
1155     setstack16(NULL);
1156     #endif
1157 ph10 667
1158 ph10 836 if (total == successful) {
1159 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
1160 ph10 677 return 0;
1161 ph10 698 } else {
1162 ph10 836 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1163 ph10 698 return 1;
1164     }
1165 ph10 667 }
1166    
1167     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12