/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 881 - (hide annotations) (download)
Sun Jan 15 18:07:05 2012 UTC (16 months, 1 week ago) by ph10
File MIME type: text/plain
File size: 49698 byte(s)
Get rid of a number of -Wunused-but-set-variable compiler warnings.

1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9 ph10 836 Copyright (c) 1997-2012 University of Cambridge
10 ph10 667
11     This JIT compiler regression test program was written by Zoltan Herczeg
12 ph10 836 Copyright (c) 2010-2012
13 ph10 667
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
51     #define PCRE_BUG 0x80000000
52    
53     /*
54 ph10 836 Letter characters:
55     \xe6\x92\xad = 0x64ad = 25773 (kanji)
56     Non-letter characters:
57     \xc2\xa1 = 0xa1 = (Inverted Exclamation Mark)
58     \xf3\xa9\xb7\x80 = 0xe9dc0 = 957888
59     \xed\xa0\x80 = 55296 = 0xd800 (Invalid UTF character)
60     \xed\xb0\x80 = 56320 = 0xdc00 (Invalid UTF character)
61     Newlines:
62     \xc2\x85 = 0x85 = 133 (Next Line = NEL)
63     \xe2\x80\xa8 = 0x2028 = 8232 (Line Separator)
64     Othercase pairs:
65     \xc3\xa9 = 0xe9 = 233 (e')
66     \xc3\x89 = 0xc9 = 201 (E')
67     \xc3\xa1 = 0xe1 = 225 (a')
68     \xc3\x81 = 0xc1 = 193 (A')
69     \xc8\xba = 0x23a = 570
70     \xe2\xb1\xa5 = 0x2c65 = 11365
71     \xe1\xbd\xb8 = 0x1f78 = 8056
72     \xe1\xbf\xb8 = 0x1ff8 = 8184
73     \xf0\x90\x90\x80 = 0x10400 = 66560
74     \xf0\x90\x90\xa8 = 0x10428 = 66600
75     Mark property:
76     \xcc\x8d = 0x30d = 781
77     Special:
78     \xdf\xbf = 0x7ff = 2047 (highest 2 byte character)
79     \xe0\xa0\x80 = 0x800 = 2048 (lowest 3 byte character)
80     \xef\xbf\xbf = 0xffff = 65535 (highest 3 byte character)
81     \xf0\x90\x80\x80 = 0x10000 = 65536 (lowest 4 byte character)
82     \xf4\x8f\xbf\xbf = 0x10ffff = 1114111 (highest allowed utf character)
83 ph10 691 */
84 ph10 667
85 ph10 677 static int regression_tests(void);
86 ph10 667
87     int main(void)
88     {
89 ph10 698 int jit = 0;
90 ph10 836 #ifdef SUPPORT_PCRE8
91 ph10 698 pcre_config(PCRE_CONFIG_JIT, &jit);
92 ph10 836 #else
93     pcre16_config(PCRE_CONFIG_JIT, &jit);
94     #endif
95 ph10 698 if (!jit) {
96     printf("JIT must be enabled to run pcre_jit_test\n");
97     return 1;
98     }
99     return regression_tests();
100 ph10 667 }
101    
102 ph10 836 /* --------------------------------------------------------------------------------------- */
103 ph10 667
104 ph10 836 #if !(defined SUPPORT_PCRE8) && !(defined SUPPORT_PCRE16)
105     #error SUPPORT_PCRE8 or SUPPORT_PCRE16 must be defined
106     #endif
107 ph10 667
/* Shorthand option combinations used by the entries of the test table.
   Each letter in a macro name corresponds to one option that is OR'd in:
   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP. */
108 ph10 836 #define MUA (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
109     #define MUAP (PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
110     #define CMUA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
111     #define CMUAP (PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
112     #define MA (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
113     #define MAP (PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
114     #define CMA (PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
115 ph10 667
/* Marker bits that the test table ORs into a test case's start_offset field
   (for example "0 | F_NOMATCH"). OFFSET_MASK covers the low 16 bits and every
   F_* bit lies above it, so the two parts do not overlap.
   NOTE(review): the code that interprets these flags is outside this view.
   The names suggest F_NO8 / F_NO16 exclude a library width and F_NOMATCH
   marks a case expected not to match - confirm against regression_tests(). */
116 ph10 836 #define OFFSET_MASK 0x00ffff
117     #define F_NO8 0x010000
118     #define F_NO16 0x020000
119     #define F_NOMATCH 0x040000
120     #define F_DIFF 0x080000
121     #define F_FORCECONV 0x100000
122     #define F_PROPERTY 0x200000
123 ph10 667
/* One entry of the regression test table. The table initialises the fields
   positionally, so their order must not change:
     flags        - PCRE option bits (e.g. MUA, PCRE_CASELESS, PCRE_NOTBOL)
     start_offset - starting offset, with F_* marker bits OR'd in by the table
     pattern      - the regular expression text
     input        - the subject string the pattern is run against */
124     struct regression_test_case {
125     int flags;
126     int start_offset;
127     const char *pattern;
128     const char *input;
129     };
130    
131     static struct regression_test_case regression_test_cases[] = {
132     /* Constant strings. */
133     { MUA, 0, "AbC", "AbAbC" },
134     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
135     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
136     { MA, 0, "[^a]", "aAbB" },
137     { CMA, 0, "[^m]", "mMnN" },
138     { MA, 0, "a[^b][^#]", "abacd" },
139     { CMA, 0, "A[^B][^E]", "abacd" },
140     { CMUA, 0, "[^x][^#]", "XxBll" },
141     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
142     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
143     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
144     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
145     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
146     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
147     { MUA, 0, "[axd]", "sAXd" },
148     { CMUA, 0, "[axd]", "sAXd" },
149 ph10 836 { CMUA, 0 | F_NOMATCH, "[^axd]", "DxA" },
150 ph10 667 { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
151     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
152     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
153     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
154 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
155 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
156     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
157     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
158     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
159     { PCRE_CASELESS, 0, "a1", "Aa1" },
160 zherczeg 736 { MA, 0, "\\Ca", "cda" },
161     { CMA, 0, "\\Ca", "CDA" },
162 ph10 836 { MA, 0 | F_NOMATCH, "\\Cx", "cda" },
163     { CMA, 0 | F_NOMATCH, "\\Cx", "CDA" },
164     { CMUAP, 0, "\xf0\x90\x90\x80\xf0\x90\x90\xa8", "\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
165     { CMUAP, 0, "\xf0\x90\x90\x80{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
166     { CMUAP, 0, "\xf0\x90\x90\xa8{2}", "\xf0\x90\x90\x80#\xf0\x90\x90\xa8\xf0\x90\x90\x80" },
167     { CMUAP, 0, "\xe1\xbd\xb8\xe1\xbf\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
168 ph10 667
169     /* Assertions. */
170     { MUA, 0, "\\b[^A]", "A_B#" },
171 ph10 836 { MA, 0 | F_NOMATCH, "\\b\\W", "\n*" },
172 ph10 667 { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
173     { MAP, 0, "\\B", "_\xa1" },
174     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
175     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
176     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
177     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
178     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
179 ph10 836 { MUA, 0 | F_NOMATCH, "\\b.", "\xcd\xbe" },
180     { CMUAP, 0, "\\By", "\xf0\x90\x90\xa8y" },
181     { MA, 0 | F_NOMATCH, "\\R^", "\n" },
182     { MA, 1 | F_NOMATCH, "^", "\n" },
183 ph10 667 { 0, 0, "^ab", "ab" },
184 ph10 836 { 0, 0 | F_NOMATCH, "^ab", "aab" },
185 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
186     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
187     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
188     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
189     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
190     { 0, 0, "ab$", "ab" },
191 ph10 836 { 0, 0 | F_NOMATCH, "ab$", "ab\r\n" },
192 ph10 667 { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
193     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
194     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
195     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
196 ph10 836 { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "^a", "aa\naa" },
197 ph10 667 { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
198 ph10 836 { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\naa" },
199     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0 | F_NOMATCH, "a$", "aa\r\n" },
200     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0 | F_PROPERTY, "\\p{Any}{2,}$", "aa\r\n" },
201 ph10 667 { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
202     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
203     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
204     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
205     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
206     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
207     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
208     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
209     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
210     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
211     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
212     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
213     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
214     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
215     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
216     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
217     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
218     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
219     { MA, 0, "\\Aa", "aaa" },
220 ph10 836 { MA, 1 | F_NOMATCH, "\\Aa", "aaa" },
221 ph10 667 { MA, 1, "\\Ga", "aaa" },
222 ph10 836 { MA, 1 | F_NOMATCH, "\\Ga", "aba" },
223 ph10 667 { MA, 0, "a\\z", "aaa" },
224 ph10 836 { MA, 0 | F_NOMATCH, "a\\z", "aab" },
225 ph10 667
226     /* Brackets. */
227     { MUA, 0, "(ab|bb|cd)", "bacde" },
228     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
229     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
230     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
231     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
232     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
233    
234     /* Greedy and non-greedy ? operators. */
235     { MUA, 0, "(?:a)?a", "laab" },
236     { CMUA, 0, "(A)?A", "llaab" },
237     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trigraphs in GCC. */
238     { MUA, 0, "(a)?a", "manm" },
239     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
240     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
241     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
242    
243     /* Greedy and non-greedy + operators */
244     { MUA, 0, "(aa)+aa", "aaaaaaa" },
245     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
246     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
247     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
248     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
249     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
250     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
251    
252     /* Greedy and non-greedy * operators */
253     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
254     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
255     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
256     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
257     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
258     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
259     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
260     { MA, 0, "((?:a|)*){0}a", "a" },
261    
262     /* Combining ? + * operators */
263     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
264     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
265     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
266     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
267     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
268    
269     /* Single character iterators. */
270     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
271     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
272     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
273     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
274     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
275     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
276     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
277     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
278     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
279     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
280     { MUA, 0, "(a?+[^b])+", "babaacacb" },
281     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
282     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
283     { CMUA, 0, "[c-f]+k", "DemmFke" },
284     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
285     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
286     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
287     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
288     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
289     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
290     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
291     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
292     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
293     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
294     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
295     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
296     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
297 ph10 836 { CMUAP, 0, "\xe1\xbd\xb8{2}", "\xe1\xbf\xb8#\xe1\xbf\xb8\xe1\xbd\xb8" },
298     { CMUA, 0, "[^\xf0\x90\x90\x80]{2,4}@", "\xf0\x90\x90\xa8\xf0\x90\x90\x80###\xf0\x90\x90\x80@@@" },
299     { CMUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
300     { MUA, 0, "[^\xe1\xbd\xb8][^\xc3\xa9]", "\xe1\xbd\xb8\xe1\xbf\xb8\xc3\xa9\xc3\x89#" },
301     { MUA, 0, "[^\xe1\xbd\xb8]{3,}?", "##\xe1\xbd\xb8#\xe1\xbd\xb8#\xc3\x89#\xe1\xbd\xb8" },
302 ph10 667
303     /* Basic character sets. */
304     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
305     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
306     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
307     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
308     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
309     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
310    
311     /* Unicode properties. */
312     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
313 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
314 ph10 667 { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
315 ph10 836 { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}]", "abc" },
316     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}]", "abc" },
317     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
318     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
319     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
320     { MUAP, 0 | F_NOMATCH | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
321     { MUAP, 0 | F_PROPERTY, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
322     { MUAP, 0 | F_PROPERTY, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
323 ph10 667 { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
324     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
325     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
326 ph10 836 { MUAP, 0 | F_PROPERTY, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
327     { MUA, 0 | F_PROPERTY, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
328 ph10 667 { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
329 ph10 836 { MUAP, 0 | F_PROPERTY, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
330     { MUAP, 0 | F_PROPERTY, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
331 ph10 667 { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
332    
333     /* Possible empty brackets. */
334     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
335     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
336     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
337     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
338     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
339     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
340     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
341     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
342     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
343     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
344    
345     /* Start offset. */
346     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
347 ph10 836 { MUA, 4 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
348     { MUA, 2 | F_NOMATCH, "(\\w\\W\\w)+", "ab#d" },
349 ph10 667 { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
350    
351     /* Newline. */
352     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
353     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
354     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
355    
356     /* Any character except newline or any newline. */
357     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
358     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
359     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
360     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
361     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
362 ph10 836 { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0 | F_NOMATCH, ".(.).", "\xe2\x80\xa8\nb\r" },
363 ph10 667 { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
364     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
365     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
366     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
367     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
368     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
369     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
370 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+", "ab" },
371 ph10 667 { MUA, 0, "\\R+", "ab\r\n\r" },
372     { MUA, 0, "\\R*", "ab\r\n\r" },
373     { MUA, 0, "\\R*", "\r\n\r" },
374     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
375     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
376     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
377     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
378 ph10 836 { MUA, 0 | F_NOMATCH, "\\R+\\R\\R", "\r\n\r\n" },
379 ph10 667 { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
380     { MUA, 0, "\\R*\\R\\R", "\n\r" },
381 ph10 836 { MUA, 0 | F_NOMATCH, "\\R{2,4}\\R\\R", "\r\r\r" },
382 ph10 667 { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
383    
384     /* Atomic groups (no fallback from "next" direction). */
385 ph10 836 { MUA, 0 | F_NOMATCH, "(?>ab)ab", "bab" },
386     { MUA, 0 | F_NOMATCH, "(?>(ab))ab", "bab" },
387 ph10 667 { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
388     "bababcdedefgheijijklmlmnop" },
389     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
390     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
391     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
392     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
393     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
394     { MUA, 0, "(?>x|)*$", "aaa" },
395     { MUA, 0, "(?>(x)|)*$", "aaa" },
396     { MUA, 0, "(?>x|())*$", "aaa" },
397     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
398     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
399     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
400     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
401     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
402     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
403     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
404     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
405     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
406     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
407     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
408     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
409     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
410     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
411     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
412     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
413     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
414 ph10 836 { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d" },
415     { MUA, 0 | F_PROPERTY, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
416     { MUA, 0 | F_PROPERTY, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
417     { MUA, 0 | F_PROPERTY, "\\X{2,4}", "abcdef" },
418     { MUA, 0 | F_PROPERTY, "\\X{2,4}?", "abcdef" },
419     { MUA, 0 | F_NOMATCH | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d##" },
420     { MUA, 0 | F_PROPERTY, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
421 ph10 667 { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
422     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
423    
424 ph10 698 /* Possessive quantifiers. */
425     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
426     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
427     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
428     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
429     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
430     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
431 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
432     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
433     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
434     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
435     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
436     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
437     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
438     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
439     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
440 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
441     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
442     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
443     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
444     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
445     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
446 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
447     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
448     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
449     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
450     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
451     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
452     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
453     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
454     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
455 ph10 836 { MUA, 0 | F_NOMATCH, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
456 ph10 667 { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
457     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
458     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
459     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
460    
461     /* Back references. */
462     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
463     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
464     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
465     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
466     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
467     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
468     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
469     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
470     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
471     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
472     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
473     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
474     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
475     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
476     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
477     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
478     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
479 ph10 836 { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
480     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{0,2}", "wwwww." },
481     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwww" },
482     { MUAP, 0 | F_PROPERTY, "(\\P{N})\\1{1,2}ww", "wwwww" },
483     { PCRE_UCP, 0 | F_PROPERTY, "(\\P{N})\\1{2,}", ".www." },
484     { CMUAP, 0, "(\xf0\x90\x90\x80)\\1", "\xf0\x90\x90\xa8\xf0\x90\x90\xa8" },
485 ph10 667
486     /* Assertions. */
487     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
488     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
489     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
490     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
491     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
492     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
493     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
494     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
495     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
496     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
497     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
498     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
499     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
500 ph10 836 { MUA, 0 | F_NOMATCH, "(?=(?>(a))m)amk", "a k" },
501     { MUA, 0 | F_NOMATCH, "(?!(?>(a))m)amk", "a k" },
502     { MUA, 0 | F_NOMATCH, "(?>(?=(a))am)amk", "a k" },
503 ph10 667 { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
504     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
505     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
506     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
507     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
508     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
509     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
510     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
511    
512     /* Not empty, ACCEPT, FAIL */
513 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*", "bcx" },
514 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
515     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
516     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
517     { MUA, 0, "a(*ACCEPT)b", "ab" },
518 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a*(*ACCEPT)b", "bcx" },
519 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
520     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
521 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "(?:z|a*(*ACCEPT)b)", "bcx" },
522 ph10 667 { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
523     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
524     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
525 ph10 836 { MUA | PCRE_NOTEMPTY_ATSTART, 0 | F_NOMATCH, "a*(*ACCEPT)b", "" },
526 ph10 667 { MUA, 0, "((a(*ACCEPT)b))", "ab" },
527     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
528     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
529     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
530     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
531     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
532    
533     /* Conditional blocks. */
534     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
535     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
536     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
537     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
538     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
539     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
540     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
541     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
542 ph10 836 { MUA, 0 | F_DIFF, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
543 ph10 667 { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
544     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
545     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
546     { MUA, 0, "(?(?=a)ab)", "a" },
547     { MUA, 0, "(?(?<!b)c)", "b" },
548     { MUA, 0, "(?(DEFINE)a(b))", "a" },
549     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
550     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
551     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
552     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
553     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
554     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
555     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
556 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
557 ph10 667 { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
558     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
559     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
560 ph10 836 { MUA, 0 | F_DIFF, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
561 ph10 667 { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
562     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
563     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
564     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
565 zherczeg 741 { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)*l", "bc ddd abccabccl" },
566     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+?dd", "bcabcacdb bdddd" },
567     { MUA, 0, "(?P<Name>a)?(?P<Name2>b)?(?(Name)c|d)+l", "ababccddabdbccd abcccl" },
568 ph10 667
569 ph10 698 /* Set start of match. */
570 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
571     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
572     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
573 ph10 836 { MUA | PCRE_NOTEMPTY, 0 | F_NOMATCH, "a\\K(*ACCEPT)b", "aa" },
574 ph10 667 { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
575    
576     /* First line. */
577 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}a", "bb\naaa" },
578     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}a", "bb\r\naaa" },
579 ph10 667 { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
580 ph10 836 { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[^a][^b]", "ab" },
581     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "a", "\na" },
582     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "[abc]", "\na" },
583     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^a", "\na" },
584     { MUA | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^(?<=\n)", "\na" },
585     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\xc2\x85#" },
586     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "#", "\x85#" },
587     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0 | F_NOMATCH, "^#", "\xe2\x80\xa8#" },
588     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_PROPERTY, "\\p{Any}", "\r\na" },
589 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
590     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
591 ph10 836 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH, "ba", "bbb\r\nba" },
592     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0 | F_NOMATCH | F_PROPERTY, "\\p{Any}{4}|a", "\r\na" },
593 ph10 667 { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
594    
595     /* Recurse. */
596     { MUA, 0, "(a)(?1)", "aa" },
597     { MUA, 0, "((a))(?1)", "aa" },
598     { MUA, 0, "(b|a)(?1)", "aa" },
599     { MUA, 0, "(b|(a))(?1)", "aa" },
600 ph10 836 { MUA, 0 | F_NOMATCH, "((a)(b)(?:a*))(?1)", "aba" },
601 ph10 667 { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
602     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
603     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
604     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
605     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
606     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
607     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
608 ph10 836 { MUA, 0 | F_NOMATCH, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
609 ph10 667 { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
610     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
611 ph10 836 { MUA, 0 | F_NOMATCH, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
612 ph10 667 { MUA, 0, "b|<(?R)*>", "<<b>" },
613     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
614     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
615 zherczeg 741 { MUA, 0, "(a)((?(R)a|b))(?2)", "aabbabaa" },
616     { MUA, 0, "(a)((?(R2)a|b))(?2)", "aabbabaa" },
617     { MUA, 0, "(a)((?(R1)a|b))(?2)", "ababba" },
618     { MUA, 0, "(?(R0)aa|bb(?R))", "abba aabb bbaa" },
619     { MUA, 0, "((?(R)(?:aaaa|a)|(?:(aaaa)|(a)))+)(?1)$", "aaaaaaaaaa aaaa" },
620     { MUA, 0, "(?P<Name>a(?(R&Name)a|b))(?1)", "aab abb abaa" },
621 ph10 667
622 ph10 836 /* 16 bit specific tests. */
623     { CMA, 0 | F_FORCECONV, "\xc3\xa1", "\xc3\x81\xc3\xa1" },
624     { CMA, 0 | F_FORCECONV, "\xe1\xbd\xb8", "\xe1\xbf\xb8\xe1\xbd\xb8" },
625     { CMA, 0 | F_FORCECONV, "[\xc3\xa1]", "\xc3\x81\xc3\xa1" },
626     { CMA, 0 | F_FORCECONV, "[\xe1\xbd\xb8]", "\xe1\xbf\xb8\xe1\xbd\xb8" },
627     { CMA, 0 | F_FORCECONV, "[a-\xed\xb0\x80]", "A" },
628     { CMA, 0 | F_NO8 | F_FORCECONV, "[a-\\x{dc00}]", "B" },
629     { CMA, 0 | F_NO8 | F_NOMATCH | F_FORCECONV, "[b-\\x{dc00}]", "a" },
630     { CMA, 0 | F_NO8 | F_FORCECONV, "\xed\xa0\x80\\x{d800}\xed\xb0\x80\\x{dc00}", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80" },
631     { CMA, 0 | F_NO8 | F_FORCECONV, "[\xed\xa0\x80\\x{d800}]{1,2}?[\xed\xb0\x80\\x{dc00}]{1,2}?#", "\xed\xa0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80#" },
632     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80\xed\xb0\x80#]{0,3}(?<=\xed\xb0\x80.)", "\xed\xa0\x80#\xed\xa0\x80##\xed\xb0\x80\xed\xa0\x80" },
633     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\x9f\xbf\xed\xa0\x83" },
634     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xed\xb3\xbf]", "\xed\xb4\x80\xed\xb3\xb0" },
635     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\x9f\xbf\xed\xa0\x83" },
636     { CMA, 0 | F_NO8 | F_FORCECONV, "[\\x{d800}-\\x{dcff}]", "\xed\xb4\x80\xed\xb3\xb0" },
637     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80-\xef\xbf\xbf]+[\x1-\xed\xb0\x80]+#", "\xed\xa0\x85\xc3\x81\xed\xa0\x85\xef\xbf\xb0\xc2\x85\xed\xa9\x89#" },
638     { CMA, 0 | F_FORCECONV, "[\xed\xa0\x80][\xed\xb0\x80]{2,}", "\xed\xa0\x80\xed\xb0\x80\xed\xa0\x80\xed\xb0\x80\xed\xb0\x80\xed\xb0\x80" },
639     { MA, 0 | F_FORCECONV, "[^\xed\xb0\x80]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
640     { MA, 0 | F_NO8 | F_FORCECONV, "[^\\x{dc00}]{3,}?", "##\xed\xb0\x80#\xed\xb0\x80#\xc3\x89#\xed\xb0\x80" },
641     { CMA, 0 | F_FORCECONV, ".\\B.", "\xed\xa0\x80\xed\xb0\x80" },
642     { CMA, 0 | F_FORCECONV, "\\D+(?:\\d+|.)\\S+(?:\\s+|.)\\W+(?:\\w+|.)\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80" },
643     { CMA, 0 | F_FORCECONV, "\\d*\\s*\\w*\xed\xa0\x80\xed\xa0\x80", "\xed\xa0\x80\xed\xa0\x80" },
644     { CMA, 0 | F_FORCECONV | F_NOMATCH, "\\d*?\\D*?\\s*?\\S*?\\w*?\\W*?##", "\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80\xed\xa0\x80#" },
645     { CMA | PCRE_EXTENDED, 0 | F_FORCECONV, "\xed\xa0\x80 \xed\xb0\x80 !", "\xed\xa0\x80\xed\xb0\x80!" },
646     { CMA, 0 | F_FORCECONV, "\xed\xa0\x80+#[^#]+\xed\xa0\x80", "\xed\xa0\x80#a\xed\xa0\x80" },
647     { CMA, 0 | F_FORCECONV, "(\xed\xa0\x80+)#\\1", "\xed\xa0\x80\xed\xa0\x80#\xed\xa0\x80\xed\xa0\x80" },
648     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0 | F_NO8 | F_FORCECONV, "^-", "a--\xe2\x80\xa8--" },
649     { PCRE_BSR_UNICODE, 0 | F_NO8 | F_FORCECONV, "\\R", "ab\xe2\x80\xa8" },
650     { 0, 0 | F_NO8 | F_FORCECONV, "\\v", "ab\xe2\x80\xa9" },
651     { 0, 0 | F_NO8 | F_FORCECONV, "\\h", "ab\xe1\xa0\x8e" },
652     { 0, 0 | F_NO8 | F_FORCECONV, "\\v+?\\V+?#", "\xe2\x80\xa9\xe2\x80\xa9\xef\xbf\xbf\xef\xbf\xbf#" },
653     { 0, 0 | F_NO8 | F_FORCECONV, "\\h+?\\H+?#", "\xe1\xa0\x8e\xe1\xa0\x8e\xef\xbf\xbf\xef\xbf\xbf#" },
654    
655 ph10 667 /* Deep recursion. */
656     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
657 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
658 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
659 ph10 691
660 ph10 677 /* Deep recursion: Stack limit reached. */
661 ph10 836 { MA, 0 | F_NOMATCH, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
662     { MA, 0 | F_NOMATCH, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
663     { MA, 0 | F_NOMATCH, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
664     { MA, 0 | F_NOMATCH, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
665     { MA, 0 | F_NOMATCH, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
666 ph10 667
667     { 0, 0, NULL, NULL }
668     };
669    
670 ph10 836 static const unsigned char *tables(int mode)
671     {
672     /* The purpose of this function to allow valgrind
673     for reporting invalid reads and writes. */
674     static unsigned char *tables_copy;
675     const char *errorptr;
676     int erroroffset;
677     const unsigned char *default_tables;
678     #ifdef SUPPORT_PCRE8
679 zherczeg 852 pcre *regex;
680 ph10 836 char null_str[1] = { 0 };
681     #else
682 zherczeg 852 pcre16 *regex;
683 zherczeg 860 PCRE_UCHAR16 null_str[1] = { 0 };
684 ph10 836 #endif
685    
686     if (mode) {
687     if (tables_copy)
688     free(tables_copy);
689     tables_copy = NULL;
690     return NULL;
691     }
692    
693     if (tables_copy)
694     return tables_copy;
695    
696     default_tables = NULL;
697     #ifdef SUPPORT_PCRE8
698     regex = pcre_compile(null_str, 0, &errorptr, &erroroffset, NULL);
699     if (regex) {
700     pcre_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
701     pcre_free(regex);
702     }
703     #else
704     regex = pcre16_compile(null_str, 0, &errorptr, &erroroffset, NULL);
705     if (regex) {
706     pcre16_fullinfo(regex, NULL, PCRE_INFO_DEFAULT_TABLES, &default_tables);
707     pcre16_free(regex);
708     }
709     #endif
710     /* Shouldn't ever happen. */
711     if (!default_tables)
712     return NULL;
713    
714     /* Unfortunately this value cannot get from pcre_fullinfo.
715     Since this is a test program, this is acceptable at the moment. */
716     tables_copy = (unsigned char *)malloc(1088);
717     if (!tables_copy)
718     return NULL;
719    
720     memcpy(tables_copy, default_tables, 1088);
721     return tables_copy;
722     }
723    
#ifdef SUPPORT_PCRE8
/* JIT stack callback for the 8 bit library. The opaque argument is itself
the stack to use, so it is handed back unchanged. */
static pcre_jit_stack* callback8(void *arg)
{
	pcre_jit_stack *stack = (pcre_jit_stack *)arg;

	return stack;
}
#endif
730 ph10 836
#ifdef SUPPORT_PCRE16
/* JIT stack callback for the 16 bit library. The opaque argument is itself
the stack to use, so it is handed back unchanged. */
static pcre16_jit_stack* callback16(void *arg)
{
	pcre16_jit_stack *stack = (pcre16_jit_stack *)arg;

	return stack;
}
#endif
737 zherczeg 852
#ifdef SUPPORT_PCRE8
/* Manages a single, lazily allocated JIT stack used by the 8 bit tests.
Called with NULL, it frees the stack (if one exists). Called with a study
block, it allocates the stack on demand and assigns it to that block through
callback8. The allocation result is passed on without being checked. */
static void setstack8(pcre_extra *extra)
{
	static pcre_jit_stack *shared_stack;

	if (extra != NULL) {
		if (shared_stack == NULL)
			shared_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
		pcre_assign_jit_stack(extra, callback8, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack != NULL)
		pcre_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE8 */
756    
#ifdef SUPPORT_PCRE16
/* Manages a single, lazily allocated JIT stack used by the 16 bit tests.
Called with NULL, it frees the stack (if one exists). Called with a study
block, it allocates the stack on demand and assigns it to that block through
callback16. The allocation result is passed on without being checked. */
static void setstack16(pcre16_extra *extra)
{
	static pcre16_jit_stack *shared_stack;

	if (extra != NULL) {
		if (shared_stack == NULL)
			shared_stack = pcre16_jit_stack_alloc(1, 1024 * 1024);
		pcre16_assign_jit_stack(extra, callback16, shared_stack);
		return;
	}

	/* Release request. */
	if (shared_stack != NULL)
		pcre16_jit_stack_free(shared_stack);
	shared_stack = NULL;
}
#endif /* SUPPORT_PCRE16 */
775    
776     #ifdef SUPPORT_PCRE16
777    
778 zherczeg 860 static int convert_utf8_to_utf16(const char *input, PCRE_UCHAR16 *output, int *offsetmap, int max_length)
779 ph10 836 {
780     unsigned char *iptr = (unsigned char*)input;
781     unsigned short *optr = (unsigned short *)output;
782     unsigned int c;
783    
784     if (max_length == 0)
785     return 0;
786    
787     while (*iptr && max_length > 1) {
788     c = 0;
789     if (offsetmap)
790     *offsetmap++ = (int)(iptr - (unsigned char*)input);
791    
792     if (!(*iptr & 0x80))
793     c = *iptr++;
794     else if (!(*iptr & 0x20)) {
795     c = ((iptr[0] & 0x1f) << 6) | (iptr[1] & 0x3f);
796     iptr += 2;
797     } else if (!(*iptr & 0x10)) {
798     c = ((iptr[0] & 0x0f) << 12) | ((iptr[1] & 0x3f) << 6) | (iptr[2] & 0x3f);
799     iptr += 3;
800     } else if (!(*iptr & 0x08)) {
801     c = ((iptr[0] & 0x07) << 18) | ((iptr[1] & 0x3f) << 12) | ((iptr[2] & 0x3f) << 6) | (iptr[3] & 0x3f);
802     iptr += 4;
803     }
804    
805     if (c < 65536) {
806     *optr++ = c;
807     max_length--;
808     } else if (max_length <= 2) {
809     *optr = '\0';
810     return (int)(optr - (unsigned short *)output);
811     } else {
812     c -= 0x10000;
813     *optr++ = 0xd800 | ((c >> 10) & 0x3ff);
814     *optr++ = 0xdc00 | (c & 0x3ff);
815     max_length -= 2;
816     if (offsetmap)
817     offsetmap++;
818     }
819     }
820     if (offsetmap)
821     *offsetmap = (int)(iptr - (unsigned char*)input);
822     *optr = '\0';
823     return (int)(optr - (unsigned short *)output);
824     }
825    
826 zherczeg 860 static int copy_char8_to_char16(const char *input, PCRE_UCHAR16 *output, int max_length)
827 ph10 836 {
828     unsigned char *iptr = (unsigned char*)input;
829     unsigned short *optr = (unsigned short *)output;
830    
831     if (max_length == 0)
832     return 0;
833    
834     while (*iptr && max_length > 1) {
835     *optr++ = *iptr++;
836     max_length--;
837     }
838     *optr = '\0';
839     return (int)(optr - (unsigned short *)output);
840     }
841    
842     #define REGTEST_MAX_LENGTH 4096
843 zherczeg 860 static PCRE_UCHAR16 regtest_buf[REGTEST_MAX_LENGTH];
844 ph10 836 static int regtest_offsetmap[REGTEST_MAX_LENGTH];
845    
846     #endif /* SUPPORT_PCRE16 */
847    
/* Returns 1 when every byte of the zero terminated string "input" is in the
range 0..127 (an empty string qualifies), otherwise 0. */
static int check_ascii(const char *input)
{
	const unsigned char *p;

	for (p = (const unsigned char *)input; *p != 0; p++) {
		if ((*p & 0x80) != 0)
			return 0;
	}
	return 1;
}
858    
859 ph10 677 static int regression_tests(void)
860 ph10 667 {
861     struct regression_test_case *current = regression_test_cases;
862     const char *error;
863     int i, err_offs;
864 ph10 836 int is_successful, is_ascii_pattern, is_ascii_input;
865     int total = 0;
866     int successful = 0;
867 ph10 667 int counter = 0;
868 ph10 836 #ifdef SUPPORT_PCRE8
869     pcre *re8;
870     pcre_extra *extra8;
871     int ovector8_1[32];
872     int ovector8_2[32];
873     int return_value8_1, return_value8_2;
874     int utf8 = 0, ucp8 = 0;
875     int disabled_flags8 = 0;
876     #endif
877     #ifdef SUPPORT_PCRE16
878 zherczeg 852 pcre16 *re16;
879 zherczeg 850 pcre16_extra *extra16;
880 ph10 836 int ovector16_1[32];
881     int ovector16_2[32];
882     int return_value16_1, return_value16_2;
883     int utf16 = 0, ucp16 = 0;
884     int disabled_flags16 = 0;
885     int length16;
886     #endif
887 ph10 667
888 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
889 ph10 836 utf or ucp may make tests fail, if the pcre_exec result is the SAME, it is
890 ph10 698 still considered successful from pcre_jit_test point of view. */
891    
892 ph10 836 printf("Running JIT regression\n");
893    
894     #ifdef SUPPORT_PCRE8
895 ph10 698 pcre_config(PCRE_CONFIG_UTF8, &utf8);
896 ph10 836 pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp8);
897 ph10 698 if (!utf8)
898 ph10 836 disabled_flags8 |= PCRE_UTF8;
899     if (!ucp8)
900     disabled_flags8 |= PCRE_UCP;
901     printf(" in 8 bit mode with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp8 ? "enabled" : "disabled");
902     #endif
903     #ifdef SUPPORT_PCRE16
904     pcre16_config(PCRE_CONFIG_UTF16, &utf16);
905     pcre16_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp16);
906     if (!utf16)
907     disabled_flags16 |= PCRE_UTF8;
908     if (!ucp16)
909     disabled_flags16 |= PCRE_UCP;
910     printf(" in 16 bit mode with utf16 %s and ucp %s:\n", utf16 ? "enabled" : "disabled", ucp16 ? "enabled" : "disabled");
911     #endif
912 ph10 698
913 ph10 667 while (current->pattern) {
914 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
915 ph10 667 total++;
916 ph10 836 if (current->start_offset & F_PROPERTY) {
917     is_ascii_pattern = 0;
918     is_ascii_input = 0;
919     } else {
920     is_ascii_pattern = check_ascii(current->pattern);
921     is_ascii_input = check_ascii(current->input);
922     }
923 ph10 667
924     error = NULL;
925 ph10 836 #ifdef SUPPORT_PCRE8
926     re8 = NULL;
927     if (!(current->start_offset & F_NO8))
928     re8 = pcre_compile(current->pattern,
929     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags8),
930     &error, &err_offs, tables(0));
931 ph10 667
932 ph10 836 extra8 = NULL;
933     if (re8) {
934     error = NULL;
935     extra8 = pcre_study(re8, PCRE_STUDY_JIT_COMPILE, &error);
936     if (!extra8) {
937     printf("\n8 bit: Cannot study pattern: %s\n", current->pattern);
938     pcre_free(re8);
939     re8 = NULL;
940 ph10 698 }
941 ph10 836 if (!(extra8->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
942     printf("\n8 bit: JIT compiler does not support: %s\n", current->pattern);
943     pcre_free_study(extra8);
944     pcre_free(re8);
945     re8 = NULL;
946     }
947     } else if (((utf8 && ucp8) || is_ascii_pattern) && !(current->start_offset & F_NO8))
948     printf("\n8 bit: Cannot compile pattern: %s\n", current->pattern);
949     #endif
950     #ifdef SUPPORT_PCRE16
951     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
952     convert_utf8_to_utf16(current->pattern, regtest_buf, NULL, REGTEST_MAX_LENGTH);
953     else
954     copy_char8_to_char16(current->pattern, regtest_buf, REGTEST_MAX_LENGTH);
955 ph10 667
956 ph10 836 re16 = NULL;
957     if (!(current->start_offset & F_NO16))
958     re16 = pcre16_compile(regtest_buf,
959     current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags16),
960     &error, &err_offs, tables(0));
961 ph10 667
962 ph10 836 extra16 = NULL;
963     if (re16) {
964     error = NULL;
965     extra16 = pcre16_study(re16, PCRE_STUDY_JIT_COMPILE, &error);
966     if (!extra16) {
967     printf("\n16 bit: Cannot study pattern: %s\n", current->pattern);
968     pcre16_free(re16);
969     re16 = NULL;
970     }
971     if (!(extra16->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
972     printf("\n16 bit: JIT compiler does not support: %s\n", current->pattern);
973     pcre16_free_study(extra16);
974     pcre16_free(re16);
975     re16 = NULL;
976     }
977     } else if (((utf16 && ucp16) || is_ascii_pattern) && !(current->start_offset & F_NO16))
978     printf("\n16 bit: Cannot compile pattern: %s\n", current->pattern);
979     #endif
980 ph10 667
981     counter++;
982 ph10 836 if ((counter & 0x3) != 0) {
983     #ifdef SUPPORT_PCRE8
984     setstack8(NULL);
985     #endif
986     #ifdef SUPPORT_PCRE16
987     setstack16(NULL);
988     #endif
989     }
990 ph10 667
991 ph10 836 #ifdef SUPPORT_PCRE8
992     return_value8_1 = -1000;
993     return_value8_2 = -1000;
994 ph10 667 for (i = 0; i < 32; ++i)
995 ph10 836 ovector8_1[i] = -2;
996     for (i = 0; i < 32; ++i)
997     ovector8_2[i] = -2;
998     if (re8) {
999     setstack8(extra8);
1000     return_value8_1 = pcre_exec(re8, extra8, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1001     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_1, 32);
1002     return_value8_2 = pcre_exec(re8, NULL, current->input, strlen(current->input), current->start_offset & OFFSET_MASK,
1003     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector8_2, 32);
1004     }
1005     #endif
1006 ph10 667
1007 ph10 836 #ifdef SUPPORT_PCRE16
1008     return_value16_1 = -1000;
1009     return_value16_2 = -1000;
1010 ph10 667 for (i = 0; i < 32; ++i)
1011 ph10 836 ovector16_1[i] = -2;
1012     for (i = 0; i < 32; ++i)
1013     ovector16_2[i] = -2;
1014     if (re16) {
1015     setstack16(extra16);
1016     if ((current->flags & PCRE_UTF8) || (current->start_offset & F_FORCECONV))
1017     length16 = convert_utf8_to_utf16(current->input, regtest_buf, regtest_offsetmap, REGTEST_MAX_LENGTH);
1018     else
1019     length16 = copy_char8_to_char16(current->input, regtest_buf, REGTEST_MAX_LENGTH);
1020     return_value16_1 = pcre16_exec(re16, extra16, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1021     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_1, 32);
1022     return_value16_2 = pcre16_exec(re16, NULL, regtest_buf, length16, current->start_offset & OFFSET_MASK,
1023     current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector16_2, 32);
1024     }
1025     #endif
1026 ph10 667
1027 ph10 836 /* If F_DIFF is set, just run the test, but do not compare the results.
1028 ph10 667 Segfaults can still be captured. */
1029    
1030 ph10 836 is_successful = 1;
1031     if (!(current->start_offset & F_DIFF)) {
1032     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1033     if (utf8 == utf16 && !(current->start_offset & F_FORCECONV)) {
1034     /* All results must be the same. */
1035     if (return_value8_1 != return_value8_2 || return_value8_1 != return_value16_1 || return_value8_1 != return_value16_2) {
1036     printf("\n8 and 16 bit: Return value differs(%d:%d:%d:%d): [%d] '%s' @ '%s'\n",
1037     return_value8_1, return_value8_2, return_value16_1, return_value16_2,
1038     total, current->pattern, current->input);
1039     is_successful = 0;
1040     } else if (return_value8_1 >= 0) {
1041     return_value8_1 *= 2;
1042     /* Transform back the results. */
1043     if (current->flags & PCRE_UTF8) {
1044     for (i = 0; i < return_value8_1; ++i) {
1045     if (ovector16_1[i] >= 0)
1046     ovector16_1[i] = regtest_offsetmap[ovector16_1[i]];
1047     if (ovector16_2[i] >= 0)
1048     ovector16_2[i] = regtest_offsetmap[ovector16_2[i]];
1049     }
1050 ph10 667 }
1051 ph10 836
1052     for (i = 0; i < return_value8_1; ++i)
1053     if (ovector8_1[i] != ovector8_2[i] || ovector8_1[i] != ovector16_1[i] || ovector8_1[i] != ovector16_2[i]) {
1054     printf("\n8 and 16 bit: Ovector[%d] value differs(%d:%d:%d:%d): [%d] '%s' @ '%s' \n",
1055     i, ovector8_1[i], ovector8_2[i], ovector16_1[i], ovector16_2[i],
1056     total, current->pattern, current->input);
1057     is_successful = 0;
1058     }
1059 ph10 667 }
1060 ph10 836 } else {
1061     #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1062     /* Only the 8 bit and 16 bit results must be equal. */
1063     #ifdef SUPPORT_PCRE8
1064     if (return_value8_1 != return_value8_2) {
1065     printf("\n8 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1066     return_value8_1, return_value8_2, total, current->pattern, current->input);
1067     is_successful = 0;
1068     } else if (return_value8_1 >= 0) {
1069     return_value8_1 *= 2;
1070     for (i = 0; i < return_value8_1; ++i)
1071     if (ovector8_1[i] != ovector8_2[i]) {
1072     printf("\n8 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1073     i, ovector8_1[i], ovector8_2[i], total, current->pattern, current->input);
1074     is_successful = 0;
1075     }
1076     }
1077     #endif
1078    
1079     #ifdef SUPPORT_PCRE16
1080     if (return_value16_1 != return_value16_2) {
1081     printf("\n16 bit: Return value differs(%d:%d): [%d] '%s' @ '%s'\n",
1082     return_value16_1, return_value16_2, total, current->pattern, current->input);
1083     is_successful = 0;
1084     } else if (return_value16_1 >= 0) {
1085     return_value16_1 *= 2;
1086     for (i = 0; i < return_value16_1; ++i)
1087     if (ovector16_1[i] != ovector16_2[i]) {
1088     printf("\n16 bit: Ovector[%d] value differs(%d:%d): [%d] '%s' @ '%s'\n",
1089     i, ovector16_1[i], ovector16_2[i], total, current->pattern, current->input);
1090     is_successful = 0;
1091     }
1092     }
1093     #endif
1094    
1095     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1096 ph10 667 }
1097 ph10 836 #endif /* SUPPORT_PCRE8 && SUPPORT_PCRE16 */
1098 ph10 667 }
1099    
1100 ph10 836 if (is_successful) {
1101     #ifdef SUPPORT_PCRE8
1102     if (!(current->start_offset & F_NO8) && ((utf8 && ucp8) || is_ascii_input)) {
1103     if (return_value8_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1104     printf("8 bit: Test should match: [%d] '%s' @ '%s'\n",
1105     total, current->pattern, current->input);
1106     is_successful = 0;
1107     }
1108 ph10 667
1109 ph10 836 if (return_value8_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1110     printf("8 bit: Test should not match: [%d] '%s' @ '%s'\n",
1111     total, current->pattern, current->input);
1112     is_successful = 0;
1113     }
1114     }
1115     #endif
1116     #ifdef SUPPORT_PCRE16
1117     if (!(current->start_offset & F_NO16) && ((utf16 && ucp16) || is_ascii_input)) {
1118     if (return_value16_1 < 0 && !(current->start_offset & F_NOMATCH)) {
1119     printf("16 bit: Test should match: [%d] '%s' @ '%s'\n",
1120     total, current->pattern, current->input);
1121     is_successful = 0;
1122     }
1123    
1124     if (return_value16_1 >= 0 && (current->start_offset & F_NOMATCH)) {
1125     printf("16 bit: Test should not match: [%d] '%s' @ '%s'\n",
1126     total, current->pattern, current->input);
1127     is_successful = 0;
1128     }
1129     }
1130     #endif
1131     }
1132    
1133     if (is_successful)
1134     successful++;
1135    
1136     #ifdef SUPPORT_PCRE8
1137     if (re8) {
1138     pcre_free_study(extra8);
1139     pcre_free(re8);
1140     }
1141     #endif
1142     #ifdef SUPPORT_PCRE16
1143     if (re16) {
1144     pcre16_free_study(extra16);
1145     pcre16_free(re16);
1146     }
1147     #endif
1148    
1149     /* printf("[%d-%d|%d-%d]%s", ovector8_1[0], ovector8_1[1], ovector16_1[0], ovector16_1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
1150 ph10 667 printf(".");
1151     fflush(stdout);
1152     current++;
1153     }
1154 ph10 836 tables(1);
1155     #ifdef SUPPORT_PCRE8
1156     setstack8(NULL);
1157     #endif
1158     #ifdef SUPPORT_PCRE16
1159     setstack16(NULL);
1160     #endif
1161 ph10 667
1162 ph10 836 if (total == successful) {
1163 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
1164 ph10 677 return 0;
1165 ph10 698 } else {
1166 ph10 836 printf("\nSuccessful test ratio: %d%% (%d failed)\n", successful * 100 / total, total - successful);
1167 ph10 698 return 1;
1168     }
1169 ph10 667 }
1170    
1171     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12