/[pcre]/code/trunk/pcre_jit_test.c
ViewVC logotype

Contents of /code/trunk/pcre_jit_test.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 736 - (hide annotations) (download)
Sun Oct 16 15:48:03 2011 UTC (2 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 32241 byte(s)
Support OP_ANYBYTE in JIT when utf8 is disabled and optimizing utf8 character length computation
1 ph10 667 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Main Library written by Philip Hazel
9     Copyright (c) 1997-2011 University of Cambridge
10    
11     This JIT compiler regression test program was written by Zoltan Herczeg
12     Copyright (c) 2010-2011
13    
14     -----------------------------------------------------------------------------
15     Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17    
18     * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20    
21     * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24    
25     * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28    
29     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40     -----------------------------------------------------------------------------
41     */
42    
43 ph10 698 #ifdef HAVE_CONFIG_H
44     #include "config.h"
45     #endif
46    
47 ph10 667 #include <stdio.h>
48     #include <string.h>
49     #include "pcre.h"
50    
51     #define PCRE_BUG 0x80000000
52    
53     /*
54     Hungarian utf8 characters
55     \xc3\xa9 = 0xe9 = 233 (e') \xc3\x89 = 0xc9 = 201 (E')
56     \xc3\xa1 = 0xe1 = 225 (a') \xc3\x81 = 0xc1 = 193 (A')
57     \xe6\x92\xad = 0x64ad = 25773 (a valid kanji)
58     \xc2\x85 = 0x85 (NExt Line = NEL)
59     \xc2\xa1 = 0xa1 (Inverted Exclamation Mark)
60     \xe2\x80\xa8 = 0x2028 (Line Separator)
61     \xc8\xba = 570 \xe2\xb1\xa5 = 11365 (lowercase length != uppercase length)
62     \xcc\x8d = 781 (Something with Mark property)
63 ph10 691 */
64 ph10 667
65     static void setstack(pcre_extra *extra);
66 ph10 677 static int regression_tests(void);
67 ph10 667
68     int main(void)
69     {
70 ph10 698 int jit = 0;
71     pcre_config(PCRE_CONFIG_JIT, &jit);
72     if (!jit) {
73     printf("JIT must be enabled to run pcre_jit_test\n");
74     return 1;
75     }
76     return regression_tests();
77 ph10 667 }
78    
79     static pcre_jit_stack* callback(void *arg)
80     {
81     return (pcre_jit_stack *)arg;
82     }
83    
84     static void setstack(pcre_extra *extra)
85     {
86     static pcre_jit_stack *stack;
87     if (stack) pcre_jit_stack_free(stack);
88     stack = pcre_jit_stack_alloc(1, 1024 * 1024);
89 ph10 675 pcre_assign_jit_stack(extra, callback, stack);
90 ph10 667 }
91    
92     /* --------------------------------------------------------------------------------------- */
93    
/*
 * Shorthand option sets used by the regression table. The letters in each
 * name spell out the options that are OR-ed together:
 *   C = PCRE_CASELESS, M = PCRE_MULTILINE, U = PCRE_UTF8,
 *   A = PCRE_NEWLINE_ANYCRLF, P = PCRE_UCP.
 */
#define MUA	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define MUAP	(PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMUA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF)
#define CMUAP	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define MA	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
#define MAP	(PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF | PCRE_UCP)
#define CMA	(PCRE_CASELESS | PCRE_MULTILINE | PCRE_NEWLINE_ANYCRLF)
101    
/*
 * One row of the regression table.
 *
 * The member order must not change: the table initializes rows
 * positionally as { flags, start_offset, pattern, input }.
 */
struct regression_test_case {
	int flags;		/* PCRE_* option bits (may include PCRE_BUG) */
	int start_offset;	/* start offset associated with the input */
	const char *pattern;	/* regular expression source */
	const char *input;	/* subject string */
};
108    
109     static struct regression_test_case regression_test_cases[] = {
110     /* Constant strings. */
111     { MUA, 0, "AbC", "AbAbC" },
112     { MUA, 0, "ACCEPT", "AACACCACCEACCEPACCEPTACCEPTT" },
113     { CMUA, 0, "aA#\xc3\xa9\xc3\x81", "aA#Aa#\xc3\x89\xc3\xa1" },
114     { MA, 0, "[^a]", "aAbB" },
115     { CMA, 0, "[^m]", "mMnN" },
116     { MA, 0, "a[^b][^#]", "abacd" },
117     { CMA, 0, "A[^B][^E]", "abacd" },
118     { CMUA, 0, "[^x][^#]", "XxBll" },
119     { MUA, 0, "[^a]", "aaa\xc3\xa1#Ab" },
120     { CMUA, 0, "[^A]", "aA\xe6\x92\xad" },
121     { MUA, 0, "\\W(\\W)?\\w", "\r\n+bc" },
122     { MUA, 0, "\\W(\\W)?\\w", "\n\r+bc" },
123     { MUA, 0, "\\W(\\W)?\\w", "\r\r+bc" },
124     { MUA, 0, "\\W(\\W)?\\w", "\n\n+bc" },
125     { MUA, 0, "[axd]", "sAXd" },
126     { CMUA, 0, "[axd]", "sAXd" },
127     { CMUA, 0, "[^axd]", "DxA" },
128     { MUA, 0, "[a-dA-C]", "\xe6\x92\xad\xc3\xa9.B" },
129     { MUA, 0, "[^a-dA-C]", "\xe6\x92\xad\xc3\xa9" },
130     { CMUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
131     { MUA, 0, "[^\xc3\xa9]", "\xc3\xa9\xc3\x89." },
132 ph10 698 { MUA, 0, "[^a]", "\xc2\x80[]" },
133 ph10 667 { CMUA, 0, "\xf0\x90\x90\xa7", "\xf0\x90\x91\x8f" },
134     { CMA, 0, "1a2b3c4", "1a2B3c51A2B3C4" },
135     { PCRE_CASELESS, 0, "\xff#a", "\xff#\xff\xfe##\xff#A" },
136     { PCRE_CASELESS, 0, "\xfe", "\xff\xfc#\xfe\xfe" },
137     { PCRE_CASELESS, 0, "a1", "Aa1" },
138 zherczeg 736 { MA, 0, "\\Ca", "cda" },
139     { CMA, 0, "\\Ca", "CDA" },
140     { MA, 0, "\\Cx", "cda" },
141     { CMA, 0, "\\Cx", "CDA" },
142 ph10 667
143     /* Assertions. */
144     { MUA, 0, "\\b[^A]", "A_B#" },
145     { MA, 0, "\\b\\W", "\n*" },
146     { MUA, 0, "\\B[^,]\\b[^s]\\b", "#X" },
147     { MAP, 0, "\\B", "_\xa1" },
148     { MAP, 0, "\\b_\\b[,A]\\B", "_," },
149     { MUAP, 0, "\\b", "\xe6\x92\xad!" },
150     { MUAP, 0, "\\B", "_\xc2\xa1\xc3\xa1\xc2\x85" },
151     { MUAP, 0, "\\b[^A]\\B[^c]\\b[^_]\\B", "_\xc3\xa1\xe2\x80\xa8" },
152     { MUAP, 0, "\\b\\w+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
153     { MUA, 0, "\\b.", "\xcd\xbe" },
154     { MA, 0, "\\R^", "\n" },
155     { MA, 1, "^", "\n" },
156     { 0, 0, "^ab", "ab" },
157     { 0, 0, "^ab", "aab" },
158     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "^a", "\r\raa\n\naa\r\naa" },
159     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "^-", "\xe2\x80\xa8--\xc2\x85-\r\n-" },
160     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^-", "a--b--\x85--" },
161     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xe2\x80\xa8--" },
162     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "^-", "a--\xc2\x85--" },
163     { 0, 0, "ab$", "ab" },
164     { 0, 0, "ab$", "ab\r\n" },
165     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "a$", "\r\raa\n\naa\r\naa" },
166     { PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aaa" },
167     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANYCRLF, 0, "#$", "#\xc2\x85###\r#" },
168     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY, 0, "#$", "#\xe2\x80\xa9" },
169     { PCRE_NOTBOL | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
170     { PCRE_NOTBOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "^a", "aa\naa" },
171     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
172     { PCRE_NOTEOL | PCRE_NEWLINE_ANY, 0, "a$", "aa\r\n" },
173     { PCRE_UTF8 | PCRE_DOLLAR_ENDONLY | PCRE_NEWLINE_ANY, 0, "\\p{Any}{2,}$", "aa\r\n" },
174     { PCRE_NOTEOL | PCRE_MULTILINE | PCRE_NEWLINE_ANY, 0, "a$", "aa\naa" },
175     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa" },
176     { PCRE_NEWLINE_CR | PCRE_UTF8, 0, "a\\Z", "aaa\r" },
177     { PCRE_NEWLINE_CR, 0, ".\\Z", "aaa\n" },
178     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r" },
179     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
180     { PCRE_NEWLINE_CRLF, 0, ".\\Z", "aaa\r\n" },
181     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
182     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
183     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
184     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
185     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
186     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa" },
187     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r" },
188     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\n" },
189     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".\\Z", "aaa\r\n" },
190     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xc2\x85" },
191     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, ".\\Z", "aaa\xe2\x80\xa8" },
192     { MA, 0, "\\Aa", "aaa" },
193     { MA, 1, "\\Aa", "aaa" },
194     { MA, 1, "\\Ga", "aaa" },
195     { MA, 1, "\\Ga", "aba" },
196     { MA, 0, "a\\z", "aaa" },
197     { MA, 0, "a\\z", "aab" },
198    
199     /* Brackets. */
200     { MUA, 0, "(ab|bb|cd)", "bacde" },
201     { MUA, 0, "(?:ab|a)(bc|c)", "ababc" },
202     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|efg))", "abac" },
203     { CMUA, 0, "((aB|(Cc))|(bB)|(?:cd|EFg))", "AcCe" },
204     { MUA, 0, "((ab|(cc))|(bb)|(?:cd|ebg))", "acebebg" },
205     { MUA, 0, "(?:(a)|(?:b))(cc|(?:d|e))(a|b)k", "accabdbbccbk" },
206    
207     /* Greedy and non-greedy ? operators. */
208     { MUA, 0, "(?:a)?a", "laab" },
209     { CMUA, 0, "(A)?A", "llaab" },
210     { MUA, 0, "(a)?\?a", "aab" }, /* ?? is the prefix of trygraphs in GCC. */
211     { MUA, 0, "(a)?a", "manm" },
212     { CMUA, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
213     { MUA, 0, "(a|b)?\?d((?:e)?)", "abcde" },
214     { MUA, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
215    
216     /* Greedy and non-greedy + operators */
217     { MUA, 0, "(aa)+aa", "aaaaaaa" },
218     { MUA, 0, "(aa)+?aa", "aaaaaaa" },
219     { MUA, 0, "(?:aba|ab|a)+l", "ababamababal" },
220     { MUA, 0, "(?:aba|ab|a)+?l", "ababamababal" },
221     { MUA, 0, "(a(?:bc|cb|b|c)+?|ss)+e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
222     { MUA, 0, "(a(?:bc|cb|b|c)+|ss)+?e", "accssabccbcacbccbbXaccssabccbcacbccbbe" },
223     { MUA, 0, "(?:(b(c)+?)+)?\?(?:(bc)+|(cb)+)+(?:m)+", "bccbcccbcbccbcbPbccbcccbcbccbcbmmn" },
224    
225     /* Greedy and non-greedy * operators */
226     { CMUA, 0, "(?:AA)*AB", "aaaaaaamaaaaaaab" },
227     { MUA, 0, "(?:aa)*?ab", "aaaaaaamaaaaaaab" },
228     { MUA, 0, "(aa|ab)*ab", "aaabaaab" },
229     { CMUA, 0, "(aa|Ab)*?aB", "aaabaaab" },
230     { MUA, 0, "(a|b)*(?:a)*(?:b)*m", "abbbaaababanabbbaaababamm" },
231     { MUA, 0, "(a|b)*?(?:a)*?(?:b)*?m", "abbbaaababanabbbaaababamm" },
232     { MA, 0, "a(a(\\1*)a|(b)b+){0}a", "aa" },
233     { MA, 0, "((?:a|)*){0}a", "a" },
234    
235     /* Combining ? + * operators */
236     { MUA, 0, "((bm)+)?\?(?:a)*(bm)+n|((am)+?)?(?:a)+(am)*n", "bmbmabmamaaamambmaman" },
237     { MUA, 0, "(((ab)?cd)*ef)+g", "abcdcdefcdefefmabcdcdefcdefefgg" },
238     { MUA, 0, "(((ab)?\?cd)*?ef)+?g", "abcdcdefcdefefmabcdcdefcdefefgg" },
239     { MUA, 0, "(?:(ab)?c|(?:ab)+?d)*g", "ababcdccababddg" },
240     { MUA, 0, "(?:(?:ab)?\?c|(ab)+d)*?g", "ababcdccababddg" },
241    
242     /* Single character iterators. */
243     { MUA, 0, "(a+aab)+aaaab", "aaaabcaaaabaabcaabcaaabaaaab" },
244     { MUA, 0, "(a*a*aab)+x", "aaaaabaabaaabmaabx" },
245     { MUA, 0, "(a*?(b|ab)a*?)+x", "aaaabcxbbaabaacbaaabaabax" },
246     { MUA, 0, "(a+(ab|ad)a+)+x", "aaabaaaadaabaaabaaaadaaax" },
247     { MUA, 0, "(a?(a)a?)+(aaa)", "abaaabaaaaaaaa" },
248     { MUA, 0, "(a?\?(a)a?\?)+(b)", "aaaacaaacaacacbaaab" },
249     { MUA, 0, "(a{0,4}(b))+d", "aaaaaabaabcaaaaabaaaaabd" },
250     { MUA, 0, "(a{0,4}?[^b])+d+(a{0,4}[^b])d+", "aaaaadaaaacaadddaaddd" },
251     { MUA, 0, "(ba{2})+c", "baabaaabacbaabaac" },
252     { MUA, 0, "(a*+bc++)+", "aaabbcaaabcccab" },
253     { MUA, 0, "(a?+[^b])+", "babaacacb" },
254     { MUA, 0, "(a{0,3}+b)(a{0,3}+b)(a{0,3}+)[^c]", "abaabaaacbaabaaaac" },
255     { CMUA, 0, "([a-c]+[d-f]+?)+?g", "aBdacdehAbDaFgA" },
256     { CMUA, 0, "[c-f]+k", "DemmFke" },
257     { MUA, 0, "([DGH]{0,4}M)+", "GGDGHDGMMHMDHHGHM" },
258     { MUA, 0, "([a-c]{4,}s)+", "abasabbasbbaabsbba" },
259     { CMUA, 0, "[ace]{3,7}", "AcbDAcEEcEd" },
260     { CMUA, 0, "[ace]{3,7}?", "AcbDAcEEcEd" },
261     { CMUA, 0, "[ace]{3,}", "AcbDAcEEcEd" },
262     { CMUA, 0, "[ace]{3,}?", "AcbDAcEEcEd" },
263     { MUA, 0, "[ckl]{2,}?g", "cdkkmlglglkcg" },
264     { CMUA, 0, "[ace]{5}?", "AcCebDAcEEcEd" },
265     { MUA, 0, "([AbC]{3,5}?d)+", "BACaAbbAEAACCbdCCbdCCAAbb" },
266     { MUA, 0, "([^ab]{0,}s){2}", "abaabcdsABamsDDs" },
267     { MUA, 0, "\\b\\w+\\B", "x,a_cd" },
268     { MUAP, 0, "\\b[^\xc2\xa1]+\\B", "\xc3\x89\xc2\xa1\xe6\x92\xad\xc3\x81\xc3\xa1" },
269     { CMUA, 0, "[^b]+(a*)([^c]?d{3})", "aaaaddd" },
270    
271     /* Basic character sets. */
272     { MUA, 0, "(?:\\s)+(?:\\S)+", "ab \t\xc3\xa9\xe6\x92\xad " },
273     { MUA, 0, "(\\w)*(k)(\\W)?\?", "abcdef abck11" },
274     { MUA, 0, "\\((\\d)+\\)\\D", "a() (83 (8)2 (9)ab" },
275     { MUA, 0, "\\w(\\s|(?:\\d)*,)+\\w\\wb", "a 5, 4,, bb 5, 4,, aab" },
276     { MUA, 0, "(\\v+)(\\V+)", "\x0e\xc2\x85\xe2\x80\xa8\x0b\x09\xe2\x80\xa9" },
277     { MUA, 0, "(\\h+)(\\H+)", "\xe2\x80\xa8\xe2\x80\x80\x20\xe2\x80\x8a\xe2\x81\x9f\xe3\x80\x80\x09\x20\xc2\xa0\x0a" },
278    
279     /* Unicode properties. */
280     { MUAP, 0, "[1-5\xc3\xa9\\w]", "\xc3\xa1_" },
281     { MUAP, 0, "[\xc3\x81\\p{Ll}]", "A_\xc3\x89\xc3\xa1" },
282     { MUAP, 0, "[\\Wd-h_x-z]+", "a\xc2\xa1#_yhzdxi" },
283     { MUAP, 0, "[\\P{Any}]", "abc" },
284     { MUAP, 0, "[^\\p{Any}]", "abc" },
285     { MUAP, 0, "[\\P{Any}\xc3\xa1-\xc3\xa8]", "abc" },
286     { MUAP, 0, "[^\\p{Any}\xc3\xa1-\xc3\xa8]", "abc" },
287     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
288     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
289     { MUAP, 0, "[\xc3\xa1-\xc3\xa8\\p{Any}]", "abc" },
290     { MUAP, 0, "[^\xc3\xa1-\xc3\xa8\\P{Any}]", "abc" },
291     { MUAP, 0, "[b-\xc3\xa9\\s]", "a\xc\xe6\x92\xad" },
292     { CMUAP, 0, "[\xc2\x85-\xc2\x89\xc3\x89]", "\xc2\x84\xc3\xa9" },
293     { MUAP, 0, "[^b-d^&\\s]{3,}", "db^ !a\xe2\x80\xa8_ae" },
294     { MUAP, 0, "[^\\S\\P{Any}][\\sN]{1,3}[\\P{N}]{4}", "\xe2\x80\xaa\xa N\x9\xc3\xa9_0" },
295     { MUA, 0, "[^\\P{L}\x9!D-F\xa]{2,3}", "\x9,.DF\xa.CG\xc3\x81" },
296     { CMUAP, 0, "[\xc3\xa1-\xc3\xa9_\xe2\x80\xa0-\xe2\x80\xaf]{1,5}[^\xe2\x80\xa0-\xe2\x80\xaf]", "\xc2\xa1\xc3\x89\xc3\x89\xe2\x80\xaf_\xe2\x80\xa0" },
297     { MUAP, 0, "[\xc3\xa2-\xc3\xa6\xc3\x81-\xc3\x84\xe2\x80\xa8-\xe2\x80\xa9\xe6\x92\xad\\p{Zs}]{2,}", "\xe2\x80\xa7\xe2\x80\xa9\xe6\x92\xad \xe6\x92\xae" },
298     { MUAP, 0, "[\\P{L&}]{2}[^\xc2\x85-\xc2\x89\\p{Ll}\\p{Lu}]{2}", "\xc3\xa9\xe6\x92\xad.a\xe6\x92\xad|\xc2\x8a#" },
299     { PCRE_UCP, 0, "[a-b\\s]{2,5}[^a]", "AB baaa" },
300    
301     /* Possible empty brackets. */
302     { MUA, 0, "(?:|ab||bc|a)+d", "abcxabcabd" },
303     { MUA, 0, "(|ab||bc|a)+d", "abcxabcabd" },
304     { MUA, 0, "(?:|ab||bc|a)*d", "abcxabcabd" },
305     { MUA, 0, "(|ab||bc|a)*d", "abcxabcabd" },
306     { MUA, 0, "(?:|ab||bc|a)+?d", "abcxabcabd" },
307     { MUA, 0, "(|ab||bc|a)+?d", "abcxabcabd" },
308     { MUA, 0, "(?:|ab||bc|a)*?d", "abcxabcabd" },
309     { MUA, 0, "(|ab||bc|a)*?d", "abcxabcabd" },
310     { MUA, 0, "(((a)*?|(?:ba)+)+?|(?:|c|ca)*)*m", "abaacaccabacabalabaacaccabacabamm" },
311     { MUA, 0, "(?:((?:a)*|(ba)+?)+|(|c|ca)*?)*?m", "abaacaccabacabalabaacaccabacabamm" },
312    
313     /* Start offset. */
314     { MUA, 3, "(\\d|(?:\\w)*\\w)+", "0ac01Hb" },
315     { MUA, 4, "(\\w\\W\\w)+", "ab#d" },
316     { MUA, 2, "(\\w\\W\\w)+", "ab#d" },
317     { MUA, 1, "(\\w\\W\\w)+", "ab#d" },
318    
319     /* Newline. */
320     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
321     { PCRE_MULTILINE | PCRE_NEWLINE_CR, 0, "\\W{0,2}[^#]{3}", "\r\n#....." },
322     { PCRE_MULTILINE | PCRE_NEWLINE_CRLF, 0, "\\W{1,3}[^#]", "\r\n##...." },
323    
324     /* Any character except newline or any newline. */
325     { PCRE_NEWLINE_CRLF, 0, ".", "\r" },
326     { PCRE_NEWLINE_CRLF | PCRE_UTF8, 0, ".(.).", "a\xc3\xa1\r\n\n\r\r" },
327     { PCRE_NEWLINE_ANYCRLF, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
328     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.)", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa8" },
329     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.).", "a\rb\nc\r\n\xc2\x85\xe2\x80\xa9$de" },
330     { PCRE_NEWLINE_ANYCRLF | PCRE_UTF8, 0, ".(.).", "\xe2\x80\xa8\nb\r" },
331     { PCRE_NEWLINE_ANY, 0, "(.)(.)", "#\x85#\r#\n#\r\n#\x84" },
332     { PCRE_NEWLINE_ANY | PCRE_UTF8, 0, "(.+)#", "#\rMn\xc2\x85#\n###" },
333     { PCRE_BSR_ANYCRLF, 0, "\\R", "\r" },
334     { PCRE_BSR_ANYCRLF, 0, "\\R", "\x85#\r\n#" },
335     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\xe2\x80\xa8#c" },
336     { PCRE_BSR_UNICODE | PCRE_UTF8, 0, "\\R", "ab\r\nc" },
337     { PCRE_NEWLINE_CRLF | PCRE_BSR_UNICODE | PCRE_UTF8, 0, "(\\R.)+", "\xc2\x85\r\n#\xe2\x80\xa8\n\r\n\r" },
338     { MUA, 0, "\\R+", "ab" },
339     { MUA, 0, "\\R+", "ab\r\n\r" },
340     { MUA, 0, "\\R*", "ab\r\n\r" },
341     { MUA, 0, "\\R*", "\r\n\r" },
342     { MUA, 0, "\\R{2,4}", "\r\nab\r\r" },
343     { MUA, 0, "\\R{2,4}", "\r\nab\n\n\n\r\r\r" },
344     { MUA, 0, "\\R{2,}", "\r\nab\n\n\n\r\r\r" },
345     { MUA, 0, "\\R{0,3}", "\r\n\r\n\r\n\r\n\r\n" },
346     { MUA, 0, "\\R+\\R\\R", "\r\n\r\n" },
347     { MUA, 0, "\\R+\\R\\R", "\r\r\r" },
348     { MUA, 0, "\\R*\\R\\R", "\n\r" },
349     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r" },
350     { MUA, 0, "\\R{2,4}\\R\\R", "\r\r\r\r" },
351    
352     /* Atomic groups (no fallback from "next" direction). */
353     { MUA, 0, "(?>ab)ab", "bab" },
354     { MUA, 0, "(?>(ab))ab", "bab" },
355     { MUA, 0, "(?>ab)+abc(?>de)*def(?>gh)?ghe(?>ij)+?k(?>lm)*?n(?>op)?\?op",
356     "bababcdedefgheijijklmlmnop" },
357     { MUA, 0, "(?>a(b)+a|(ab)?\?(b))an", "abban" },
358     { MUA, 0, "(?>ab+a|(?:ab)?\?b)an", "abban" },
359     { MUA, 0, "((?>ab|ad|)*?)(?>|c)*abad", "abababcababad" },
360     { MUA, 0, "(?>(aa|b|)*+(?>(##)|###)*d|(aa)(?>(baa)?)m)", "aabaa#####da" },
361     { MUA, 0, "((?>a|)+?)b", "aaacaaab" },
362     { MUA, 0, "(?>x|)*$", "aaa" },
363     { MUA, 0, "(?>(x)|)*$", "aaa" },
364     { MUA, 0, "(?>x|())*$", "aaa" },
365     { MUA, 0, "((?>[cxy]a|[a-d])*?)b", "aaa+ aaab" },
366     { MUA, 0, "((?>[cxy](a)|[a-d])*?)b", "aaa+ aaab" },
367     { MUA, 0, "(?>((?>(a+))))bab|(?>((?>(a+))))bb", "aaaabaaabaabab" },
368     { MUA, 0, "(?>(?>a+))bab|(?>(?>a+))bb", "aaaabaaabaabab" },
369     { MUA, 0, "(?>(a)c|(?>(c)|(a))a)b*?bab", "aaaabaaabaabab" },
370     { MUA, 0, "(?>ac|(?>c|a)a)b*?bab", "aaaabaaabaabab" },
371     { MUA, 0, "(?>(b)b|(a))*b(?>(c)|d)?x", "ababcaaabdbx" },
372     { MUA, 0, "(?>bb|a)*b(?>c|d)?x", "ababcaaabdbx" },
373     { MUA, 0, "(?>(bb)|a)*b(?>c|(d))?x", "ababcaaabdbx" },
374     { MUA, 0, "(?>(a))*?(?>(a))+?(?>(a))??x", "aaaaaacccaaaaabax" },
375     { MUA, 0, "(?>a)*?(?>a)+?(?>a)??x", "aaaaaacccaaaaabax" },
376     { MUA, 0, "(?>(a)|)*?(?>(a)|)+?(?>(a)|)??x", "aaaaaacccaaaaabax" },
377     { MUA, 0, "(?>a|)*?(?>a|)+?(?>a|)??x", "aaaaaacccaaaaabax" },
378     { MUA, 0, "(?>a(?>(a{0,2}))*?b|aac)+b", "aaaaaaacaaaabaaaaacaaaabaacaaabb" },
379     { CMA, 0, "(?>((?>a{32}|b+|(a*))?(?>c+|d*)?\?)+e)+?f", "aaccebbdde bbdaaaccebbdee bbdaaaccebbdeef" },
380     { MUA, 0, "(?>(?:(?>aa|a||x)+?b|(?>aa|a||(x))+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
381     { MUA, 0, "(?>(?:(?>aa|a||(x))+?b|(?>aa|a||x)+?c)?(?>[ad]{0,2})*?d)+d", "aaacdbaabdcabdbaaacd aacaabdbdcdcaaaadaabcbaadd" },
382     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d" },
383     { MUA, 0, "\\X", "\xcc\x8d\xcc\x8d#\xcc\x8d\xcc\x8d" },
384     { MUA, 0, "\\X+..", "\xcc\x8d#\xcc\x8d#\xcc\x8d\xcc\x8d" },
385     { MUA, 0, "\\X{2,4}", "abcdef" },
386     { MUA, 0, "\\X{2,4}?", "abcdef" },
387     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d##" },
388     { MUA, 0, "\\X{2,4}..", "#\xcc\x8d#\xcc\x8d##" },
389     { MUA, 0, "(c(ab)?+ab)+", "cabcababcab" },
390     { MUA, 0, "(?>(a+)b)+aabab", "aaaabaaabaabab" },
391    
392 ph10 698 /* Possessive quantifiers. */
393     { MUA, 0, "(?:a|b)++m", "mababbaaxababbaam" },
394     { MUA, 0, "(?:a|b)*+m", "mababbaaxababbaam" },
395     { MUA, 0, "(?:a|b)*+m", "ababbaaxababbaam" },
396     { MUA, 0, "(a|b)++m", "mababbaaxababbaam" },
397     { MUA, 0, "(a|b)*+m", "mababbaaxababbaam" },
398     { MUA, 0, "(a|b)*+m", "ababbaaxababbaam" },
399 ph10 667 { MUA, 0, "(a|b(*ACCEPT))++m", "maaxab" },
400     { MUA, 0, "(?:b*)++m", "bxbbxbbbxm" },
401     { MUA, 0, "(?:b*)++m", "bxbbxbbbxbbm" },
402     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxm" },
403     { MUA, 0, "(?:b*)*+m", "bxbbxbbbxbbm" },
404     { MUA, 0, "(b*)++m", "bxbbxbbbxm" },
405     { MUA, 0, "(b*)++m", "bxbbxbbbxbbm" },
406     { MUA, 0, "(b*)*+m", "bxbbxbbbxm" },
407     { MUA, 0, "(b*)*+m", "bxbbxbbbxbbm" },
408 ph10 698 { MUA, 0, "(?:a|(b))++m", "mababbaaxababbaam" },
409     { MUA, 0, "(?:(a)|b)*+m", "mababbaaxababbaam" },
410     { MUA, 0, "(?:(a)|(b))*+m", "ababbaaxababbaam" },
411     { MUA, 0, "(a|(b))++m", "mababbaaxababbaam" },
412     { MUA, 0, "((a)|b)*+m", "mababbaaxababbaam" },
413     { MUA, 0, "((a)|(b))*+m", "ababbaaxababbaam" },
414 ph10 667 { MUA, 0, "(a|(b)(*ACCEPT))++m", "maaxab" },
415     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxm" },
416     { MUA, 0, "(?:(b*))++m", "bxbbxbbbxbbm" },
417     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxm" },
418     { MUA, 0, "(?:(b*))*+m", "bxbbxbbbxbbm" },
419     { MUA, 0, "((b*))++m", "bxbbxbbbxm" },
420     { MUA, 0, "((b*))++m", "bxbbxbbbxbbm" },
421     { MUA, 0, "((b*))*+m", "bxbbxbbbxm" },
422     { MUA, 0, "((b*))*+m", "bxbbxbbbxbbm" },
423     { MUA, 0, "(?>(b{2,4}))(?:(?:(aa|c))++m|(?:(aa|c))+n)", "bbaacaaccaaaacxbbbmbn" },
424     { MUA, 0, "((?:b)++a)+(cd)*+m", "bbababbacdcdnbbababbacdcdm" },
425     { MUA, 0, "((?:(b))++a)+((c)d)*+m", "bbababbacdcdnbbababbacdcdm" },
426     { MUA, 0, "(?:(?:(?:ab)*+k)++(?:n(?:cd)++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
427     { MUA, 0, "(?:((ab)*+(k))++(n(?:c(d))++)*+)*+m", "ababkkXababkkabkncXababkkabkncdcdncdXababkkabkncdcdncdkkabkncdXababkkabkncdcdncdkkabkncdm" },
428    
429     /* Back references. */
430     { MUA, 0, "(aa|bb)(\\1*)(ll|)(\\3*)bbbbbbc", "aaaaaabbbbbbbbc" },
431     { CMUA, 0, "(aa|bb)(\\1+)(ll|)(\\3+)bbbbbbc", "bBbbBbCbBbbbBbbcbbBbbbBBbbC" },
432     { CMA, 0, "(a{2,4})\\1", "AaAaaAaA" },
433     { MUA, 0, "(aa|bb)(\\1?)aa(\\1?)(ll|)(\\4+)bbc", "aaaaaaaabbaabbbbaabbbbc" },
434     { MUA, 0, "(aa|bb)(\\1{0,5})(ll|)(\\3{0,5})cc", "bbxxbbbbxxaaaaaaaaaaaaaaaacc" },
435     { MUA, 0, "(aa|bb)(\\1{3,5})(ll|)(\\3{3,5})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
436     { MUA, 0, "(aa|bb)(\\1{3,})(ll|)(\\3{3,})cc", "bbbbbbbbbbbbaaaaaaccbbbbbbbbbbbbbbcc" },
437     { MUA, 0, "(\\w+)b(\\1+)c", "GabGaGaDbGaDGaDc" },
438     { MUA, 0, "(?:(aa)|b)\\1?b", "bb" },
439     { CMUA, 0, "(aa|bb)(\\1*?)aa(\\1+?)", "bBBbaaAAaaAAaa" },
440     { MUA, 0, "(aa|bb)(\\1*?)(dd|)cc(\\3+?)", "aaaaaccdd" },
441     { CMUA, 0, "(?:(aa|bb)(\\1?\?)cc){2}(\\1?\?)", "aAaABBbbAAaAcCaAcCaA" },
442     { MUA, 0, "(?:(aa|bb)(\\1{3,5}?)){2}(dd|)(\\3{3,5}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
443     { CMA, 0, "(?:(aa|bb)(\\1{3,}?)){2}(dd|)(\\3{3,}?)", "aaaaaabbbbbbbbbbaaaaaaaaaaaaaa" },
444     { MUA, 0, "(?:(aa|bb)(\\1{0,3}?)){2}(dd|)(\\3{0,3}?)b(\\1{0,3}?)(\\1{0,3})", "aaaaaaaaaaaaaaabaaaaa" },
445     { MUA, 0, "(a(?:\\1|)a){3}b", "aaaaaaaaaaab" },
446     { MA, 0, "(a?)b(\\1\\1*\\1+\\1?\\1*?\\1+?\\1??\\1*+\\1++\\1?+\\1{4}\\1{3,5}\\1{4,}\\1{0,5}\\1{3,5}?\\1{4,}?\\1{0,5}?\\1{3,5}+\\1{4,}+\\1{0,5}+#){2}d", "bb#b##d" },
447     { MUAP, 0, "(\\P{N})\\1{2,}", ".www." },
448     { MUAP, 0, "(\\P{N})\\1{0,2}", "wwwww." },
449     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwww" },
450     { MUAP, 0, "(\\P{N})\\1{1,2}ww", "wwwww" },
451     { PCRE_UCP, 0, "(\\P{N})\\1{2,}", ".www." },
452    
453     /* Assertions. */
454     { MUA, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
455     { MUA, 0, "(?=((\\w+)b){3}|ab)", "dbbbb ab" },
456     { MUA, 0, "(?!ab|bc|cd)[a-z]{2}", "Xabcdef" },
457     { MUA, 0, "(?<=aaa|aa|a)a", "aaa" },
458     { MUA, 2, "(?<=aaa|aa|a)a", "aaa" },
459     { MA, 0, "(?<=aaa|aa|a)a", "aaa" },
460     { MA, 2, "(?<=aaa|aa|a)a", "aaa" },
461     { MUA, 0, "(\\d{2})(?!\\w+c|(((\\w?)m){2}n)+|\\1)", "x5656" },
462     { MUA, 0, "((?=((\\d{2,6}\\w){2,}))\\w{5,20}K){2,}", "567v09708K12l00M00 567v09708K12l00M00K45K" },
463     { MUA, 0, "(?=(?:(?=\\S+a)\\w*(b)){3})\\w+\\d", "bba bbab nbbkba nbbkba0kl" },
464     { MUA, 0, "(?>a(?>(b+))a(?=(..)))*?k", "acabbcabbaabacabaabbakk" },
465     { MUA, 0, "((?(?=(a))a)+k)", "bbak" },
466     { MUA, 0, "((?(?=a)a)+k)", "bbak" },
467     { MUA, 0, "(?=(?>(a))m)amk", "a k" },
468     { MUA, 0, "(?!(?>(a))m)amk", "a k" },
469     { MUA, 0, "(?>(?=(a))am)amk", "a k" },
470     { MUA, 0, "(?=(?>a|(?=(?>(b+))a|c)[a-c]+)*?m)[a-cm]+k", "aaam bbam baaambaam abbabba baaambaamk" },
471     { MUA, 0, "(?> ?\?\\b(?(?=\\w{1,4}(a))m)\\w{0,8}bc){2,}?", "bca ssbc mabd ssbc mabc" },
472     { MUA, 0, "(?:(?=ab)?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
473     { MUA, 0, "(?:(?=a(b))?[^n][^n])+m", "ababcdabcdcdabnababcdabcdcdabm" },
474     { MUA, 0, "(?:(?=.(.))??\\1.)+m", "aabbbcbacccanaabbbcbacccam" },
475     { MUA, 0, "(?:(?=.)??[a-c])+m", "abacdcbacacdcaccam" },
476     { MUA, 0, "((?!a)?(?!([^a]))?)+$", "acbab" },
477     { MUA, 0, "((?!a)?\?(?!([^a]))?\?)+$", "acbab" },
478    
479     /* Not empty, ACCEPT, FAIL */
480     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcx" },
481     { MUA | PCRE_NOTEMPTY, 0, "a*", "bcaad" },
482     { MUA | PCRE_NOTEMPTY, 0, "a*?", "bcaad" },
483     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*", "bcaad" },
484     { MUA, 0, "a(*ACCEPT)b", "ab" },
485     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcx" },
486     { MUA | PCRE_NOTEMPTY, 0, "a*(*ACCEPT)b", "bcaad" },
487     { MUA | PCRE_NOTEMPTY, 0, "a*?(*ACCEPT)b", "bcaad" },
488     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcx" },
489     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*(*ACCEPT)b)", "bcaad" },
490     { MUA | PCRE_NOTEMPTY, 0, "(?:z|a*?(*ACCEPT)b)", "bcaad" },
491     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "bcx" },
492     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a*(*ACCEPT)b", "" },
493     { MUA, 0, "((a(*ACCEPT)b))", "ab" },
494     { MUA, 0, "(a(*FAIL)a|a)", "aaa" },
495     { MUA, 0, "(?=ab(*ACCEPT)b)a", "ab" },
496     { MUA, 0, "(?=(?:x|ab(*ACCEPT)b))", "ab" },
497     { MUA, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
498     { MUA | PCRE_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
499    
500     /* Conditional blocks. */
501     { MUA, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
502     { MUA, 0, "(?(?!(b))a|b)+k", "ababbalbbadabak" },
503     { MUA, 0, "(?(?=a)a|b)+k", "ababbalbbadabak" },
504     { MUA, 0, "(?(?!b)a|b)+k", "ababbalbbadabak" },
505     { MUA, 0, "(?(?=(a))a*|b*)+k", "ababbalbbadabak" },
506     { MUA, 0, "(?(?!(b))a*|b*)+k", "ababbalbbadabak" },
507     { MUA, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
508     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+aaaak", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb aaaaaaak" },
509     { MUA | PCRE_BUG, 0, "(?(?!(b))(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
510     { MUA, 0, "(?(?!b)(?:aaaaaa|a)|(?:bbbbbb|b))+bbbbk", "aaaaaaaaaaaaaa bbbbbbbbbbbbbbb bbbbbbbk" },
511     { MUA, 0, "(?(?=a)a*|b*)+k", "ababbalbbadabak" },
512     { MUA, 0, "(?(?!b)a*|b*)+k", "ababbalbbadabak" },
513     { MUA, 0, "(?(?=a)ab)", "a" },
514     { MUA, 0, "(?(?<!b)c)", "b" },
515     { MUA, 0, "(?(DEFINE)a(b))", "a" },
516     { MUA, 0, "a(?(DEFINE)(?:b|(?:c?)+)*)", "a" },
517     { MUA, 0, "(?(?=.[a-c])[k-l]|[A-D])", "kdB" },
518     { MUA, 0, "(?(?!.{0,4}[cd])(aa|bb)|(cc|dd))+", "aabbccddaa" },
519     { MUA, 0, "(?(?=[^#@]*@)(aaab|aa|aba)|(aba|aab)){3,}", "aaabaaaba#aaabaaaba#aaabaaaba@" },
520     { MUA, 0, "((?=\\w{5})\\w(?(?=\\w*k)\\d|[a-f_])*\\w\\s)+", "mol m10kk m088k _f_a_ mbkkl" },
521     { MUA, 0, "(c)?\?(?(1)a|b)", "cdcaa" },
522     { MUA, 0, "(c)?\?(?(1)a|b)", "cbb" },
523     { MUA | PCRE_BUG, 0, "(?(?=(a))(aaaa|a?))+aak", "aaaaab aaaaak" },
524     { MUA, 0, "(?(?=a)(aaaa|a?))+aak", "aaaaab aaaaak" },
525     { MUA, 0, "(?(?!(b))(aaaa|a?))+aak", "aaaaab aaaaak" },
526     { MUA, 0, "(?(?!b)(aaaa|a?))+aak", "aaaaab aaaaak" },
527     { MUA | PCRE_BUG, 0, "(?(?=(a))a*)+aak", "aaaaab aaaaak" },
528     { MUA, 0, "(?(?=a)a*)+aak", "aaaaab aaaaak" },
529     { MUA, 0, "(?(?!(b))a*)+aak", "aaaaab aaaaak" },
530     { MUA, 0, "(?(?!b)a*)+aak", "aaaaab aaaaak" },
531     { MUA, 0, "(?(?=(?=(?!(x))a)aa)aaa|(?(?=(?!y)bb)bbb))*k", "abaabbaaabbbaaabbb abaabbaaabbbaaabbbk" },
532    
533 ph10 698 /* Set start of match. */
534 ph10 667 { MUA, 0, "(?:\\Ka)*aaaab", "aaaaaaaa aaaaaaabb" },
535     { MUA, 0, "(?>\\Ka\\Ka)*aaaab", "aaaaaaaa aaaaaaaaaabb" },
536     { MUA, 0, "a+\\K(?<=\\Gaa)a", "aaaaaa" },
537     { MUA | PCRE_NOTEMPTY, 0, "a\\K(*ACCEPT)b", "aa" },
538     { MUA | PCRE_NOTEMPTY_ATSTART, 0, "a\\K(*ACCEPT)b", "aa" },
539    
540     /* First line. */
541     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\naaa" },
542     { MUA | PCRE_FIRSTLINE, 0, "\\p{Any}a", "bb\r\naaa" },
543     { MUA | PCRE_FIRSTLINE, 0, "(?<=a)", "a" },
544     { MUA | PCRE_FIRSTLINE, 0, "[^a][^b]", "ab" },
545     { MUA | PCRE_FIRSTLINE, 0, "a", "\na" },
546     { MUA | PCRE_FIRSTLINE, 0, "[abc]", "\na" },
547     { MUA | PCRE_FIRSTLINE, 0, "^a", "\na" },
548     { MUA | PCRE_FIRSTLINE, 0, "^(?<=\n)", "\na" },
549     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\xc2\x85#" },
550     { PCRE_MULTILINE | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "#", "\x85#" },
551     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_ANY | PCRE_FIRSTLINE, 0, "^#", "\xe2\x80\xa8#" },
552     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}", "\r\na" },
553     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, ".", "\r" },
554     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "a", "\ra" },
555     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "ba", "bbb\r\nba" },
556     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 0, "\\p{Any}{4}|a", "\r\na" },
557     { PCRE_MULTILINE | PCRE_UTF8 | PCRE_NEWLINE_CRLF | PCRE_FIRSTLINE, 1, ".", "\r\n" },
558    
559     /* Recurse. */
560     { MUA, 0, "(a)(?1)", "aa" },
561     { MUA, 0, "((a))(?1)", "aa" },
562     { MUA, 0, "(b|a)(?1)", "aa" },
563     { MUA, 0, "(b|(a))(?1)", "aa" },
564     { MUA, 0, "((a)(b)(?:a*))(?1)", "aba" },
565     { MUA, 0, "((a)(b)(?:a*))(?1)", "abab" },
566     { MUA, 0, "((a+)c(?2))b(?1)", "aacaabaca" },
567     { MUA, 0, "((?2)b|(a)){2}(?1)", "aabab" },
568     { MUA, 0, "(?1)(a)*+(?2)(b(?1))", "aababa" },
569     { MUA, 0, "(?1)(((a(*ACCEPT)))b)", "axaa" },
570     { MUA, 0, "(?1)(?(DEFINE) (((ac(*ACCEPT)))b) )", "akaac" },
571     { MUA, 0, "(a+)b(?1)b\\1", "abaaabaaaaa" },
572     { MUA, 0, "(?(DEFINE)(aa|a))(?1)ab", "aab" },
573     { MUA, 0, "(?(DEFINE)(a\\Kb))(?1)+ababc", "abababxabababc" },
574     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababc" },
575     { MUA, 0, "(a\\Kb)(?1)+ababc", "abababxababababxc" },
576     { MUA, 0, "b|<(?R)*>", "<<b>" },
577     { MUA, 0, "(a\\K){0}(?:(?1)b|ac)", "ac" },
578     { MUA, 0, "(?(DEFINE)(a(?2)|b)(b(?1)|(a)))(?:(?1)|(?2))m", "ababababnababababaam" },
579    
580     /* Deep recursion. */
581     { MUA, 0, "((((?:(?:(?:\\w)+)?)*|(?>\\w)+?)+|(?>\\w)?\?)*)?\\s", "aaaaa+ " },
582 ph10 698 { MUA, 0, "(?:((?:(?:(?:\\w*?)+)??|(?>\\w)?|\\w*+)*)+)+?\\s", "aa+ " },
583 ph10 677 { MUA, 0, "((a?)+)+b", "aaaaaaaaaaaaa b" },
584 ph10 691
585 ph10 677 /* Deep recursion: Stack limit reached. */
586     { MA, 0, "a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaa" },
587     { MA, 0, "(?:a+)+b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
588     { MA, 0, "(?:a+?)+?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
589     { MA, 0, "(?:a*)*b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
590     { MA, 0, "(?:a*?)*?b", "aaaaaaaaaaaaaaaaaaaaaaaa b" },
591 ph10 667
592     { 0, 0, NULL, NULL }
593     };
594    
595 ph10 677 static int regression_tests(void)
596 ph10 667 {
597     pcre *re;
598     struct regression_test_case *current = regression_test_cases;
599     const char *error;
600     pcre_extra *extra;
601 ph10 698 int utf8 = 0, ucp = 0;
602 ph10 667 int ovector1[32];
603     int ovector2[32];
604     int return_value1, return_value2;
605     int i, err_offs;
606     int total = 0, succesful = 0;
607     int counter = 0;
608 ph10 698 int disabled_flags = PCRE_BUG;
609 ph10 667
610 ph10 698 /* This test compares the behaviour of interpreter and JIT. Although disabling
611     utf8 or ucp may make tests fail, if the pcre_exec result is the SAME, it is
612     still considered successful from pcre_jit_test point of view. */
613    
614     pcre_config(PCRE_CONFIG_UTF8, &utf8);
615     pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &ucp);
616     if (!utf8)
617     disabled_flags |= PCRE_UTF8;
618     if (!ucp)
619     disabled_flags |= PCRE_UCP;
620    
621     printf("Running JIT regression tests with utf8 %s and ucp %s:\n", utf8 ? "enabled" : "disabled", ucp ? "enabled" : "disabled");
622 ph10 667 while (current->pattern) {
623 ph10 698 /* printf("\nPattern: %s :\n", current->pattern); */
624 ph10 667 total++;
625    
626     error = NULL;
627 ph10 698 re = pcre_compile(current->pattern, current->flags & ~(PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART | disabled_flags), &error, &err_offs, NULL);
628 ph10 667
629     if (!re) {
630 ph10 698 if (utf8 && ucp)
631     printf("\nCannot compile pattern: %s\n", current->pattern);
632     else {
633     /* Some patterns cannot be compiled when either of utf8
634     or ucp is disabled. We just skip them. */
635     printf(".");
636     succesful++;
637     }
638 ph10 667 current++;
639     continue;
640     }
641    
642     error = NULL;
643     extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
644     if (!extra) {
645     printf("\nCannot study pattern: %s\n", current->pattern);
646     current++;
647     continue;
648     }
649    
650     if (!(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
651     printf("\nJIT compiler does not support: %s\n", current->pattern);
652     current++;
653     continue;
654     }
655    
656     counter++;
657     if ((counter & 0x3) != 0)
658     setstack(extra);
659    
660     for (i = 0; i < 32; ++i)
661     ovector1[i] = -2;
662     return_value1 = pcre_exec(re, extra, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector1, 32);
663    
664     for (i = 0; i < 32; ++i)
665     ovector2[i] = -2;
666     return_value2 = pcre_exec(re, NULL, current->input, strlen(current->input), current->start_offset, current->flags & (PCRE_NOTBOL | PCRE_NOTEOL | PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART), ovector2, 32);
667    
668     /* If PCRE_BUG is set, just run the test, but do not compare the results.
669     Segfaults can still be captured. */
670     if (!(current->flags & PCRE_BUG)) {
671     if (return_value1 != return_value2) {
672     printf("\nReturn value differs(%d:%d): '%s' @ '%s'\n", return_value1, return_value2, current->pattern, current->input);
673     current++;
674     continue;
675     }
676    
677     if (return_value1 >= 0) {
678     return_value1 *= 2;
679     err_offs = 0;
680     for (i = 0; i < return_value1; ++i)
681     if (ovector1[i] != ovector2[i]) {
682     printf("\nOvector[%d] value differs(%d:%d): '%s' @ '%s' \n", i, ovector1[i], ovector2[i], current->pattern, current->input);
683     err_offs = 1;
684     }
685     if (err_offs) {
686     current++;
687     continue;
688     }
689     }
690     }
691    
692     pcre_free_study(extra);
693     pcre_free(re);
694    
695     /* printf("[%d-%d]%s", ovector1[0], ovector1[1], (current->flags & PCRE_CASELESS) ? "C" : ""); */
696     printf(".");
697     fflush(stdout);
698     current++;
699     succesful++;
700     }
701    
702 ph10 677 if (total == succesful) {
703 ph10 667 printf("\nAll JIT regression tests are successfully passed.\n");
704 ph10 677 return 0;
705 ph10 698 } else {
706 ph10 667 printf("\nSuccessful test ratio: %d%%\n", succesful * 100 / total);
707 ph10 698 return 1;
708     }
709 ph10 667 }
710    
711     /* End of pcre_jit_test.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12