/[pcre]/code/trunk/testdata/testinput2
ViewVC logotype

Diff of /code/trunk/testdata/testinput2

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 450 by ph10, Wed Sep 16 10:56:40 2009 UTC revision 513 by ph10, Mon May 3 11:13:37 2010 UTC
# Line 2  Line 2 
2      of PCRE's API, error diagnostics, and the compiled code of some patterns.      of PCRE's API, error diagnostics, and the compiled code of some patterns.
3      It also checks the non-Perl syntax that PCRE supports (Python, .NET,      It also checks the non-Perl syntax that PCRE supports (Python, .NET,
4      Oniguruma). Finally, there are some tests where PCRE and Perl differ,      Oniguruma). Finally, there are some tests where PCRE and Perl differ,
5      either because PCRE can't be compatible, or there is potential Perl      either because PCRE can't be compatible, or there is a possible Perl
6      bug. --/      bug. --/
7    
8  /-- Originally, the Perl 5.10 things were in here too, but now I have separated  /-- Originally, the Perl 5.10 and 5.11 things were in here too, but now I have
9      many (most?) of them out into test 11. However, there may still be some      separated many (most?) of them out into test 11. However, there may still
10      that were overlooked. --/      be some that were overlooked. --/
11    
12  /(a)b|/I  /(a)b|/I
13    
# Line 51  Line 51 
51    
52  /(?X)[\B]/  /(?X)[\B]/
53    
54    /(?X)[\R]/
55    
56    /(?X)[\X]/
57    
58    /[\B]/BZ
59    
60    /[\R]/BZ
61    
62    /[\X]/BZ
63    
64  /[z-a]/  /[z-a]/
65    
66  /^*/  /^*/
# Line 344  Line 354 
354      *** Failers      *** Failers
355      a      a
356    
357  /This one is here because I think Perl 5.005_02 gets the setting of $1 wrong/I  /This one is here because Perl behaves differently; see also the following/I
358    
359  /^(a\1?){4}$/I  /^(a\1?){4}$/I
360        aaaa
361      aaaaaa      aaaaaa
362    
363    /Perl does not fail these two for the final subjects. Neither did PCRE until/
364    /release 8.01. The problem is in backtracking into a subpattern that contains/
365    /a recursive reference to itself. PCRE has now made these into atomic patterns./
366    
367    /^(xa|=?\1a){2}$/
368        xa=xaa
369        ** Failers
370        xa=xaaa
371    
372    /^(xa|=?\1a)+$/
373        xa=xaa
374        ** Failers
375        xa=xaaa
376    
377  /These are syntax tests from Perl 5.005/I  /These are syntax tests from Perl 5.005/I
378    
# Line 1133  Line 1158 
1158    
1159  /(a(?1)+b)/DZ  /(a(?1)+b)/DZ
1160    
 /^\W*(?:((.)\W*(?1)\W*\2|)|((.)\W*(?3)\W*\4|\W*.\W*))\W*$/Ii  
     1221  
     Satan, oscillate my metallic sonatas!  
     A man, a plan, a canal: Panama!  
     Able was I ere I saw Elba.  
     *** Failers  
     The quick brown fox  
   
1161  /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I  /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I
1162      12      12
1163      (((2+2)*-3)-7)      (((2+2)*-3)-7)
# Line 2272  a random value. /Ix Line 2289  a random value. /Ix
2289  /a+b?(*THEN)c+(*FAIL)/C  /a+b?(*THEN)c+(*FAIL)/C
2290      aaabccc      aaabccc
2291    
 /a(*PRUNE:XXX)b/  
   
2292  /a(*MARK)b/  /a(*MARK)b/
2293    
2294  /(?i:A{1,}\6666666666)/  /(?i:A{1,}\6666666666)/
# Line 2853  a random value. /Ix Line 2868  a random value. /Ix
2868  /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/  /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/
2869      XYabcdY      XYabcdY
2870    
2871    /(?<=b(?1)|zzz)(a)/
2872        xbaax
2873        xzzzax
2874    
2875    /(a)(?<=b\1)/
2876    
2877    /(a)(?<=b+(?1))/
2878    
2879    /(a+)(?<=b(?1))/
2880    
2881    /(a(?<=b(?1)))/
2882    
2883    /(?<=b(?1))xyz/
2884    
2885    /(?<=b(?1))xyz(b+)pqrstuvew/
2886    
2887    /(a|bc)\1/SI
2888    
2889    /(a|bc)\1{2,3}/SI
2890    
2891    /(a|bc)(?1)/SI
2892    
2893    /(a|b\1)(a|b\1)/SI
2894    
2895    /(a|b\1){2}/SI
2896    
2897    /(a|bbbb\1)(a|bbbb\1)/SI
2898    
2899    /(a|bbbb\1){2}/SI
2900    
2901    /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/SI
2902    
2903    /  (?: [\040\t] |  \(
2904    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2905    \)  )*                          # optional leading comment
2906    (?:    (?:
2907    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2908    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2909    |
2910    " (?:                      # opening quote...
2911    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2912    |                     #    or
2913    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2914    )* "  # closing quote
2915    )                    # initial word
2916    (?:  (?: [\040\t] |  \(
2917    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2918    \)  )*  \.  (?: [\040\t] |  \(
2919    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2920    \)  )*   (?:
2921    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2922    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2923    |
2924    " (?:                      # opening quote...
2925    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2926    |                     #    or
2927    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2928    )* "  # closing quote
2929    )  )* # further okay, if led by a period
2930    (?: [\040\t] |  \(
2931    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2932    \)  )*  @  (?: [\040\t] |  \(
2933    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2934    \)  )*    (?:
2935    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2936    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2937    |   \[                         # [
2938    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2939    \]                        #           ]
2940    )                           # initial subdomain
2941    (?:                                  #
2942    (?: [\040\t] |  \(
2943    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2944    \)  )*  \.                        # if led by a period...
2945    (?: [\040\t] |  \(
2946    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2947    \)  )*   (?:
2948    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2949    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2950    |   \[                         # [
2951    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2952    \]                        #           ]
2953    )                     #   ...further okay
2954    )*
2955    # address
2956    |                     #  or
2957    (?:
2958    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2959    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2960    |
2961    " (?:                      # opening quote...
2962    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2963    |                     #    or
2964    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2965    )* "  # closing quote
2966    )             # one word, optionally followed by....
2967    (?:
2968    [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
2969    \(
2970    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2971    \)       |  # comments, or...
2972    
2973    " (?:                      # opening quote...
2974    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2975    |                     #    or
2976    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2977    )* "  # closing quote
2978    # quoted strings
2979    )*
2980    <  (?: [\040\t] |  \(
2981    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2982    \)  )*                     # leading <
2983    (?:  @  (?: [\040\t] |  \(
2984    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2985    \)  )*    (?:
2986    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2987    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2988    |   \[                         # [
2989    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2990    \]                        #           ]
2991    )                           # initial subdomain
2992    (?:                                  #
2993    (?: [\040\t] |  \(
2994    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2995    \)  )*  \.                        # if led by a period...
2996    (?: [\040\t] |  \(
2997    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2998    \)  )*   (?:
2999    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3000    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3001    |   \[                         # [
3002    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3003    \]                        #           ]
3004    )                     #   ...further okay
3005    )*
3006    
3007    (?:  (?: [\040\t] |  \(
3008    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3009    \)  )*  ,  (?: [\040\t] |  \(
3010    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3011    \)  )*  @  (?: [\040\t] |  \(
3012    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3013    \)  )*    (?:
3014    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3015    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3016    |   \[                         # [
3017    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3018    \]                        #           ]
3019    )                           # initial subdomain
3020    (?:                                  #
3021    (?: [\040\t] |  \(
3022    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3023    \)  )*  \.                        # if led by a period...
3024    (?: [\040\t] |  \(
3025    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3026    \)  )*   (?:
3027    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3028    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3029    |   \[                         # [
3030    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3031    \]                        #           ]
3032    )                     #   ...further okay
3033    )*
3034    )* # further okay, if led by comma
3035    :                                # closing colon
3036    (?: [\040\t] |  \(
3037    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3038    \)  )*  )? #       optional route
3039    (?:
3040    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3041    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3042    |
3043    " (?:                      # opening quote...
3044    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
3045    |                     #    or
3046    \\ [^\x80-\xff]           #   Escaped something (something != CR)
3047    )* "  # closing quote
3048    )                    # initial word
3049    (?:  (?: [\040\t] |  \(
3050    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3051    \)  )*  \.  (?: [\040\t] |  \(
3052    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3053    \)  )*   (?:
3054    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3055    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3056    |
3057    " (?:                      # opening quote...
3058    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
3059    |                     #    or
3060    \\ [^\x80-\xff]           #   Escaped something (something != CR)
3061    )* "  # closing quote
3062    )  )* # further okay, if led by a period
3063    (?: [\040\t] |  \(
3064    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3065    \)  )*  @  (?: [\040\t] |  \(
3066    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3067    \)  )*    (?:
3068    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3069    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3070    |   \[                         # [
3071    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3072    \]                        #           ]
3073    )                           # initial subdomain
3074    (?:                                  #
3075    (?: [\040\t] |  \(
3076    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3077    \)  )*  \.                        # if led by a period...
3078    (?: [\040\t] |  \(
3079    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3080    \)  )*   (?:
3081    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3082    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3083    |   \[                         # [
3084    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3085    \]                        #           ]
3086    )                     #   ...further okay
3087    )*
3088    #       address spec
3089    (?: [\040\t] |  \(
3090    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3091    \)  )*  > #                  trailing >
3092    # name and address
3093    )  (?: [\040\t] |  \(
3094    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3095    \)  )*                       # optional trailing comment
3096    /xSI
3097    
3098    /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/isIS
3099    
3100    "(?>.*/)foo"SI
3101    
3102    /(?(?=[^a-z]+[a-z])  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} ) /xSI
3103    
3104    /(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/iSI
3105    
3106    /(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/SI
3107    
3108    /<a[\s]+href[\s]*=[\s]*          # find <a href=
3109     ([\"\'])?                       # find single or double quote
3110     (?(1) (.*?)\1 | ([^\s]+))       # if quote found, match up to next matching
3111                                     # quote, otherwise match up to next space
3112    /isxSI
3113    
3114    /^(?!:)                       # colon disallowed at start
3115      (?:                         # start of item
3116        (?: [0-9a-f]{1,4} |       # 1-4 hex digits or
3117        (?(1)0 | () ) )           # if null previously matched, fail; else null
3118        :                         # followed by colon
3119      ){1,7}                      # end item; 1-7 of them required
3120      [0-9a-f]{1,4} $             # final hex number at end of string
3121      (?(1)|.)                    # check that there was an empty component
3122      /xiIS
3123    
3124    /(?|(?<a>A)|(?<a>B))/I
3125        AB\Ca
3126        BA\Ca
3127    
3128    /(?|(?<a>A)|(?<b>B))/
3129    
3130    /(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) |
3131        b(?<quote> (?<apostrophe>')|(?<realquote>")) )
3132        (?('quote')[a-z]+|[0-9]+)/JIx
3133        a"aaaaa
3134        b"aaaaa
3135        ** Failers
3136        b"11111
3137        a"11111
3138    
3139    /^(?|(a)(b)(c)(?<D>d)|(?<D>e)) (?('D')X|Y)/JDZx
3140        abcdX
3141        eX
3142        ** Failers
3143        abcdY
3144        ey
3145    
3146    /(?<A>a) (b)(c)  (?<A>d  (?(R&A)$ | (?4)) )/JDZx
3147        abcdd
3148        ** Failers
3149        abcdde
3150    
3151    /abcd*/
3152        xxxxabcd\P
3153        xxxxabcd\P\P
3154    
3155    /abcd*/i
3156        xxxxabcd\P
3157        xxxxabcd\P\P
3158        XXXXABCD\P
3159        XXXXABCD\P\P
3160    
3161    /abc\d*/
3162        xxxxabc1\P
3163        xxxxabc1\P\P
3164    
3165    /(a)bc\1*/
3166        xxxxabca\P
3167        xxxxabca\P\P
3168    
3169    /abc[de]*/
3170        xxxxabcde\P
3171        xxxxabcde\P\P
3172    
3173    /-- This is not in the Perl 5.10 test because Perl seems currently to be broken
3174        and not behaving as specified in that it *does* bumpalong after hitting
3175        (*COMMIT). --/
3176    
3177    /(?1)(A(*COMMIT)|B)D/
3178        ABD
3179        XABD
3180        BAD
3181        ABXABD
3182        ** Failers
3183        ABX
3184        BAXBAD
3185    
3186    /(\3)(\1)(a)/<JS>
3187        cat
3188    
3189    /(\3)(\1)(a)/SI<JS>
3190        cat
3191    
3192    /(\3)(\1)(a)/SI
3193        cat
3194    
3195    /i(?(DEFINE)(?<s>a))/SI
3196        i
3197    
3198    /()i(?(1)a)/SI
3199        ia
3200    
3201    /(?i)a(?-i)b|c/BZ
3202        XabX
3203        XAbX
3204        CcC
3205        ** Failers
3206        XABX
3207    
3208    /(?i)a(?s)b|c/BZ
3209    
3210    /(?i)a(?s-i)b|c/BZ
3211    
3212    /^(ab(c\1)d|x){2}$/BZ
3213        xabcxd
3214    
3215    /^(?&t)*+(?(DEFINE)(?<t>.))$/BZ
3216    
3217    /^(?&t)*(?(DEFINE)(?<t>.))$/BZ
3218    
3219    / -- The first four of these are not in the Perl 5.10 test because Perl
3220         documents that the use of \K in assertions is "not well defined". The
3221         last is here because Perl gives the match as "b" rather than "ab". I
3222         believe this to be a Perl bug. --/
3223    
3224    /(?=a\Kb)ab/
3225        ab
3226    
3227    /(?!a\Kb)ac/
3228        ac
3229    
3230    /^abc(?<=b\Kc)d/
3231        abcd
3232    
3233    /^abc(?<!b\Kq)d/
3234        abcd
3235    
3236    /(?>a\Kb)z|(ab)/
3237        ab
3238    
3239    /----------------------/
3240    
3241    /(?P<L1>(?P<L2>0|)|(?P>L2)(?P>L1))/
3242    
3243    /abc(*MARK:)pqr/
3244    
3245    /abc(*:)pqr/
3246    
3247    /abc(*FAIL:123)xyz/
3248    
3249    /--- This should, and does, fail. In Perl, it does not, which I think is a
3250         bug because replacing the B in the pattern by (B|D) does make it fail. ---/
3251    
3252    /A(*COMMIT)B/+K
3253        ACABX
3254    
3255    /--- These should be different, but in Perl 5.11 are not, which I think
3256         is a bug in Perl. ---/
3257    
3258    /A(*THEN)B|A(*THEN)C/K
3259        AC
3260    
3261    /A(*PRUNE)B|A(*PRUNE)C/K
3262        AC
3263    
3264    /--- A whole lot of tests of verbs with arguments are here rather than in test
3265         11 because Perl doesn't seem to follow its specification entirely
3266         correctly. ---/
3267    
3268    /--- Perl 5.11 sets $REGERROR on the AC failure case here; PCRE does not. It is
3269         not clear how Perl defines "involved in the failure of the match". ---/
3270    
3271    /^(A(*THEN:A)B|C(*THEN:B)D)/K
3272        AB
3273        CD
3274        ** Failers
3275        AC
3276        CB
3277    
3278    /--- Check the use of names for success and failure. PCRE doesn't show these
3279    names for success, though Perl does, contrary to its spec. ---/
3280    
3281    /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/K
3282        AB
3283        CD
3284        ** Failers
3285        AC
3286        CB
3287    
3288    /--- An empty name does not pass back an empty string. It is the same as if no
3289    name were given. ---/
3290    
3291    /^(A(*PRUNE:)B|C(*PRUNE:B)D)/K
3292        AB
3293        CD
3294    
3295    /--- PRUNE goes to next bumpalong; COMMIT does not. ---/
3296    
3297    /A(*PRUNE:A)B/K
3298        ACAB
3299    
3300    /(*MARK:A)(*PRUNE:B)(C|X)/K
3301        C
3302        D
3303    
3304    /(*MARK:A)(*THEN:B)(C|X)/K
3305        C
3306        D
3307    
3308    /--- This should fail, as the skip causes a bump to offset 3 (the skip) ---/
3309    
3310    /A(*MARK:A)A+(*SKIP)(B|Z) | AC/xK
3311        AAAC
3312    
3313    /--- Same --/
3314    
3315    /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/xK
3316        AAAC
3317    
3318    /--- This should fail; the SKIP advances by one, but when we get to AC, the
3319         PRUNE kills it. ---/
3320    
3321    /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/xK
3322        AAAC
3323    
3324    /A(*:A)A+(*SKIP)(B|Z) | AC/xK
3325        AAAC
3326    
3327    /--- This should fail, as a null name is the same as no name ---/
3328    
3329    /A(*MARK:A)A+(*SKIP:)(B|Z) | AC/xK
3330        AAAC
3331    
3332    /--- This fails in PCRE, and I think that is in accordance with Perl's
3333         documentation, though in Perl it succeeds. ---/
3334    
3335    /A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/xK
3336        AAAC
3337    
3338    /--- Mark names can be duplicated ---/
3339    
3340    /A(*:A)B|X(*:A)Y/K
3341        AABC
3342        XXYZ
3343    
3344    /^A(*:A)B|^X(*:A)Y/K
3345        ** Failers
3346        XAQQ
3347    
3348    /--- A check on what happens after hitting a mark and then bumping along to
3349    something that does not even start. Perl reports tags after the failures here,
3350    though it does not when the individual letters are made into something
3351    more complicated. ---/
3352    
3353    /A(*:A)B|XX(*:B)Y/K
3354        AABC
3355        XXYZ
3356        ** Failers
3357        XAQQ
3358        XAQQXZZ
3359        AXQQQ
3360        AXXQQQ
3361    
3362    /--- COMMIT at the start of a pattern should be the same as an anchor. Perl
3363    optimizations defeat this. So does the PCRE optimization unless we disable it
3364    with \Y. ---/
3365    
3366    /(*COMMIT)ABC/
3367        ABCDEFG
3368        ** Failers
3369        DEFGABC\Y
3370    
3371    /--- Repeat some tests with added studying. ---/
3372    
3373    /A(*COMMIT)B/+KS
3374        ACABX
3375    
3376    /A(*THEN)B|A(*THEN)C/KS
3377        AC
3378    
3379    /A(*PRUNE)B|A(*PRUNE)C/KS
3380        AC
3381    
3382    /^(A(*THEN:A)B|C(*THEN:B)D)/KS
3383        AB
3384        CD
3385        ** Failers
3386        AC
3387        CB
3388    
3389    /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/KS
3390        AB
3391        CD
3392        ** Failers
3393        AC
3394        CB
3395    
3396    /^(A(*PRUNE:)B|C(*PRUNE:B)D)/KS
3397        AB
3398        CD
3399    
3400    /A(*PRUNE:A)B/KS
3401        ACAB
3402    
3403    /(*MARK:A)(*PRUNE:B)(C|X)/KS
3404        C
3405        D
3406    
3407    /(*MARK:A)(*THEN:B)(C|X)/KS
3408        C
3409        D
3410    
3411    /A(*MARK:A)A+(*SKIP)(B|Z) | AC/xKS
3412        AAAC
3413    
3414    /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/xKS
3415        AAAC
3416    
3417    /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/xKS
3418        AAAC
3419    
3420    /A(*:A)A+(*SKIP)(B|Z) | AC/xKS
3421        AAAC
3422    
3423    /A(*MARK:A)A+(*SKIP:)(B|Z) | AC/xKS
3424        AAAC
3425    
3426    /A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/xKS
3427        AAAC
3428    
3429    /A(*:A)B|XX(*:B)Y/KS
3430        AABC
3431        XXYZ
3432        ** Failers
3433        XAQQ
3434        XAQQXZZ
3435        AXQQQ
3436        AXXQQQ
3437    
3438    /(*COMMIT)ABC/
3439        ABCDEFG
3440        ** Failers
3441        DEFGABC\Y
3442    
3443    /^(ab (c+(*THEN)cd) | xyz)/x
3444        abcccd
3445    
3446    /^(ab (c+(*PRUNE)cd) | xyz)/x
3447        abcccd
3448    
3449    /^(ab (c+(*FAIL)cd) | xyz)/x
3450        abcccd
3451    
3452    /--- Perl 5.11 gets some of these wrong ---/
3453    
3454    /(?>.(*ACCEPT))*?5/
3455        abcde
3456    
3457    /(.(*ACCEPT))*?5/
3458        abcde
3459    
3460    /(.(*ACCEPT))5/
3461        abcde
3462    
3463    /(.(*ACCEPT))*5/
3464        abcde
3465    
3466  /-- End of testinput2 --/  /-- End of testinput2 --/

Legend:
Removed from v.450  
changed lines
  Added in v.513

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12