/[pcre]/code/trunk/testdata/testinput2
ViewVC logotype

Diff of /code/trunk/testdata/testinput2

Parent Directory | Revision Log | View Patch

revision 450 by ph10, Wed Sep 16 10:56:40 2009 UTC | revision 566 by ph10, Wed Nov 3 18:32:55 2010 UTC
# Line 2  Line 2 
2      of PCRE's API, error diagnostics, and the compiled code of some patterns.      of PCRE's API, error diagnostics, and the compiled code of some patterns.
3      It also checks the non-Perl syntax that PCRE supports (Python, .NET,      It also checks the non-Perl syntax that PCRE supports (Python, .NET,
4      Oniguruma). Finally, there are some tests where PCRE and Perl differ,      Oniguruma). Finally, there are some tests where PCRE and Perl differ,
5      either because PCRE can't be compatible, or there is potential Perl      either because PCRE can't be compatible, or there is a possible Perl
6      bug. --/      bug. --/
7    
8  /-- Originally, the Perl 5.10 things were in here too, but now I have separated  /-- Originally, the Perl >= 5.10 things were in here too, but now I have
9      many (most?) of them out into test 11. However, there may still be some      separated many (most?) of them out into test 11. However, there may still
10      that were overlooked. --/      be some that were overlooked. --/
11    
12  /(a)b|/I  /(a)b|/I
13    
# Line 51  Line 51 
51    
52  /(?X)[\B]/  /(?X)[\B]/
53    
54    /(?X)[\R]/
55    
56    /(?X)[\X]/
57    
58    /[\B]/BZ
59    
60    /[\R]/BZ
61    
62    /[\X]/BZ
63    
64  /[z-a]/  /[z-a]/
65    
66  /^*/  /^*/
# Line 344  Line 354 
354      *** Failers      *** Failers
355      a      a
356    
357  /This one is here because I think Perl 5.005_02 gets the setting of $1 wrong/I  /This one is here because Perl behaves differently; see also the following/I
358    
359  /^(a\1?){4}$/I  /^(a\1?){4}$/I
360        aaaa
361      aaaaaa      aaaaaa
362    
363    /Perl does not fail these two for the final subjects. Neither did PCRE until/
364    /release 8.01. The problem is in backtracking into a subpattern that contains/
365    /a recursive reference to itself. PCRE has now made these into atomic patterns./
366    
367    /^(xa|=?\1a){2}$/
368        xa=xaa
369        ** Failers
370        xa=xaaa
371    
372    /^(xa|=?\1a)+$/
373        xa=xaa
374        ** Failers
375        xa=xaaa
376    
377  /These are syntax tests from Perl 5.005/I  /These are syntax tests from Perl 5.005/I
378    
# Line 1133  Line 1158 
1158    
1159  /(a(?1)+b)/DZ  /(a(?1)+b)/DZ
1160    
 /^\W*(?:((.)\W*(?1)\W*\2|)|((.)\W*(?3)\W*\4|\W*.\W*))\W*$/Ii  
     1221  
     Satan, oscillate my metallic sonatas!  
     A man, a plan, a canal: Panama!  
     Able was I ere I saw Elba.  
     *** Failers  
     The quick brown fox  
   
1161  /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I  /^(\d+|\((?1)([+*-])(?1)\)|-(?1))$/I
1162      12      12
1163      (((2+2)*-3)-7)      (((2+2)*-3)-7)
# Line 2272  a random value. /Ix Line 2289  a random value. /Ix
2289  /a+b?(*THEN)c+(*FAIL)/C  /a+b?(*THEN)c+(*FAIL)/C
2290      aaabccc      aaabccc
2291    
 /a(*PRUNE:XXX)b/  
   
2292  /a(*MARK)b/  /a(*MARK)b/
2293    
2294  /(?i:A{1,}\6666666666)/  /(?i:A{1,}\6666666666)/
# Line 2331  a random value. /Ix Line 2346  a random value. /Ix
2346      a\nb      a\nb
2347      a\r\nb      a\r\nb
2348      a\x85b      a\x85b
2349    
2350    /(*ANY).*/g
2351        abc\r\ndef
2352    
2353    /(*ANYCRLF).*/g
2354        abc\r\ndef
2355    
2356    /(*CRLF).*/g
2357        abc\r\ndef
2358    
2359  /a\Rb/I<bsr_anycrlf>  /a\Rb/I<bsr_anycrlf>
2360      a\rb      a\rb
# Line 2853  a random value. /Ix Line 2877  a random value. /Ix
2877  /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/  /^X(?7)(a)(?|(b)|(q)(r)(s))(c)(d)(Y)/
2878      XYabcdY      XYabcdY
2879    
2880    /(?<=b(?1)|zzz)(a)/
2881        xbaax
2882        xzzzax
2883    
2884    /(a)(?<=b\1)/
2885    
2886    /(a)(?<=b+(?1))/
2887    
2888    /(a+)(?<=b(?1))/
2889    
2890    /(a(?<=b(?1)))/
2891    
2892    /(?<=b(?1))xyz/
2893    
2894    /(?<=b(?1))xyz(b+)pqrstuvew/
2895    
2896    /(a|bc)\1/SI
2897    
2898    /(a|bc)\1{2,3}/SI
2899    
2900    /(a|bc)(?1)/SI
2901    
2902    /(a|b\1)(a|b\1)/SI
2903    
2904    /(a|b\1){2}/SI
2905    
2906    /(a|bbbb\1)(a|bbbb\1)/SI
2907    
2908    /(a|bbbb\1){2}/SI
2909    
2910    /^From +([^ ]+) +[a-zA-Z][a-zA-Z][a-zA-Z] +[a-zA-Z][a-zA-Z][a-zA-Z] +[0-9]?[0-9] +[0-9][0-9]:[0-9][0-9]/SI
2911    
2912    /  (?: [\040\t] |  \(
2913    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2914    \)  )*                          # optional leading comment
2915    (?:    (?:
2916    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2917    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2918    |
2919    " (?:                      # opening quote...
2920    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2921    |                     #    or
2922    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2923    )* "  # closing quote
2924    )                    # initial word
2925    (?:  (?: [\040\t] |  \(
2926    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2927    \)  )*  \.  (?: [\040\t] |  \(
2928    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2929    \)  )*   (?:
2930    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2931    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2932    |
2933    " (?:                      # opening quote...
2934    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2935    |                     #    or
2936    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2937    )* "  # closing quote
2938    )  )* # further okay, if led by a period
2939    (?: [\040\t] |  \(
2940    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2941    \)  )*  @  (?: [\040\t] |  \(
2942    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2943    \)  )*    (?:
2944    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2945    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2946    |   \[                         # [
2947    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2948    \]                        #           ]
2949    )                           # initial subdomain
2950    (?:                                  #
2951    (?: [\040\t] |  \(
2952    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2953    \)  )*  \.                        # if led by a period...
2954    (?: [\040\t] |  \(
2955    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2956    \)  )*   (?:
2957    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2958    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2959    |   \[                         # [
2960    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2961    \]                        #           ]
2962    )                     #   ...further okay
2963    )*
2964    # address
2965    |                     #  or
2966    (?:
2967    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2968    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2969    |
2970    " (?:                      # opening quote...
2971    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2972    |                     #    or
2973    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2974    )* "  # closing quote
2975    )             # one word, optionally followed by....
2976    (?:
2977    [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]  |  # atom and space parts, or...
2978    \(
2979    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2980    \)       |  # comments, or...
2981    
2982    " (?:                      # opening quote...
2983    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
2984    |                     #    or
2985    \\ [^\x80-\xff]           #   Escaped something (something != CR)
2986    )* "  # closing quote
2987    # quoted strings
2988    )*
2989    <  (?: [\040\t] |  \(
2990    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2991    \)  )*                     # leading <
2992    (?:  @  (?: [\040\t] |  \(
2993    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
2994    \)  )*    (?:
2995    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
2996    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
2997    |   \[                         # [
2998    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
2999    \]                        #           ]
3000    )                           # initial subdomain
3001    (?:                                  #
3002    (?: [\040\t] |  \(
3003    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3004    \)  )*  \.                        # if led by a period...
3005    (?: [\040\t] |  \(
3006    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3007    \)  )*   (?:
3008    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3009    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3010    |   \[                         # [
3011    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3012    \]                        #           ]
3013    )                     #   ...further okay
3014    )*
3015    
3016    (?:  (?: [\040\t] |  \(
3017    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3018    \)  )*  ,  (?: [\040\t] |  \(
3019    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3020    \)  )*  @  (?: [\040\t] |  \(
3021    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3022    \)  )*    (?:
3023    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3024    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3025    |   \[                         # [
3026    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3027    \]                        #           ]
3028    )                           # initial subdomain
3029    (?:                                  #
3030    (?: [\040\t] |  \(
3031    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3032    \)  )*  \.                        # if led by a period...
3033    (?: [\040\t] |  \(
3034    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3035    \)  )*   (?:
3036    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3037    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3038    |   \[                         # [
3039    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3040    \]                        #           ]
3041    )                     #   ...further okay
3042    )*
3043    )* # further okay, if led by comma
3044    :                                # closing colon
3045    (?: [\040\t] |  \(
3046    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3047    \)  )*  )? #       optional route
3048    (?:
3049    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3050    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3051    |
3052    " (?:                      # opening quote...
3053    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
3054    |                     #    or
3055    \\ [^\x80-\xff]           #   Escaped something (something != CR)
3056    )* "  # closing quote
3057    )                    # initial word
3058    (?:  (?: [\040\t] |  \(
3059    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3060    \)  )*  \.  (?: [\040\t] |  \(
3061    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3062    \)  )*   (?:
3063    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3064    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3065    |
3066    " (?:                      # opening quote...
3067    [^\\\x80-\xff\n\015"]                #   Anything except backslash and quote
3068    |                     #    or
3069    \\ [^\x80-\xff]           #   Escaped something (something != CR)
3070    )* "  # closing quote
3071    )  )* # further okay, if led by a period
3072    (?: [\040\t] |  \(
3073    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3074    \)  )*  @  (?: [\040\t] |  \(
3075    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3076    \)  )*    (?:
3077    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3078    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3079    |   \[                         # [
3080    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3081    \]                        #           ]
3082    )                           # initial subdomain
3083    (?:                                  #
3084    (?: [\040\t] |  \(
3085    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3086    \)  )*  \.                        # if led by a period...
3087    (?: [\040\t] |  \(
3088    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3089    \)  )*   (?:
3090    [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+    # some number of atom characters...
3091    (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom
3092    |   \[                         # [
3093    (?: [^\\\x80-\xff\n\015\[\]] |  \\ [^\x80-\xff]  )*    #    stuff
3094    \]                        #           ]
3095    )                     #   ...further okay
3096    )*
3097    #       address spec
3098    (?: [\040\t] |  \(
3099    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3100    \)  )*  > #                  trailing >
3101    # name and address
3102    )  (?: [\040\t] |  \(
3103    (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  |  \( (?:  [^\\\x80-\xff\n\015()]  |  \\ [^\x80-\xff]  )* \)  )*
3104    \)  )*                       # optional trailing comment
3105    /xSI
3106    
3107    /<tr([\w\W\s\d][^<>]{0,})><TD([\w\W\s\d][^<>]{0,})>([\d]{0,}\.)(.*)((<BR>([\w\W\s\d][^<>]{0,})|[\s]{0,}))<\/a><\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><TD([\w\W\s\d][^<>]{0,})>([\w\W\s\d][^<>]{0,})<\/TD><\/TR>/isIS
3108    
3109    "(?>.*/)foo"SI
3110    
3111    /(?(?=[^a-z]+[a-z])  \d{2}-[a-z]{3}-\d{2}  |  \d{2}-\d{2}-\d{2} ) /xSI
3112    
3113    /(?:(?:(?:(?:(?:(?:(?:(?:(?:(a|b|c))))))))))/iSI
3114    
3115    /(?:c|d)(?:)(?:aaaaaaaa(?:)(?:bbbbbbbb)(?:bbbbbbbb(?:))(?:bbbbbbbb(?:)(?:bbbbbbbb)))/SI
3116    
3117    /<a[\s]+href[\s]*=[\s]*          # find <a href=
3118     ([\"\'])?                       # find single or double quote
3119     (?(1) (.*?)\1 | ([^\s]+))       # if quote found, match up to next matching
3120                                     # quote, otherwise match up to next space
3121    /isxSI
3122    
3123    /^(?!:)                       # colon disallowed at start
3124      (?:                         # start of item
3125        (?: [0-9a-f]{1,4} |       # 1-4 hex digits or
3126        (?(1)0 | () ) )           # if null previously matched, fail; else null
3127        :                         # followed by colon
3128      ){1,7}                      # end item; 1-7 of them required
3129      [0-9a-f]{1,4} $             # final hex number at end of string
3130      (?(1)|.)                    # check that there was an empty component
3131      /xiIS
3132    
3133    /(?|(?<a>A)|(?<a>B))/I
3134        AB\Ca
3135        BA\Ca
3136    
3137    /(?|(?<a>A)|(?<b>B))/
3138    
3139    /(?:a(?<quote> (?<apostrophe>')|(?<realquote>")) |
3140        b(?<quote> (?<apostrophe>')|(?<realquote>")) )
3141        (?('quote')[a-z]+|[0-9]+)/JIx
3142        a"aaaaa
3143        b"aaaaa
3144        ** Failers
3145        b"11111
3146        a"11111
3147    
3148    /^(?|(a)(b)(c)(?<D>d)|(?<D>e)) (?('D')X|Y)/JDZx
3149        abcdX
3150        eX
3151        ** Failers
3152        abcdY
3153        ey
3154    
3155    /(?<A>a) (b)(c)  (?<A>d  (?(R&A)$ | (?4)) )/JDZx
3156        abcdd
3157        ** Failers
3158        abcdde
3159    
3160    /abcd*/
3161        xxxxabcd\P
3162        xxxxabcd\P\P
3163    
3164    /abcd*/i
3165        xxxxabcd\P
3166        xxxxabcd\P\P
3167        XXXXABCD\P
3168        XXXXABCD\P\P
3169    
3170    /abc\d*/
3171        xxxxabc1\P
3172        xxxxabc1\P\P
3173    
3174    /(a)bc\1*/
3175        xxxxabca\P
3176        xxxxabca\P\P
3177    
3178    /abc[de]*/
3179        xxxxabcde\P
3180        xxxxabcde\P\P
3181    
3182    /-- This is not in the Perl >= 5.10 test because Perl seems currently to be
3183        broken and not behaving as specified in that it *does* bumpalong after
3184        hitting (*COMMIT). --/
3185    
3186    /(?1)(A(*COMMIT)|B)D/
3187        ABD
3188        XABD
3189        BAD
3190        ABXABD
3191        ** Failers
3192        ABX
3193        BAXBAD
3194    
3195    /(\3)(\1)(a)/<JS>
3196        cat
3197    
3198    /(\3)(\1)(a)/SI<JS>
3199        cat
3200    
3201    /(\3)(\1)(a)/SI
3202        cat
3203    
3204    /i(?(DEFINE)(?<s>a))/SI
3205        i
3206    
3207    /()i(?(1)a)/SI
3208        ia
3209    
3210    /(?i)a(?-i)b|c/BZ
3211        XabX
3212        XAbX
3213        CcC
3214        ** Failers
3215        XABX
3216    
3217    /(?i)a(?s)b|c/BZ
3218    
3219    /(?i)a(?s-i)b|c/BZ
3220    
3221    /^(ab(c\1)d|x){2}$/BZ
3222        xabcxd
3223    
3224    /^(?&t)*+(?(DEFINE)(?<t>.))$/BZ
3225    
3226    /^(?&t)*(?(DEFINE)(?<t>.))$/BZ
3227    
3228    / -- The first four of these are not in the Perl >= 5.10 test because Perl
3229         documents that the use of \K in assertions is "not well defined". The
3230         last is here because Perl gives the match as "b" rather than "ab". I
3231         believe this to be a Perl bug. --/
3232    
3233    /(?=a\Kb)ab/
3234        ab
3235    
3236    /(?!a\Kb)ac/
3237        ac
3238    
3239    /^abc(?<=b\Kc)d/
3240        abcd
3241    
3242    /^abc(?<!b\Kq)d/
3243        abcd
3244    
3245    /(?>a\Kb)z|(ab)/
3246        ab
3247    
3248    /----------------------/
3249    
3250    /(?P<L1>(?P<L2>0|)|(?P>L2)(?P>L1))/
3251    
3252    /abc(*MARK:)pqr/
3253    
3254    /abc(*:)pqr/
3255    
3256    /abc(*FAIL:123)xyz/
3257    
3258    /--- This should, and does, fail. In Perl, it does not, which I think is a
3259         bug because replacing the B in the pattern by (B|D) does make it fail. ---/
3260    
3261    /A(*COMMIT)B/+K
3262        ACABX
3263    
3264    /--- These should be different, but in Perl 5.11 are not, which I think
3265         is a bug in Perl. ---/
3266    
3267    /A(*THEN)B|A(*THEN)C/K
3268        AC
3269    
3270    /A(*PRUNE)B|A(*PRUNE)C/K
3271        AC
3272    
3273    /--- A whole lot of tests of verbs with arguments are here rather than in test
3274         11 because Perl doesn't seem to follow its specification entirely
3275         correctly. ---/
3276    
3277    /--- Perl 5.11 sets $REGERROR on the AC failure case here; PCRE does not. It is
3278         not clear how Perl defines "involved in the failure of the match". ---/
3279    
3280    /^(A(*THEN:A)B|C(*THEN:B)D)/K
3281        AB
3282        CD
3283        ** Failers
3284        AC
3285        CB
3286    
3287    /--- Check the use of names for success and failure. PCRE doesn't show these
3288    names for success, though Perl does, contrary to its spec. ---/
3289    
3290    /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/K
3291        AB
3292        CD
3293        ** Failers
3294        AC
3295        CB
3296    
3297    /--- An empty name does not pass back an empty string. It is the same as if no
3298    name were given. ---/
3299    
3300    /^(A(*PRUNE:)B|C(*PRUNE:B)D)/K
3301        AB
3302        CD
3303    
3304    /--- PRUNE goes to next bumpalong; COMMIT does not. ---/
3305    
3306    /A(*PRUNE:A)B/K
3307        ACAB
3308    
3309    /(*MARK:A)(*PRUNE:B)(C|X)/K
3310        C
3311        D
3312    
3313    /(*MARK:A)(*THEN:B)(C|X)/K
3314        C
3315        D
3316    
3317    /--- This should fail, as the skip causes a bump to offset 3 (the skip) ---/
3318    
3319    /A(*MARK:A)A+(*SKIP)(B|Z) | AC/xK
3320        AAAC
3321    
3322    /--- Same --/
3323    
3324    /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/xK
3325        AAAC
3326    
3327    /--- This should fail; the SKIP advances by one, but when we get to AC, the
3328         PRUNE kills it. ---/
3329    
3330    /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/xK
3331        AAAC
3332    
3333    /A(*:A)A+(*SKIP)(B|Z) | AC/xK
3334        AAAC
3335    
3336    /--- This should fail, as a null name is the same as no name ---/
3337    
3338    /A(*MARK:A)A+(*SKIP:)(B|Z) | AC/xK
3339        AAAC
3340    
3341    /--- This fails in PCRE, and I think that is in accordance with Perl's
3342         documentation, though in Perl it succeeds. ---/
3343    
3344    /A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/xK
3345        AAAC
3346    
3347    /--- Mark names can be duplicated ---/
3348    
3349    /A(*:A)B|X(*:A)Y/K
3350        AABC
3351        XXYZ
3352    
3353    /^A(*:A)B|^X(*:A)Y/K
3354        ** Failers
3355        XAQQ
3356    
3357    /--- A check on what happens after hitting a mark and then bumping along to
3358    something that does not even start. Perl reports tags after the failures here,
3359    though it does not when the individual letters are made into something
3360    more complicated. ---/
3361    
3362    /A(*:A)B|XX(*:B)Y/K
3363        AABC
3364        XXYZ
3365        ** Failers
3366        XAQQ
3367        XAQQXZZ
3368        AXQQQ
3369        AXXQQQ
3370    
3371    /--- COMMIT at the start of a pattern should be the same as an anchor. Perl
3372    optimizations defeat this. So does the PCRE optimization unless we disable it
3373    with \Y. ---/
3374    
3375    /(*COMMIT)ABC/
3376        ABCDEFG
3377        ** Failers
3378        DEFGABC\Y
3379    
3380    /--- Repeat some tests with added studying. ---/
3381    
3382    /A(*COMMIT)B/+KS
3383        ACABX
3384    
3385    /A(*THEN)B|A(*THEN)C/KS
3386        AC
3387    
3388    /A(*PRUNE)B|A(*PRUNE)C/KS
3389        AC
3390    
3391    /^(A(*THEN:A)B|C(*THEN:B)D)/KS
3392        AB
3393        CD
3394        ** Failers
3395        AC
3396        CB
3397    
3398    /^(A(*PRUNE:A)B|C(*PRUNE:B)D)/KS
3399        AB
3400        CD
3401        ** Failers
3402        AC
3403        CB
3404    
3405    /^(A(*PRUNE:)B|C(*PRUNE:B)D)/KS
3406        AB
3407        CD
3408    
3409    /A(*PRUNE:A)B/KS
3410        ACAB
3411    
3412    /(*MARK:A)(*PRUNE:B)(C|X)/KS
3413        C
3414        D
3415    
3416    /(*MARK:A)(*THEN:B)(C|X)/KS
3417        C
3418        D
3419    
3420    /A(*MARK:A)A+(*SKIP)(B|Z) | AC/xKS
3421        AAAC
3422    
3423    /A(*MARK:A)A+(*MARK:B)(*SKIP:B)(B|Z) | AC/xKS
3424        AAAC
3425    
3426    /A(*PRUNE:A)A+(*SKIP:A)(B|Z) | AC/xKS
3427        AAAC
3428    
3429    /A(*:A)A+(*SKIP)(B|Z) | AC/xKS
3430        AAAC
3431    
3432    /A(*MARK:A)A+(*SKIP:)(B|Z) | AC/xKS
3433        AAAC
3434    
3435    /A(*MARK:A)A+(*SKIP:B)(B|Z) | AAC/xKS
3436        AAAC
3437    
3438    /A(*:A)B|XX(*:B)Y/KS
3439        AABC
3440        XXYZ
3441        ** Failers
3442        XAQQ
3443        XAQQXZZ
3444        AXQQQ
3445        AXXQQQ
3446    
3447    /(*COMMIT)ABC/
3448        ABCDEFG
3449        ** Failers
3450        DEFGABC\Y
3451    
3452    /^(ab (c+(*THEN)cd) | xyz)/x
3453        abcccd
3454    
3455    /^(ab (c+(*PRUNE)cd) | xyz)/x
3456        abcccd
3457    
3458    /^(ab (c+(*FAIL)cd) | xyz)/x
3459        abcccd
3460    
3461    /--- Perl 5.11 gets some of these wrong ---/
3462    
3463    /(?>.(*ACCEPT))*?5/
3464        abcde
3465    
3466    /(.(*ACCEPT))*?5/
3467        abcde
3468    
3469    /(.(*ACCEPT))5/
3470        abcde
3471    
3472    /(.(*ACCEPT))*5/
3473        abcde
3474    
3475    /A\NB./BZ
3476        ACBD
3477        *** Failers
3478        A\nB
3479        ACB\n
3480    
3481    /A\NB./sBZ
3482        ACBD
3483        ACB\n
3484        *** Failers
3485        A\nB
3486    
3487    /A\NB/<crlf>
3488        A\nB
3489        A\rB
3490        ** Failers
3491        A\r\nB
3492    
3493    /\R+b/BZ
3494    
3495    /\R+\n/BZ
3496    
3497    /\R+\d/BZ
3498    
3499    /\d*\R/BZ
3500    
3501    /\s*\R/BZ
3502    
3503    /-- Perl treats this one differently, not failing the second string. I believe
3504        that is a bug in Perl. --/
3505    
3506    /^((abc|abcx)(*THEN)y|abcd)/
3507        abcd
3508        *** Failers
3509        abcxy
3510    
3511    /(?<=abc)def/
3512        abc\P\P
3513    
3514    /abc$/
3515        abc
3516        abc\P
3517        abc\P\P
3518    
3519    /abc$/m
3520        abc
3521        abc\n
3522        abc\P\P
3523        abc\n\P\P
3524        abc\P
3525        abc\n\P
3526    
3527    /abc\z/
3528        abc
3529        abc\P
3530        abc\P\P
3531    
3532    /abc\Z/
3533        abc
3534        abc\P
3535        abc\P\P
3536    
3537    /abc\b/
3538        abc
3539        abc\P
3540        abc\P\P
3541    
3542    /abc\B/
3543        abc
3544        abc\P
3545        abc\P\P
3546    
3547  /-- End of testinput2 --/  /-- End of testinput2 --/

Legend:
Removed from v.450  
changed lines
  Added in v.566

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12