| 1 |
/-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit
|
| 2 |
library. --/
|
| 3 |
|
| 4 |
/X(\C{3})/8
|
| 5 |
X\x{1234}
|
| 6 |
0: X\x{1234}
|
| 7 |
1: \x{1234}
|
| 8 |
|
| 9 |
/X(\C{4})/8
|
| 10 |
X\x{1234}YZ
|
| 11 |
0: X\x{1234}Y
|
| 12 |
1: \x{1234}Y
|
| 13 |
|
| 14 |
/X\C*/8
|
| 15 |
XYZabcdce
|
| 16 |
0: XYZabcdce
|
| 17 |
|
| 18 |
/X\C*?/8
|
| 19 |
XYZabcde
|
| 20 |
0: X
|
| 21 |
|
| 22 |
/X\C{3,5}/8
|
| 23 |
Xabcdefg
|
| 24 |
0: Xabcde
|
| 25 |
X\x{1234}
|
| 26 |
0: X\x{1234}
|
| 27 |
X\x{1234}YZ
|
| 28 |
0: X\x{1234}YZ
|
| 29 |
X\x{1234}\x{512}
|
| 30 |
0: X\x{1234}\x{512}
|
| 31 |
X\x{1234}\x{512}YZ
|
| 32 |
0: X\x{1234}\x{512}
|
| 33 |
|
| 34 |
/X\C{3,5}?/8
|
| 35 |
Xabcdefg
|
| 36 |
0: Xabc
|
| 37 |
X\x{1234}
|
| 38 |
0: X\x{1234}
|
| 39 |
X\x{1234}YZ
|
| 40 |
0: X\x{1234}
|
| 41 |
X\x{1234}\x{512}
|
| 42 |
0: X\x{1234}
|
| 43 |
|
| 44 |
/a\Cb/
|
| 45 |
aXb
|
| 46 |
0: aXb
|
| 47 |
a\nb
|
| 48 |
0: a\x0ab
|
| 49 |
|
| 50 |
/a\Cb/8
|
| 51 |
aXb
|
| 52 |
0: aXb
|
| 53 |
a\nb
|
| 54 |
0: a\x{0a}b
|
| 55 |
|
| 56 |
/a\C\Cb/8
|
| 57 |
a\x{100}b
|
| 58 |
0: a\x{100}b
|
| 59 |
|
| 60 |
/ab\Cde/8
|
| 61 |
abXde
|
| 62 |
0: abXde
|
| 63 |
|
| 64 |
/a\C\Cb/8
|
| 65 |
a\x{100}b
|
| 66 |
0: a\x{100}b
|
| 67 |
** Failers
|
| 68 |
No match
|
| 69 |
a\x{12257}b
|
| 70 |
No match
|
| 71 |
|
| 72 |
/[]/8
|
| 73 |
Failed: invalid UTF-8 string at offset 1
|
| 74 |
|
| 75 |
//8
|
| 76 |
Failed: invalid UTF-8 string at offset 0
|
| 77 |
|
| 78 |
/xxx/8
|
| 79 |
Failed: invalid UTF-8 string at offset 0
|
| 80 |
|
| 81 |
/xxx/8?DZSS
|
| 82 |
------------------------------------------------------------------
|
| 83 |
Bra
|
| 84 |
\X{c0}\X{c0}\X{c0}xxx
|
| 85 |
Ket
|
| 86 |
End
|
| 87 |
------------------------------------------------------------------
|
| 88 |
Capturing subpattern count = 0
|
| 89 |
Options: utf no_utf_check
|
| 90 |
First char = \x{c3}
|
| 91 |
Need char = 'x'
|
| 92 |
|
| 93 |
/abc/8
|
| 94 |
]
|
| 95 |
Error -10 (bad UTF-8 string) offset=0 reason=6
|
| 96 |
|
| 97 |
Error -10 (bad UTF-8 string) offset=0 reason=1
|
| 98 |
|
| 99 |
Error -10 (bad UTF-8 string) offset=0 reason=6
|
| 100 |
\?
|
| 101 |
No match
|
| 102 |
\xe1\x88
|
| 103 |
Error -10 (bad UTF-8 string) offset=0 reason=1
|
| 104 |
\P\xe1\x88
|
| 105 |
Error -10 (bad UTF-8 string) offset=0 reason=1
|
| 106 |
\P\P\xe1\x88
|
| 107 |
Error -25 (short UTF-8 string) offset=0 reason=1
|
| 108 |
XX\xea
|
| 109 |
Error -10 (bad UTF-8 string) offset=2 reason=2
|
| 110 |
\O0XX\xea
|
| 111 |
Error -10 (bad UTF-8 string)
|
| 112 |
\O1XX\xea
|
| 113 |
Error -10 (bad UTF-8 string)
|
| 114 |
\O2XX\xea
|
| 115 |
Error -10 (bad UTF-8 string) offset=2 reason=2
|
| 116 |
XX\xf1
|
| 117 |
Error -10 (bad UTF-8 string) offset=2 reason=3
|
| 118 |
XX\xf8
|
| 119 |
Error -10 (bad UTF-8 string) offset=2 reason=4
|
| 120 |
XX\xfc
|
| 121 |
Error -10 (bad UTF-8 string) offset=2 reason=5
|
| 122 |
ZZ\xea\xaf\x20YY
|
| 123 |
Error -10 (bad UTF-8 string) offset=2 reason=7
|
| 124 |
ZZ\xfd\xbf\xbf\x2f\xbf\xbfYY
|
| 125 |
Error -10 (bad UTF-8 string) offset=2 reason=8
|
| 126 |
ZZ\xfd\xbf\xbf\xbf\x2f\xbfYY
|
| 127 |
Error -10 (bad UTF-8 string) offset=2 reason=9
|
| 128 |
ZZ\xfd\xbf\xbf\xbf\xbf\x2fYY
|
| 129 |
Error -10 (bad UTF-8 string) offset=2 reason=10
|
| 130 |
ZZ\xffYY
|
| 131 |
Error -10 (bad UTF-8 string) offset=2 reason=21
|
| 132 |
ZZ\xfeYY
|
| 133 |
Error -10 (bad UTF-8 string) offset=2 reason=21
|
| 134 |
|
| 135 |
/anything/8
|
| 136 |
\xc0\x80
|
| 137 |
Error -10 (bad UTF-8 string) offset=0 reason=15
|
| 138 |
\xc1\x8f
|
| 139 |
Error -10 (bad UTF-8 string) offset=0 reason=15
|
| 140 |
\xe0\x9f\x80
|
| 141 |
Error -10 (bad UTF-8 string) offset=0 reason=16
|
| 142 |
\xf0\x8f\x80\x80
|
| 143 |
Error -10 (bad UTF-8 string) offset=0 reason=17
|
| 144 |
\xf8\x87\x80\x80\x80
|
| 145 |
Error -10 (bad UTF-8 string) offset=0 reason=18
|
| 146 |
\xfc\x83\x80\x80\x80\x80
|
| 147 |
Error -10 (bad UTF-8 string) offset=0 reason=19
|
| 148 |
\xfe\x80\x80\x80\x80\x80
|
| 149 |
Error -10 (bad UTF-8 string) offset=0 reason=21
|
| 150 |
\xff\x80\x80\x80\x80\x80
|
| 151 |
Error -10 (bad UTF-8 string) offset=0 reason=21
|
| 152 |
\xc3\x8f
|
| 153 |
No match
|
| 154 |
\xe0\xaf\x80
|
| 155 |
No match
|
| 156 |
\xe1\x80\x80
|
| 157 |
No match
|
| 158 |
\xf0\x9f\x80\x80
|
| 159 |
No match
|
| 160 |
\xf1\x8f\x80\x80
|
| 161 |
No match
|
| 162 |
\xf8\x88\x80\x80\x80
|
| 163 |
Error -10 (bad UTF-8 string) offset=0 reason=11
|
| 164 |
\xf9\x87\x80\x80\x80
|
| 165 |
Error -10 (bad UTF-8 string) offset=0 reason=11
|
| 166 |
\xfc\x84\x80\x80\x80\x80
|
| 167 |
Error -10 (bad UTF-8 string) offset=0 reason=12
|
| 168 |
\xfd\x83\x80\x80\x80\x80
|
| 169 |
Error -10 (bad UTF-8 string) offset=0 reason=12
|
| 170 |
\?\xf8\x88\x80\x80\x80
|
| 171 |
No match
|
| 172 |
\?\xf9\x87\x80\x80\x80
|
| 173 |
No match
|
| 174 |
\?\xfc\x84\x80\x80\x80\x80
|
| 175 |
No match
|
| 176 |
\?\xfd\x83\x80\x80\x80\x80
|
| 177 |
No match
|
| 178 |
|
| 179 |
/\x{100}/8DZ
|
| 180 |
------------------------------------------------------------------
|
| 181 |
Bra
|
| 182 |
\x{100}
|
| 183 |
Ket
|
| 184 |
End
|
| 185 |
------------------------------------------------------------------
|
| 186 |
Capturing subpattern count = 0
|
| 187 |
Options: utf
|
| 188 |
First char = \x{c4}
|
| 189 |
Need char = 128
|
| 190 |
|
| 191 |
/\x{1000}/8DZ
|
| 192 |
------------------------------------------------------------------
|
| 193 |
Bra
|
| 194 |
\x{1000}
|
| 195 |
Ket
|
| 196 |
End
|
| 197 |
------------------------------------------------------------------
|
| 198 |
Capturing subpattern count = 0
|
| 199 |
Options: utf
|
| 200 |
First char = \x{e1}
|
| 201 |
Need char = 128
|
| 202 |
|
| 203 |
/\x{10000}/8DZ
|
| 204 |
------------------------------------------------------------------
|
| 205 |
Bra
|
| 206 |
\x{10000}
|
| 207 |
Ket
|
| 208 |
End
|
| 209 |
------------------------------------------------------------------
|
| 210 |
Capturing subpattern count = 0
|
| 211 |
Options: utf
|
| 212 |
First char = \x{f0}
|
| 213 |
Need char = 128
|
| 214 |
|
| 215 |
/\x{100000}/8DZ
|
| 216 |
------------------------------------------------------------------
|
| 217 |
Bra
|
| 218 |
\x{100000}
|
| 219 |
Ket
|
| 220 |
End
|
| 221 |
------------------------------------------------------------------
|
| 222 |
Capturing subpattern count = 0
|
| 223 |
Options: utf
|
| 224 |
First char = \x{f4}
|
| 225 |
Need char = 128
|
| 226 |
|
| 227 |
/\x{10ffff}/8DZ
|
| 228 |
------------------------------------------------------------------
|
| 229 |
Bra
|
| 230 |
\x{10ffff}
|
| 231 |
Ket
|
| 232 |
End
|
| 233 |
------------------------------------------------------------------
|
| 234 |
Capturing subpattern count = 0
|
| 235 |
Options: utf
|
| 236 |
First char = \x{f4}
|
| 237 |
Need char = 191
|
| 238 |
|
| 239 |
/[\x{ff}]/8DZ
|
| 240 |
------------------------------------------------------------------
|
| 241 |
Bra
|
| 242 |
\x{ff}
|
| 243 |
Ket
|
| 244 |
End
|
| 245 |
------------------------------------------------------------------
|
| 246 |
Capturing subpattern count = 0
|
| 247 |
Options: utf
|
| 248 |
First char = \x{c3}
|
| 249 |
Need char = 191
|
| 250 |
|
| 251 |
/[\x{100}]/8DZ
|
| 252 |
------------------------------------------------------------------
|
| 253 |
Bra
|
| 254 |
\x{100}
|
| 255 |
Ket
|
| 256 |
End
|
| 257 |
------------------------------------------------------------------
|
| 258 |
Capturing subpattern count = 0
|
| 259 |
Options: utf
|
| 260 |
First char = \x{c4}
|
| 261 |
Need char = 128
|
| 262 |
|
| 263 |
/\x80/8DZ
|
| 264 |
------------------------------------------------------------------
|
| 265 |
Bra
|
| 266 |
\x{80}
|
| 267 |
Ket
|
| 268 |
End
|
| 269 |
------------------------------------------------------------------
|
| 270 |
Capturing subpattern count = 0
|
| 271 |
Options: utf
|
| 272 |
First char = \x{c2}
|
| 273 |
Need char = 128
|
| 274 |
|
| 275 |
/\xff/8DZ
|
| 276 |
------------------------------------------------------------------
|
| 277 |
Bra
|
| 278 |
\x{ff}
|
| 279 |
Ket
|
| 280 |
End
|
| 281 |
------------------------------------------------------------------
|
| 282 |
Capturing subpattern count = 0
|
| 283 |
Options: utf
|
| 284 |
First char = \x{c3}
|
| 285 |
Need char = 191
|
| 286 |
|
| 287 |
/\x{D55c}\x{ad6d}\x{C5B4}/DZ8
|
| 288 |
------------------------------------------------------------------
|
| 289 |
Bra
|
| 290 |
\x{d55c}\x{ad6d}\x{c5b4}
|
| 291 |
Ket
|
| 292 |
End
|
| 293 |
------------------------------------------------------------------
|
| 294 |
Capturing subpattern count = 0
|
| 295 |
Options: utf
|
| 296 |
First char = \x{ed}
|
| 297 |
Need char = 180
|
| 298 |
\x{D55c}\x{ad6d}\x{C5B4}
|
| 299 |
0: \x{d55c}\x{ad6d}\x{c5b4}
|
| 300 |
|
| 301 |
/\x{65e5}\x{672c}\x{8a9e}/DZ8
|
| 302 |
------------------------------------------------------------------
|
| 303 |
Bra
|
| 304 |
\x{65e5}\x{672c}\x{8a9e}
|
| 305 |
Ket
|
| 306 |
End
|
| 307 |
------------------------------------------------------------------
|
| 308 |
Capturing subpattern count = 0
|
| 309 |
Options: utf
|
| 310 |
First char = \x{e6}
|
| 311 |
Need char = 158
|
| 312 |
\x{65e5}\x{672c}\x{8a9e}
|
| 313 |
0: \x{65e5}\x{672c}\x{8a9e}
|
| 314 |
|
| 315 |
/\x{80}/DZ8
|
| 316 |
------------------------------------------------------------------
|
| 317 |
Bra
|
| 318 |
\x{80}
|
| 319 |
Ket
|
| 320 |
End
|
| 321 |
------------------------------------------------------------------
|
| 322 |
Capturing subpattern count = 0
|
| 323 |
Options: utf
|
| 324 |
First char = \x{c2}
|
| 325 |
Need char = 128
|
| 326 |
|
| 327 |
/\x{084}/DZ8
|
| 328 |
------------------------------------------------------------------
|
| 329 |
Bra
|
| 330 |
\x{84}
|
| 331 |
Ket
|
| 332 |
End
|
| 333 |
------------------------------------------------------------------
|
| 334 |
Capturing subpattern count = 0
|
| 335 |
Options: utf
|
| 336 |
First char = \x{c2}
|
| 337 |
Need char = 132
|
| 338 |
|
| 339 |
/\x{104}/DZ8
|
| 340 |
------------------------------------------------------------------
|
| 341 |
Bra
|
| 342 |
\x{104}
|
| 343 |
Ket
|
| 344 |
End
|
| 345 |
------------------------------------------------------------------
|
| 346 |
Capturing subpattern count = 0
|
| 347 |
Options: utf
|
| 348 |
First char = \x{c4}
|
| 349 |
Need char = 132
|
| 350 |
|
| 351 |
/\x{861}/DZ8
|
| 352 |
------------------------------------------------------------------
|
| 353 |
Bra
|
| 354 |
\x{861}
|
| 355 |
Ket
|
| 356 |
End
|
| 357 |
------------------------------------------------------------------
|
| 358 |
Capturing subpattern count = 0
|
| 359 |
Options: utf
|
| 360 |
First char = \x{e0}
|
| 361 |
Need char = 161
|
| 362 |
|
| 363 |
/\x{212ab}/DZ8
|
| 364 |
------------------------------------------------------------------
|
| 365 |
Bra
|
| 366 |
\x{212ab}
|
| 367 |
Ket
|
| 368 |
End
|
| 369 |
------------------------------------------------------------------
|
| 370 |
Capturing subpattern count = 0
|
| 371 |
Options: utf
|
| 372 |
First char = \x{f0}
|
| 373 |
Need char = 171
|
| 374 |
|
| 375 |
/-- This one is here not because it's different to Perl, but because the way
|
| 376 |
the captured single-byte is displayed. (In Perl it becomes a character, and you
|
| 377 |
can't tell the difference.) --/
|
| 378 |
|
| 379 |
/X(\C)(.*)/8
|
| 380 |
X\x{1234}
|
| 381 |
0: X\x{1234}
|
| 382 |
1: \x{e1}
|
| 383 |
2: \x{88}\x{b4}
|
| 384 |
X\nabc
|
| 385 |
0: X\x{0a}abc
|
| 386 |
1: \x{0a}
|
| 387 |
2: abc
|
| 388 |
|
| 389 |
/-- This one is here because Perl gives out a grumbly error message (quite
|
| 390 |
correctly, but that messes up comparisons). --/
|
| 391 |
|
| 392 |
/a\Cb/8
|
| 393 |
*** Failers
|
| 394 |
No match
|
| 395 |
a\x{100}b
|
| 396 |
No match
|
| 397 |
|
| 398 |
/[^ab\xC0-\xF0]/8SDZ
|
| 399 |
------------------------------------------------------------------
|
| 400 |
Bra
|
| 401 |
[\x00-`c-\xbf\xf1-\xff] (neg)
|
| 402 |
Ket
|
| 403 |
End
|
| 404 |
------------------------------------------------------------------
|
| 405 |
Capturing subpattern count = 0
|
| 406 |
Options: utf
|
| 407 |
No first char
|
| 408 |
No need char
|
| 409 |
Subject length lower bound = 1
|
| 410 |
Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
| 411 |
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
| 412 |
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
| 413 |
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
|
| 414 |
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
| 415 |
\xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0
|
| 416 |
\xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf
|
| 417 |
\xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee
|
| 418 |
\xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd
|
| 419 |
\xfe \xff
|
| 420 |
\x{f1}
|
| 421 |
0: \x{f1}
|
| 422 |
\x{bf}
|
| 423 |
0: \x{bf}
|
| 424 |
\x{100}
|
| 425 |
0: \x{100}
|
| 426 |
\x{1000}
|
| 427 |
0: \x{1000}
|
| 428 |
*** Failers
|
| 429 |
0: *
|
| 430 |
\x{c0}
|
| 431 |
No match
|
| 432 |
\x{f0}
|
| 433 |
No match
|
| 434 |
|
| 435 |
/Ā{3,4}/8SDZ
|
| 436 |
------------------------------------------------------------------
|
| 437 |
Bra
|
| 438 |
\x{100}{3}
|
| 439 |
\x{100}?
|
| 440 |
Ket
|
| 441 |
End
|
| 442 |
------------------------------------------------------------------
|
| 443 |
Capturing subpattern count = 0
|
| 444 |
Options: utf
|
| 445 |
First char = \x{c4}
|
| 446 |
Need char = 128
|
| 447 |
Subject length lower bound = 3
|
| 448 |
No set of starting bytes
|
| 449 |
\x{100}\x{100}\x{100}\x{100\x{100}
|
| 450 |
0: \x{100}\x{100}\x{100}
|
| 451 |
|
| 452 |
/(\x{100}+|x)/8SDZ
|
| 453 |
------------------------------------------------------------------
|
| 454 |
Bra
|
| 455 |
CBra 1
|
| 456 |
\x{100}+
|
| 457 |
Alt
|
| 458 |
x
|
| 459 |
Ket
|
| 460 |
Ket
|
| 461 |
End
|
| 462 |
------------------------------------------------------------------
|
| 463 |
Capturing subpattern count = 1
|
| 464 |
Options: utf
|
| 465 |
No first char
|
| 466 |
No need char
|
| 467 |
Subject length lower bound = 1
|
| 468 |
Starting byte set: x \xc4
|
| 469 |
|
| 470 |
/(\x{100}*a|x)/8SDZ
|
| 471 |
------------------------------------------------------------------
|
| 472 |
Bra
|
| 473 |
CBra 1
|
| 474 |
\x{100}*+
|
| 475 |
a
|
| 476 |
Alt
|
| 477 |
x
|
| 478 |
Ket
|
| 479 |
Ket
|
| 480 |
End
|
| 481 |
------------------------------------------------------------------
|
| 482 |
Capturing subpattern count = 1
|
| 483 |
Options: utf
|
| 484 |
No first char
|
| 485 |
No need char
|
| 486 |
Subject length lower bound = 1
|
| 487 |
Starting byte set: a x \xc4
|
| 488 |
|
| 489 |
/(\x{100}{0,2}a|x)/8SDZ
|
| 490 |
------------------------------------------------------------------
|
| 491 |
Bra
|
| 492 |
CBra 1
|
| 493 |
\x{100}{0,2}
|
| 494 |
a
|
| 495 |
Alt
|
| 496 |
x
|
| 497 |
Ket
|
| 498 |
Ket
|
| 499 |
End
|
| 500 |
------------------------------------------------------------------
|
| 501 |
Capturing subpattern count = 1
|
| 502 |
Options: utf
|
| 503 |
No first char
|
| 504 |
No need char
|
| 505 |
Subject length lower bound = 1
|
| 506 |
Starting byte set: a x \xc4
|
| 507 |
|
| 508 |
/(\x{100}{1,2}a|x)/8SDZ
|
| 509 |
------------------------------------------------------------------
|
| 510 |
Bra
|
| 511 |
CBra 1
|
| 512 |
\x{100}
|
| 513 |
\x{100}{0,1}
|
| 514 |
a
|
| 515 |
Alt
|
| 516 |
x
|
| 517 |
Ket
|
| 518 |
Ket
|
| 519 |
End
|
| 520 |
------------------------------------------------------------------
|
| 521 |
Capturing subpattern count = 1
|
| 522 |
Options: utf
|
| 523 |
No first char
|
| 524 |
No need char
|
| 525 |
Subject length lower bound = 1
|
| 526 |
Starting byte set: x \xc4
|
| 527 |
|
| 528 |
/\x{100}/8DZ
|
| 529 |
------------------------------------------------------------------
|
| 530 |
Bra
|
| 531 |
\x{100}
|
| 532 |
Ket
|
| 533 |
End
|
| 534 |
------------------------------------------------------------------
|
| 535 |
Capturing subpattern count = 0
|
| 536 |
Options: utf
|
| 537 |
First char = \x{c4}
|
| 538 |
Need char = 128
|
| 539 |
|
| 540 |
/a\x{100}\x{101}*/8DZ
|
| 541 |
------------------------------------------------------------------
|
| 542 |
Bra
|
| 543 |
a\x{100}
|
| 544 |
\x{101}*
|
| 545 |
Ket
|
| 546 |
End
|
| 547 |
------------------------------------------------------------------
|
| 548 |
Capturing subpattern count = 0
|
| 549 |
Options: utf
|
| 550 |
First char = 'a'
|
| 551 |
Need char = 128
|
| 552 |
|
| 553 |
/a\x{100}\x{101}+/8DZ
|
| 554 |
------------------------------------------------------------------
|
| 555 |
Bra
|
| 556 |
a\x{100}
|
| 557 |
\x{101}+
|
| 558 |
Ket
|
| 559 |
End
|
| 560 |
------------------------------------------------------------------
|
| 561 |
Capturing subpattern count = 0
|
| 562 |
Options: utf
|
| 563 |
First char = 'a'
|
| 564 |
Need char = 129
|
| 565 |
|
| 566 |
/[^\x{c4}]/DZ
|
| 567 |
------------------------------------------------------------------
|
| 568 |
Bra
|
| 569 |
[^\xc4]
|
| 570 |
Ket
|
| 571 |
End
|
| 572 |
------------------------------------------------------------------
|
| 573 |
Capturing subpattern count = 0
|
| 574 |
No options
|
| 575 |
No first char
|
| 576 |
No need char
|
| 577 |
|
| 578 |
/[\x{100}]/8DZ
|
| 579 |
------------------------------------------------------------------
|
| 580 |
Bra
|
| 581 |
\x{100}
|
| 582 |
Ket
|
| 583 |
End
|
| 584 |
------------------------------------------------------------------
|
| 585 |
Capturing subpattern count = 0
|
| 586 |
Options: utf
|
| 587 |
First char = \x{c4}
|
| 588 |
Need char = 128
|
| 589 |
\x{100}
|
| 590 |
0: \x{100}
|
| 591 |
Z\x{100}
|
| 592 |
0: \x{100}
|
| 593 |
\x{100}Z
|
| 594 |
0: \x{100}
|
| 595 |
*** Failers
|
| 596 |
No match
|
| 597 |
|
| 598 |
/[\xff]/DZ8
|
| 599 |
------------------------------------------------------------------
|
| 600 |
Bra
|
| 601 |
\x{ff}
|
| 602 |
Ket
|
| 603 |
End
|
| 604 |
------------------------------------------------------------------
|
| 605 |
Capturing subpattern count = 0
|
| 606 |
Options: utf
|
| 607 |
First char = \x{c3}
|
| 608 |
Need char = 191
|
| 609 |
>\x{ff}<
|
| 610 |
0: \x{ff}
|
| 611 |
|
| 612 |
/[^\xff]/8DZ
|
| 613 |
------------------------------------------------------------------
|
| 614 |
Bra
|
| 615 |
[\x00-\xfe] (neg)
|
| 616 |
Ket
|
| 617 |
End
|
| 618 |
------------------------------------------------------------------
|
| 619 |
Capturing subpattern count = 0
|
| 620 |
Options: utf
|
| 621 |
No first char
|
| 622 |
No need char
|
| 623 |
|
| 624 |
/\x{100}abc(xyz(?1))/8DZ
|
| 625 |
------------------------------------------------------------------
|
| 626 |
Bra
|
| 627 |
\x{100}abc
|
| 628 |
CBra 1
|
| 629 |
xyz
|
| 630 |
Recurse
|
| 631 |
Ket
|
| 632 |
Ket
|
| 633 |
End
|
| 634 |
------------------------------------------------------------------
|
| 635 |
Capturing subpattern count = 1
|
| 636 |
Options: utf
|
| 637 |
First char = \x{c4}
|
| 638 |
Need char = 'z'
|
| 639 |
|
| 640 |
/a\x{1234}b/P8
|
| 641 |
a\x{1234}b
|
| 642 |
0: a\x{1234}b
|
| 643 |
|
| 644 |
/\777/8I
|
| 645 |
Capturing subpattern count = 0
|
| 646 |
Options: utf
|
| 647 |
First char = \x{c7}
|
| 648 |
Need char = 191
|
| 649 |
\x{1ff}
|
| 650 |
0: \x{1ff}
|
| 651 |
\777
|
| 652 |
0: \x{1ff}
|
| 653 |
|
| 654 |
/\x{100}+\x{200}/8DZ
|
| 655 |
------------------------------------------------------------------
|
| 656 |
Bra
|
| 657 |
\x{100}++
|
| 658 |
\x{200}
|
| 659 |
Ket
|
| 660 |
End
|
| 661 |
------------------------------------------------------------------
|
| 662 |
Capturing subpattern count = 0
|
| 663 |
Options: utf
|
| 664 |
First char = \x{c4}
|
| 665 |
Need char = 128
|
| 666 |
|
| 667 |
/\x{100}+X/8DZ
|
| 668 |
------------------------------------------------------------------
|
| 669 |
Bra
|
| 670 |
\x{100}++
|
| 671 |
X
|
| 672 |
Ket
|
| 673 |
End
|
| 674 |
------------------------------------------------------------------
|
| 675 |
Capturing subpattern count = 0
|
| 676 |
Options: utf
|
| 677 |
First char = \x{c4}
|
| 678 |
Need char = 'X'
|
| 679 |
|
| 680 |
/^[\QĀ\E-\QŐ\E/BZ8
|
| 681 |
Failed: missing terminating ] for character class at offset 15
|
| 682 |
|
| 683 |
/-- This tests the stricter UTF-8 check according to RFC 3629. --/
|
| 684 |
|
| 685 |
/X/8
|
| 686 |
\x{0}\x{d7ff}\x{e000}\x{10ffff}
|
| 687 |
No match
|
| 688 |
\x{d800}
|
| 689 |
Error -10 (bad UTF-8 string) offset=0 reason=14
|
| 690 |
\x{d800}\?
|
| 691 |
No match
|
| 692 |
\x{da00}
|
| 693 |
Error -10 (bad UTF-8 string) offset=0 reason=14
|
| 694 |
\x{da00}\?
|
| 695 |
No match
|
| 696 |
\x{dfff}
|
| 697 |
Error -10 (bad UTF-8 string) offset=0 reason=14
|
| 698 |
\x{dfff}\?
|
| 699 |
No match
|
| 700 |
\x{110000}
|
| 701 |
Error -10 (bad UTF-8 string) offset=0 reason=13
|
| 702 |
\x{110000}\?
|
| 703 |
No match
|
| 704 |
\x{2000000}
|
| 705 |
Error -10 (bad UTF-8 string) offset=0 reason=11
|
| 706 |
\x{2000000}\?
|
| 707 |
No match
|
| 708 |
\x{7fffffff}
|
| 709 |
Error -10 (bad UTF-8 string) offset=0 reason=12
|
| 710 |
\x{7fffffff}\?
|
| 711 |
No match
|
| 712 |
|
| 713 |
/(*UTF8)\x{1234}/
|
| 714 |
abcd\x{1234}pqr
|
| 715 |
0: \x{1234}
|
| 716 |
|
| 717 |
/(*CRLF)(*UTF8)(*BSR_UNICODE)a\Rb/I
|
| 718 |
Capturing subpattern count = 0
|
| 719 |
Options: bsr_unicode utf
|
| 720 |
Forced newline sequence: CRLF
|
| 721 |
First char = 'a'
|
| 722 |
Need char = 'b'
|
| 723 |
|
| 724 |
/\h/SI8
|
| 725 |
Capturing subpattern count = 0
|
| 726 |
Options: utf
|
| 727 |
No first char
|
| 728 |
No need char
|
| 729 |
Subject length lower bound = 1
|
| 730 |
Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3
|
| 731 |
ABC\x{09}
|
| 732 |
0: \x{09}
|
| 733 |
ABC\x{20}
|
| 734 |
0:
|
| 735 |
ABC\x{a0}
|
| 736 |
0: \x{a0}
|
| 737 |
ABC\x{1680}
|
| 738 |
0: \x{1680}
|
| 739 |
ABC\x{180e}
|
| 740 |
0: \x{180e}
|
| 741 |
ABC\x{2000}
|
| 742 |
0: \x{2000}
|
| 743 |
ABC\x{202f}
|
| 744 |
0: \x{202f}
|
| 745 |
ABC\x{205f}
|
| 746 |
0: \x{205f}
|
| 747 |
ABC\x{3000}
|
| 748 |
0: \x{3000}
|
| 749 |
|
| 750 |
/\v/SI8
|
| 751 |
Capturing subpattern count = 0
|
| 752 |
Options: utf
|
| 753 |
No first char
|
| 754 |
No need char
|
| 755 |
Subject length lower bound = 1
|
| 756 |
Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2
|
| 757 |
ABC\x{0a}
|
| 758 |
0: \x{0a}
|
| 759 |
ABC\x{0b}
|
| 760 |
0: \x{0b}
|
| 761 |
ABC\x{0c}
|
| 762 |
0: \x{0c}
|
| 763 |
ABC\x{0d}
|
| 764 |
0: \x{0d}
|
| 765 |
ABC\x{85}
|
| 766 |
0: \x{85}
|
| 767 |
ABC\x{2028}
|
| 768 |
0: \x{2028}
|
| 769 |
|
| 770 |
/\h*A/SI8
|
| 771 |
Capturing subpattern count = 0
|
| 772 |
Options: utf
|
| 773 |
No first char
|
| 774 |
Need char = 'A'
|
| 775 |
Subject length lower bound = 1
|
| 776 |
Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3
|
| 777 |
CDBABC
|
| 778 |
0: A
|
| 779 |
|
| 780 |
/\v+A/SI8
|
| 781 |
Capturing subpattern count = 0
|
| 782 |
Options: utf
|
| 783 |
No first char
|
| 784 |
Need char = 'A'
|
| 785 |
Subject length lower bound = 2
|
| 786 |
Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2
|
| 787 |
|
| 788 |
/\s?xxx\s/8SI
|
| 789 |
Capturing subpattern count = 0
|
| 790 |
Options: utf
|
| 791 |
No first char
|
| 792 |
Need char = 'x'
|
| 793 |
Subject length lower bound = 4
|
| 794 |
Starting byte set: \x09 \x0a \x0c \x0d \x20 x
|
| 795 |
|
| 796 |
/\sxxx\s/I8ST1
|
| 797 |
Capturing subpattern count = 0
|
| 798 |
Options: utf
|
| 799 |
No first char
|
| 800 |
Need char = 'x'
|
| 801 |
Subject length lower bound = 5
|
| 802 |
Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2
|
| 803 |
AB\x{85}xxx\x{a0}XYZ
|
| 804 |
0: \x{85}xxx\x{a0}
|
| 805 |
AB\x{a0}xxx\x{85}XYZ
|
| 806 |
0: \x{a0}xxx\x{85}
|
| 807 |
|
| 808 |
/\S \S/I8ST1
|
| 809 |
Capturing subpattern count = 0
|
| 810 |
Options: utf
|
| 811 |
No first char
|
| 812 |
Need char = ' '
|
| 813 |
Subject length lower bound = 3
|
| 814 |
Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e
|
| 815 |
\x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d
|
| 816 |
\x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @
|
| 817 |
A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e
|
| 818 |
f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3
|
| 819 |
\xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
|
| 820 |
\xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
|
| 821 |
\xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
|
| 822 |
\xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
| 823 |
\x{a2} \x{84}
|
| 824 |
0: \x{a2} \x{84}
|
| 825 |
A Z
|
| 826 |
0: A Z
|
| 827 |
|
| 828 |
/a+/8
|
| 829 |
a\x{123}aa\>1
|
| 830 |
0: aa
|
| 831 |
a\x{123}aa\>2
|
| 832 |
Error -11 (bad UTF-8 offset)
|
| 833 |
a\x{123}aa\>3
|
| 834 |
0: aa
|
| 835 |
a\x{123}aa\>4
|
| 836 |
0: a
|
| 837 |
a\x{123}aa\>5
|
| 838 |
No match
|
| 839 |
a\x{123}aa\>6
|
| 840 |
Error -24 (bad offset value)
|
| 841 |
|
| 842 |
/\x{1234}+/iS8I
|
| 843 |
Capturing subpattern count = 0
|
| 844 |
Options: caseless utf
|
| 845 |
No first char
|
| 846 |
No need char
|
| 847 |
Subject length lower bound = 1
|
| 848 |
Starting byte set: \xe1
|
| 849 |
|
| 850 |
/\x{1234}+?/iS8I
|
| 851 |
Capturing subpattern count = 0
|
| 852 |
Options: caseless utf
|
| 853 |
No first char
|
| 854 |
No need char
|
| 855 |
Subject length lower bound = 1
|
| 856 |
Starting byte set: \xe1
|
| 857 |
|
| 858 |
/\x{1234}++/iS8I
|
| 859 |
Capturing subpattern count = 0
|
| 860 |
Options: caseless utf
|
| 861 |
No first char
|
| 862 |
No need char
|
| 863 |
Subject length lower bound = 1
|
| 864 |
Starting byte set: \xe1
|
| 865 |
|
| 866 |
/\x{1234}{2}/iS8I
|
| 867 |
Capturing subpattern count = 0
|
| 868 |
Options: caseless utf
|
| 869 |
No first char
|
| 870 |
No need char
|
| 871 |
Subject length lower bound = 2
|
| 872 |
Starting byte set: \xe1
|
| 873 |
|
| 874 |
/[^\x{c4}]/8DZ
|
| 875 |
------------------------------------------------------------------
|
| 876 |
Bra
|
| 877 |
[\x00-\xc3\xc5-\xff] (neg)
|
| 878 |
Ket
|
| 879 |
End
|
| 880 |
------------------------------------------------------------------
|
| 881 |
Capturing subpattern count = 0
|
| 882 |
Options: utf
|
| 883 |
No first char
|
| 884 |
No need char
|
| 885 |
|
| 886 |
/X+\x{200}/8DZ
|
| 887 |
------------------------------------------------------------------
|
| 888 |
Bra
|
| 889 |
X++
|
| 890 |
\x{200}
|
| 891 |
Ket
|
| 892 |
End
|
| 893 |
------------------------------------------------------------------
|
| 894 |
Capturing subpattern count = 0
|
| 895 |
Options: utf
|
| 896 |
First char = 'X'
|
| 897 |
Need char = 128
|
| 898 |
|
| 899 |
/\R/SI8
|
| 900 |
Capturing subpattern count = 0
|
| 901 |
Options: utf
|
| 902 |
No first char
|
| 903 |
No need char
|
| 904 |
Subject length lower bound = 1
|
| 905 |
Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2
|
| 906 |
|
| 907 |
/-- End of testinput17 --/
|