| 120 |
Buginese, |
Buginese, |
| 121 |
Buhid, |
Buhid, |
| 122 |
Canadian_Aboriginal, |
Canadian_Aboriginal, |
| 123 |
|
Carian, |
| 124 |
|
Cham, |
| 125 |
Cherokee, |
Cherokee, |
| 126 |
Common, |
Common, |
| 127 |
Coptic, |
Coptic, |
| 145 |
Inherited, |
Inherited, |
| 146 |
Kannada, |
Kannada, |
| 147 |
Katakana, |
Katakana, |
| 148 |
|
Kayah_Li, |
| 149 |
Kharoshthi, |
Kharoshthi, |
| 150 |
Khmer, |
Khmer, |
| 151 |
Lao, |
Lao, |
| 152 |
Latin, |
Latin, |
| 153 |
|
Lepcha, |
| 154 |
Limbu, |
Limbu, |
| 155 |
Linear_B, |
Linear_B, |
| 156 |
|
Lycian, |
| 157 |
|
Lydian, |
| 158 |
Malayalam, |
Malayalam, |
| 159 |
Mongolian, |
Mongolian, |
| 160 |
Myanmar, |
Myanmar, |
| 163 |
Ogham, |
Ogham, |
| 164 |
Old_Italic, |
Old_Italic, |
| 165 |
Old_Persian, |
Old_Persian, |
| 166 |
|
Ol_Chiki, |
| 167 |
Oriya, |
Oriya, |
| 168 |
Osmanya, |
Osmanya, |
| 169 |
Phags_Pa, |
Phags_Pa, |
| 170 |
Phoenician, |
Phoenician, |
| 171 |
|
Rejang, |
| 172 |
Runic, |
Runic, |
| 173 |
|
Saurashtra, |
| 174 |
Shavian, |
Shavian, |
| 175 |
Sinhala, |
Sinhala, |
| 176 |
|
Sundanese, |
| 177 |
Syloti_Nagri, |
Syloti_Nagri, |
| 178 |
Syriac, |
Syriac, |
| 179 |
Tagalog, |
Tagalog, |
| 186 |
Tibetan, |
Tibetan, |
| 187 |
Tifinagh, |
Tifinagh, |
| 188 |
Ugaritic, |
Ugaritic, |
| 189 |
|
Vai, |
| 190 |
Yi. |
Yi. |
| 191 |
. |
. |
| 192 |
. |
. |
| 242 |
.SH "ANCHORS AND SIMPLE ASSERTIONS" |
.SH "ANCHORS AND SIMPLE ASSERTIONS" |
| 243 |
.rs |
.rs |
| 244 |
.sp |
.sp |
| 245 |
\eb word boundary |
\eb word boundary (only ASCII letters recognized) |
| 246 |
\eB not a word boundary |
\eB not a word boundary |
| 247 |
^ start of subject |
^ start of subject |
| 248 |
also after internal newline in multiline mode |
also after internal newline in multiline mode |
| 271 |
.SH "CAPTURING" |
.SH "CAPTURING" |
| 272 |
.rs |
.rs |
| 273 |
.sp |
.sp |
| 274 |
(...) capturing group |
(...) capturing group |
| 275 |
(?<name>...) named capturing group (Perl) |
(?<name>...) named capturing group (Perl) |
| 276 |
(?'name'...) named capturing group (Perl) |
(?'name'...) named capturing group (Perl) |
| 277 |
(?P<name>...) named capturing group (Python) |
(?P<name>...) named capturing group (Python) |
| 278 |
(?:...) non-capturing group |
(?:...) non-capturing group |
| 279 |
(?|...) non-capturing group; reset group numbers for |
(?|...) non-capturing group; reset group numbers for |
| 280 |
capturing groups in each alternative |
capturing groups in each alternative |
| 281 |
. |
. |
| 282 |
. |
. |
| 283 |
.SH "ATOMIC GROUPS" |
.SH "ATOMIC GROUPS" |
| 284 |
.rs |
.rs |
| 285 |
.sp |
.sp |
| 286 |
(?>...) atomic, non-capturing group |
(?>...) atomic, non-capturing group |
| 287 |
. |
. |
| 288 |
. |
. |
| 289 |
. |
. |
| 291 |
.SH "COMMENT" |
.SH "COMMENT" |
| 292 |
.rs |
.rs |
| 293 |
.sp |
.sp |
| 294 |
(?#....) comment (not nestable) |
(?#....) comment (not nestable) |
| 295 |
. |
. |
| 296 |
. |
. |
| 297 |
.SH "OPTION SETTING" |
.SH "OPTION SETTING" |
| 298 |
.rs |
.rs |
| 299 |
.sp |
.sp |
| 300 |
(?i) caseless |
(?i) caseless |
| 301 |
(?J) allow duplicate names |
(?J) allow duplicate names |
| 302 |
(?m) multiline |
(?m) multiline |
| 303 |
(?s) single line (dotall) |
(?s) single line (dotall) |
| 304 |
(?U) default ungreedy (lazy) |
(?U) default ungreedy (lazy) |
| 305 |
(?x) extended (ignore white space) |
(?x) extended (ignore white space) |
| 306 |
(?-...) unset option(s) |
(?-...) unset option(s) |
| 307 |
|
.sp |
| 308 |
|
The following is recognized only at the start of a pattern or after one of the |
| 309 |
|
newline-setting options with similar syntax: |
| 310 |
|
.sp |
| 311 |
|
(*UTF8) set UTF-8 mode |
| 312 |
. |
. |
| 313 |
. |
. |
| 314 |
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS" |
.SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS" |
| 315 |
.rs |
.rs |
| 316 |
.sp |
.sp |
| 317 |
(?=...) positive look ahead |
(?=...) positive look ahead |
| 318 |
(?!...) negative look ahead |
(?!...) negative look ahead |
| 319 |
(?<=...) positive look behind |
(?<=...) positive look behind |
| 320 |
(?<!...) negative look behind |
(?<!...) negative look behind |
| 321 |
.sp |
.sp |
| 322 |
Each top-level branch of a look behind must be of a fixed length. |
Each top-level branch of a look behind must be of a fixed length. |
| 323 |
. |
. |
| 325 |
.SH "BACKREFERENCES" |
.SH "BACKREFERENCES" |
| 326 |
.rs |
.rs |
| 327 |
.sp |
.sp |
| 328 |
\en reference by number (can be ambiguous) |
\en reference by number (can be ambiguous) |
| 329 |
\egn reference by number |
\egn reference by number |
| 330 |
\eg{n} reference by number |
\eg{n} reference by number |
| 331 |
\eg{-n} relative reference by number |
\eg{-n} relative reference by number |
| 332 |
\ek<name> reference by name (Perl) |
\ek<name> reference by name (Perl) |
| 333 |
\ek'name' reference by name (Perl) |
\ek'name' reference by name (Perl) |
| 334 |
\eg{name} reference by name (Perl) |
\eg{name} reference by name (Perl) |
| 335 |
\ek{name} reference by name (.NET) |
\ek{name} reference by name (.NET) |
| 336 |
(?P=name) reference by name (Python) |
(?P=name) reference by name (Python) |
| 337 |
. |
. |
| 338 |
. |
. |
| 339 |
.SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)" |
.SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)" |
| 340 |
.rs |
.rs |
| 341 |
.sp |
.sp |
| 342 |
(?R) recurse whole pattern |
(?R) recurse whole pattern |
| 343 |
(?n) call subpattern by absolute number |
(?n) call subpattern by absolute number |
| 344 |
(?+n) call subpattern by relative number |
(?+n) call subpattern by relative number |
| 345 |
(?-n) call subpattern by relative number |
(?-n) call subpattern by relative number |
| 346 |
(?&name) call subpattern by name (Perl) |
(?&name) call subpattern by name (Perl) |
| 347 |
(?P>name) call subpattern by name (Python) |
(?P>name) call subpattern by name (Python) |
| 348 |
\eg<name> call subpattern by name (Oniguruma) |
\eg<name> call subpattern by name (Oniguruma) |
| 349 |
\eg'name' call subpattern by name (Oniguruma) |
\eg'name' call subpattern by name (Oniguruma) |
| 350 |
\eg<n> call subpattern by absolute number (Oniguruma) |
\eg<n> call subpattern by absolute number (Oniguruma) |
| 351 |
\eg'n' call subpattern by absolute number (Oniguruma) |
\eg'n' call subpattern by absolute number (Oniguruma) |
| 352 |
\eg<+n> call subpattern by relative number (PCRE extension) |
\eg<+n> call subpattern by relative number (PCRE extension) |
| 353 |
\eg'+n' call subpattern by relative number (PCRE extension) |
\eg'+n' call subpattern by relative number (PCRE extension) |
| 354 |
\eg<-n> call subpattern by relative number (PCRE extension) |
\eg<-n> call subpattern by relative number (PCRE extension) |
| 355 |
\eg'-n' call subpattern by relative number (PCRE extension) |
\eg'-n' call subpattern by relative number (PCRE extension) |
| 356 |
. |
. |
| 357 |
. |
. |
| 358 |
.SH "CONDITIONAL PATTERNS" |
.SH "CONDITIONAL PATTERNS" |
| 361 |
(?(condition)yes-pattern) |
(?(condition)yes-pattern) |
| 362 |
(?(condition)yes-pattern|no-pattern) |
(?(condition)yes-pattern|no-pattern) |
| 363 |
.sp |
.sp |
| 364 |
(?(n)... absolute reference condition |
(?(n)... absolute reference condition |
| 365 |
(?(+n)... relative reference condition |
(?(+n)... relative reference condition |
| 366 |
(?(-n)... relative reference condition |
(?(-n)... relative reference condition |
| 367 |
(?(<name>)... named reference condition (Perl) |
(?(<name>)... named reference condition (Perl) |
| 368 |
(?('name')... named reference condition (Perl) |
(?('name')... named reference condition (Perl) |
| 369 |
(?(name)... named reference condition (PCRE) |
(?(name)... named reference condition (PCRE) |
| 370 |
(?(R)... overall recursion condition |
(?(R)... overall recursion condition |
| 371 |
(?(Rn)... specific group recursion condition |
(?(Rn)... specific group recursion condition |
| 372 |
(?(R&name)... specific recursion condition |
(?(R&name)... specific recursion condition |
| 373 |
(?(DEFINE)... define subpattern for reference |
(?(DEFINE)... define subpattern for reference |
| 374 |
(?(assert)... assertion condition |
(?(assert)... assertion condition |
| 375 |
. |
. |
| 376 |
. |
. |
| 377 |
.SH "BACKTRACKING CONTROL" |
.SH "BACKTRACKING CONTROL" |
| 379 |
.sp |
.sp |
| 380 |
The following act as soon as they are reached: |
The following act as soon as they are reached: |
| 381 |
.sp |
.sp |
| 382 |
(*ACCEPT) force successful match |
(*ACCEPT) force successful match |
| 383 |
(*FAIL) force backtrack; synonym (*F) |
(*FAIL) force backtrack; synonym (*F) |
| 384 |
.sp |
.sp |
| 385 |
The following act only when a subsequent match failure causes a backtrack to |
The following act only when a subsequent match failure causes a backtrack to |
| 386 |
reach them. They all force a match failure, but they differ in what happens |
reach them. They all force a match failure, but they differ in what happens |
| 387 |
afterwards. Those that advance the start-of-match point do so only if the |
afterwards. Those that advance the start-of-match point do so only if the |
| 388 |
pattern is not anchored. |
pattern is not anchored. |
| 389 |
.sp |
.sp |
| 390 |
(*COMMIT) overall failure, no advance of starting point |
(*COMMIT) overall failure, no advance of starting point |
| 391 |
(*PRUNE) advance to next starting character |
(*PRUNE) advance to next starting character |
| 392 |
(*SKIP) advance start to current matching position |
(*SKIP) advance start to current matching position |
| 393 |
(*THEN) local failure, backtrack to next alternation |
(*THEN) local failure, backtrack to next alternation |
| 394 |
. |
. |
| 395 |
. |
. |
| 396 |
.SH "NEWLINE CONVENTIONS" |
.SH "NEWLINE CONVENTIONS" |
| 397 |
.rs |
.rs |
| 398 |
.sp |
.sp |
| 399 |
These are recognized only at the very start of the pattern or after a |
These are recognized only at the very start of the pattern or after a |
| 400 |
(*BSR_...) option. |
(*BSR_...) or (*UTF8) option. |
| 401 |
.sp |
.sp |
| 402 |
(*CR) |
(*CR) carriage return only |
| 403 |
(*LF) |
(*LF) linefeed only |
| 404 |
(*CRLF) |
(*CRLF) carriage return followed by linefeed |
| 405 |
(*ANYCRLF) |
(*ANYCRLF) all three of the above |
| 406 |
(*ANY) |
(*ANY) any Unicode newline sequence |
| 407 |
. |
. |
| 408 |
. |
. |
| 409 |
.SH "WHAT \eR MATCHES" |
.SH "WHAT \eR MATCHES" |
| 410 |
.rs |
.rs |
| 411 |
.sp |
.sp |
| 412 |
These are recognized only at the very start of the pattern or after a |
These are recognized only at the very start of the pattern or after a |
| 413 |
(*...) option that sets the newline convention. |
(*...) option that sets the newline convention or UTF-8 mode. |
| 414 |
.sp |
.sp |
| 415 |
(*BSR_ANYCRLF) |
(*BSR_ANYCRLF) CR, LF, or CRLF |
| 416 |
(*BSR_UNICODE) |
(*BSR_UNICODE) any Unicode newline sequence |
| 417 |
. |
. |
| 418 |
. |
. |
| 419 |
.SH "CALLOUTS" |
.SH "CALLOUTS" |
| 444 |
.rs |
.rs |
| 445 |
.sp |
.sp |
| 446 |
.nf |
.nf |
| 447 |
Last updated: 09 April 2008 |
Last updated: 11 April 2009 |
| 448 |
Copyright (c) 1997-2008 University of Cambridge. |
Copyright (c) 1997-2009 University of Cambridge. |
| 449 |
.fi |
.fi |