| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
| 8 |
Written by Philip Hazel |
Written by Philip Hazel |
| 9 |
Copyright (c) 1997-2010 University of Cambridge |
Copyright (c) 1997-2012 University of Cambridge |
| 10 |
|
|
| 11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 44 |
|
|
| 45 |
(1) It is #included by pcre_compile.c when it is compiled in debugging mode |
(1) It is #included by pcre_compile.c when it is compiled in debugging mode |
| 46 |
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production |
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production |
| 47 |
compiles. |
compiles. In this case PCRE_INCLUDED is defined. |
| 48 |
|
|
| 49 |
(2) It is also compiled separately and linked with pcretest.c, which can be |
(2) It is also compiled separately and linked with pcretest.c, which can be |
| 50 |
asked to print out a compiled regex for debugging purposes. */ |
asked to print out a compiled regex for debugging purposes. */ |
| 51 |
|
|
| 52 |
|
#ifndef PCRE_INCLUDED |
| 53 |
|
|
| 54 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
| 55 |
#include "config.h" |
#include "config.h" |
| 56 |
#endif |
#endif |
| 57 |
|
|
| 58 |
|
/* For pcretest program. */ |
| 59 |
|
#define PRIV(name) name |
| 60 |
|
|
| 61 |
/* We have to include pcre_internal.h because we need the internal info for |
/* We have to include pcre_internal.h because we need the internal info for |
| 62 |
displaying the results of pcre_study() and we also need to know about the |
displaying the results of pcre_study() and we also need to know about the |
| 63 |
internal macros, structures, and other internal data values; pcretest has |
internal macros, structures, and other internal data values; pcretest has |
| 73 |
/* These are the functions that are contained within. It doesn't seem worth |
/* These are the functions that are contained within. It doesn't seem worth |
| 74 |
having a separate .h file just for this. */ |
having a separate .h file just for this. */ |
| 75 |
|
|
| 76 |
|
#endif /* PCRE_INCLUDED */ |
| 77 |
|
|
| 78 |
|
#ifdef PCRE_INCLUDED |
| 79 |
|
static /* Keep the following function as private. */ |
| 80 |
|
#endif |
| 81 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
| 82 |
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); |
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); |
| 83 |
#endif |
#else |
|
#ifdef COMPILE_PCRE16 |
|
| 84 |
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); |
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); |
| 85 |
#endif |
#endif |
| 86 |
|
|
| 97 |
|
|
| 98 |
/* The table of operator names. */ |
/* The table of operator names. */ |
| 99 |
|
|
| 100 |
static const char *OP_names[] = { OP_NAME_LIST }; |
static const char *priv_OP_names[] = { OP_NAME_LIST }; |
| 101 |
|
|
| 102 |
/* This table of operator lengths is not actually used by the working code, |
/* This table of operator lengths is not actually used by the working code, |
| 103 |
but its size is needed for a check that ensures it is the correct size for the |
but its size is needed for a check that ensures it is the correct size for the |
| 104 |
number of opcodes (thus catching update omissions). */ |
number of opcodes (thus catching update omissions). */ |
| 105 |
|
|
| 106 |
static const pcre_uint8 OP_lengths[] = { OP_LENGTHS }; |
static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS }; |
| 107 |
|
|
| 108 |
|
|
| 109 |
|
|
| 117 |
int c = *ptr; |
int c = *ptr; |
| 118 |
|
|
| 119 |
#ifndef SUPPORT_UTF |
#ifndef SUPPORT_UTF |
| 120 |
|
|
| 121 |
(void)utf; /* Avoid compiler warning */ |
(void)utf; /* Avoid compiler warning */ |
| 122 |
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
if (PRINTABLE(c)) fprintf(f, "%c", c); |
| 123 |
|
else if (c <= 0xff) fprintf(f, "\\x%02x", c); |
| 124 |
|
else fprintf(f, "\\x{%x}", c); |
| 125 |
return 0; |
return 0; |
| 126 |
|
|
| 127 |
#else |
#else |
| 242 |
They can be turned off from pcretest so that automatic tests on bytecode can be |
They can be turned off from pcretest so that automatic tests on bytecode can be |
| 243 |
written that do not depend on the value of LINK_SIZE. */ |
written that do not depend on the value of LINK_SIZE. */ |
| 244 |
|
|
| 245 |
|
#ifdef PCRE_INCLUDED |
| 246 |
|
static /* Keep the following function as private. */ |
| 247 |
|
#endif |
| 248 |
#ifdef COMPILE_PCRE8 |
#ifdef COMPILE_PCRE8 |
| 249 |
void |
void |
| 250 |
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
| 253 |
pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
| 254 |
#endif |
#endif |
| 255 |
{ |
{ |
| 256 |
real_pcre *re = (real_pcre *)external_re; |
REAL_PCRE *re = (REAL_PCRE *)external_re; |
| 257 |
pcre_uchar *codestart, *code; |
pcre_uchar *codestart, *code; |
| 258 |
BOOL utf; |
BOOL utf; |
| 259 |
|
|
| 300 |
|
|
| 301 |
case OP_TABLE_LENGTH: |
case OP_TABLE_LENGTH: |
| 302 |
case OP_TABLE_LENGTH + |
case OP_TABLE_LENGTH + |
| 303 |
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && |
((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && |
| 304 |
(sizeof(OP_lengths) == OP_TABLE_LENGTH)): |
(sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)): |
| 305 |
break; |
break; |
| 306 |
/* ========================================================================== */ |
/* ========================================================================== */ |
| 307 |
|
|
| 308 |
case OP_END: |
case OP_END: |
| 309 |
fprintf(f, " %s\n", OP_names[*code]); |
fprintf(f, " %s\n", priv_OP_names[*code]); |
| 310 |
fprintf(f, "------------------------------------------------------------------\n"); |
fprintf(f, "------------------------------------------------------------------\n"); |
| 311 |
return; |
return; |
| 312 |
|
|
| 338 |
case OP_SCBRAPOS: |
case OP_SCBRAPOS: |
| 339 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 340 |
else fprintf(f, " "); |
else fprintf(f, " "); |
| 341 |
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); |
fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE)); |
| 342 |
break; |
break; |
| 343 |
|
|
| 344 |
case OP_BRA: |
case OP_BRA: |
| 361 |
case OP_REVERSE: |
case OP_REVERSE: |
| 362 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 363 |
else fprintf(f, " "); |
else fprintf(f, " "); |
| 364 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", priv_OP_names[*code]); |
| 365 |
break; |
break; |
| 366 |
|
|
| 367 |
case OP_CLOSE: |
case OP_CLOSE: |
| 368 |
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); |
fprintf(f, " %s %d", priv_OP_names[*code], GET2(code, 1)); |
| 369 |
break; |
break; |
| 370 |
|
|
| 371 |
case OP_CREF: |
case OP_CREF: |
| 372 |
case OP_NCREF: |
case OP_NCREF: |
| 373 |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]); |
| 374 |
break; |
break; |
| 375 |
|
|
| 376 |
case OP_RREF: |
case OP_RREF: |
| 425 |
fprintf(f, " %s ", flag); |
fprintf(f, " %s ", flag); |
| 426 |
if (*code >= OP_TYPESTAR) |
if (*code >= OP_TYPESTAR) |
| 427 |
{ |
{ |
| 428 |
fprintf(f, "%s", OP_names[code[1]]); |
fprintf(f, "%s", priv_OP_names[code[1]]); |
| 429 |
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
| 430 |
{ |
{ |
| 431 |
fprintf(f, " %s ", get_ucpname(code[2], code[3])); |
fprintf(f, " %s ", get_ucpname(code[2], code[3])); |
| 433 |
} |
} |
| 434 |
} |
} |
| 435 |
else extra = print_char(f, code+1, utf); |
else extra = print_char(f, code+1, utf); |
| 436 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", priv_OP_names[*code]); |
| 437 |
break; |
break; |
| 438 |
|
|
| 439 |
case OP_EXACTI: |
case OP_EXACTI: |
| 459 |
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
| 460 |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
| 461 |
case OP_TYPEPOSUPTO: |
case OP_TYPEPOSUPTO: |
| 462 |
fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]); |
fprintf(f, " %s", priv_OP_names[code[1 + IMM2_SIZE]]); |
| 463 |
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) |
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) |
| 464 |
{ |
{ |
| 465 |
fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1], |
fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1], |
| 477 |
flag = "/i"; |
flag = "/i"; |
| 478 |
/* Fall through */ |
/* Fall through */ |
| 479 |
case OP_NOT: |
case OP_NOT: |
| 480 |
c = code[1]; |
fprintf(f, " %s [^", flag); |
| 481 |
if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); |
extra = print_char(f, code + 1, utf); |
| 482 |
else fprintf(f, " %s [^\\x%02x]", flag, c); |
fprintf(f, "]"); |
| 483 |
break; |
break; |
| 484 |
|
|
| 485 |
case OP_NOTSTARI: |
case OP_NOTSTARI: |
| 503 |
case OP_NOTQUERY: |
case OP_NOTQUERY: |
| 504 |
case OP_NOTMINQUERY: |
case OP_NOTMINQUERY: |
| 505 |
case OP_NOTPOSQUERY: |
case OP_NOTPOSQUERY: |
| 506 |
c = code[1]; |
fprintf(f, " %s [^", flag); |
| 507 |
if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); |
extra = print_char(f, code + 1, utf); |
| 508 |
else fprintf(f, " %s [^\\x%02x]", flag, c); |
fprintf(f, "]%s", priv_OP_names[*code]); |
|
fprintf(f, "%s", OP_names[*code]); |
|
| 509 |
break; |
break; |
| 510 |
|
|
| 511 |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
| 519 |
case OP_NOTUPTO: |
case OP_NOTUPTO: |
| 520 |
case OP_NOTMINUPTO: |
case OP_NOTMINUPTO: |
| 521 |
case OP_NOTPOSUPTO: |
case OP_NOTPOSUPTO: |
| 522 |
c = code[1 + IMM2_SIZE]; |
fprintf(f, " %s [^", flag); |
| 523 |
if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c); |
extra = print_char(f, code + 1 + IMM2_SIZE, utf); |
| 524 |
else fprintf(f, " %s [^\\x%02x]{", flag, c); |
fprintf(f, "]{"); |
| 525 |
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); |
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); |
| 526 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
| 527 |
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); |
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); |
| 532 |
case OP_RECURSE: |
case OP_RECURSE: |
| 533 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 534 |
else fprintf(f, " "); |
else fprintf(f, " "); |
| 535 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", priv_OP_names[*code]); |
| 536 |
break; |
break; |
| 537 |
|
|
| 538 |
case OP_REFI: |
case OP_REFI: |
| 540 |
/* Fall through */ |
/* Fall through */ |
| 541 |
case OP_REF: |
case OP_REF: |
| 542 |
fprintf(f, " %s \\%d", flag, GET2(code,1)); |
fprintf(f, " %s \\%d", flag, GET2(code,1)); |
| 543 |
ccode = code + PRIV(OP_lengths)[*code]; |
ccode = code + priv_OP_lengths[*code]; |
| 544 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
| 545 |
|
|
| 546 |
case OP_CALLOUT: |
case OP_CALLOUT: |
| 547 |
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), |
fprintf(f, " %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2), |
| 548 |
GET(code, 2 + LINK_SIZE)); |
GET(code, 2 + LINK_SIZE)); |
| 549 |
break; |
break; |
| 550 |
|
|
| 551 |
case OP_PROP: |
case OP_PROP: |
| 552 |
case OP_NOTPROP: |
case OP_NOTPROP: |
| 553 |
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2])); |
fprintf(f, " %s %s", priv_OP_names[*code], get_ucpname(code[1], code[2])); |
| 554 |
break; |
break; |
| 555 |
|
|
| 556 |
/* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no |
/* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no |
| 629 |
} |
} |
| 630 |
else |
else |
| 631 |
{ |
{ |
| 632 |
ccode += 1 + print_char(f, ccode, TRUE); |
ccode += 1 + print_char(f, ccode, utf); |
| 633 |
if (ch == XCL_RANGE) |
if (ch == XCL_RANGE) |
| 634 |
{ |
{ |
| 635 |
fprintf(f, "-"); |
fprintf(f, "-"); |
| 636 |
ccode += 1 + print_char(f, ccode, TRUE); |
ccode += 1 + print_char(f, ccode, utf); |
| 637 |
} |
} |
| 638 |
} |
} |
| 639 |
} |
} |
| 654 |
case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
| 655 |
case OP_CRQUERY: |
case OP_CRQUERY: |
| 656 |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
| 657 |
fprintf(f, "%s", OP_names[*ccode]); |
fprintf(f, "%s", priv_OP_names[*ccode]); |
| 658 |
extra += PRIV(OP_lengths)[*ccode]; |
extra += priv_OP_lengths[*ccode]; |
| 659 |
break; |
break; |
| 660 |
|
|
| 661 |
case OP_CRRANGE: |
case OP_CRRANGE: |
| 665 |
if (max == 0) fprintf(f, "{%d,}", min); |
if (max == 0) fprintf(f, "{%d,}", min); |
| 666 |
else fprintf(f, "{%d,%d}", min, max); |
else fprintf(f, "{%d,%d}", min, max); |
| 667 |
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
| 668 |
extra += PRIV(OP_lengths)[*ccode]; |
extra += priv_OP_lengths[*ccode]; |
| 669 |
break; |
break; |
| 670 |
|
|
| 671 |
/* Do nothing if it's not a repeat; this code stops picky compilers |
/* Do nothing if it's not a repeat; this code stops picky compilers |
| 681 |
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
| 682 |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
| 683 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
| 684 |
fprintf(f, " %s ", OP_names[*code]); |
fprintf(f, " %s ", priv_OP_names[*code]); |
| 685 |
print_puchar(f, code + 2); |
print_puchar(f, code + 2); |
| 686 |
extra += code[1]; |
extra += code[1]; |
| 687 |
break; |
break; |
| 688 |
|
|
| 689 |
case OP_THEN: |
case OP_THEN: |
| 690 |
fprintf(f, " %s", OP_names[*code]); |
fprintf(f, " %s", priv_OP_names[*code]); |
| 691 |
break; |
break; |
| 692 |
|
|
| 693 |
case OP_CIRCM: |
case OP_CIRCM: |
| 698 |
/* Anything else is just an item with no data, but possibly a flag. */ |
/* Anything else is just an item with no data, but possibly a flag. */ |
| 699 |
|
|
| 700 |
default: |
default: |
| 701 |
fprintf(f, " %s %s", flag, OP_names[*code]); |
fprintf(f, " %s %s", flag, priv_OP_names[*code]); |
| 702 |
break; |
break; |
| 703 |
} |
} |
| 704 |
|
|
| 705 |
code += PRIV(OP_lengths)[*code] + extra; |
code += priv_OP_lengths[*code] + extra; |
| 706 |
fprintf(f, "\n"); |
fprintf(f, "\n"); |
| 707 |
} |
} |
| 708 |
} |
} |