| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
| 8 |
Written by Philip Hazel |
Written by Philip Hazel |
| 9 |
Copyright (c) 1997-2007 University of Cambridge |
Copyright (c) 1997-2010 University of Cambridge |
| 10 |
|
|
| 11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 43 |
local functions. This source file is used in two places: |
local functions. This source file is used in two places: |
| 44 |
|
|
| 45 |
(1) It is #included by pcre_compile.c when it is compiled in debugging mode |
(1) It is #included by pcre_compile.c when it is compiled in debugging mode |
| 46 |
(DEBUG defined in pcre_internal.h). It is not included in production compiles. |
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production |
| 47 |
|
compiles. |
| 48 |
|
|
| 49 |
(2) It is always #included by pcretest.c, which can be asked to print out a |
(2) It is always #included by pcretest.c, which can be asked to print out a |
| 50 |
compiled regex for debugging purposes. */ |
compiled regex for debugging purposes. */ |
| 55 |
(even without the use of locales) and we want the output always to be the same, |
(even without the use of locales) and we want the output always to be the same, |
| 56 |
for testing purposes. This macro is used in pcretest as well as in this file. */ |
for testing purposes. This macro is used in pcretest as well as in this file. */ |
| 57 |
|
|
| 58 |
|
#ifdef EBCDIC |
| 59 |
|
#define PRINTABLE(c) ((c) >= 64 && (c) < 255) |
| 60 |
|
#else |
| 61 |
#define PRINTABLE(c) ((c) >= 32 && (c) < 127) |
#define PRINTABLE(c) ((c) >= 32 && (c) < 127) |
| 62 |
|
#endif |
| 63 |
|
|
| 64 |
/* The table of operator names. */ |
/* The table of operator names. */ |
| 65 |
|
|
| 180 |
for(;;) |
for(;;) |
| 181 |
{ |
{ |
| 182 |
uschar *ccode; |
uschar *ccode; |
| 183 |
|
const char *flag = " "; |
| 184 |
int c; |
int c; |
| 185 |
int extra = 0; |
int extra = 0; |
| 186 |
|
|
| 191 |
|
|
| 192 |
switch(*code) |
switch(*code) |
| 193 |
{ |
{ |
| 194 |
|
/* ========================================================================== */ |
| 195 |
|
/* These cases are never obeyed. This is a fudge that causes a compile- |
| 196 |
|
time error if the vectors OP_names or _pcre_OP_lengths, which are indexed |
| 197 |
|
by opcode, are not the correct length. It seems to be the only way to do |
| 198 |
|
such a check at compile time, as the sizeof() operator does not work in |
| 199 |
|
the C preprocessor. We do this while compiling pcretest, because that |
| 200 |
|
#includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this |
| 201 |
|
when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then |
| 202 |
|
know the size of _pcre_OP_lengths. */ |
| 203 |
|
|
| 204 |
|
#ifdef COMPILING_PCRETEST |
| 205 |
|
case OP_TABLE_LENGTH: |
| 206 |
|
case OP_TABLE_LENGTH + |
| 207 |
|
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && |
| 208 |
|
(sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)): |
| 209 |
|
break; |
| 210 |
|
#endif |
| 211 |
|
/* ========================================================================== */ |
| 212 |
|
|
| 213 |
case OP_END: |
case OP_END: |
| 214 |
fprintf(f, " %s\n", OP_names[*code]); |
fprintf(f, " %s\n", OP_names[*code]); |
| 215 |
fprintf(f, "------------------------------------------------------------------\n"); |
fprintf(f, "------------------------------------------------------------------\n"); |
| 216 |
return; |
return; |
| 217 |
|
|
|
case OP_OPT: |
|
|
fprintf(f, " %.2x %s", code[1], OP_names[*code]); |
|
|
break; |
|
|
|
|
| 218 |
case OP_CHAR: |
case OP_CHAR: |
| 219 |
fprintf(f, " "); |
fprintf(f, " "); |
| 220 |
do |
do |
| 226 |
fprintf(f, "\n"); |
fprintf(f, "\n"); |
| 227 |
continue; |
continue; |
| 228 |
|
|
| 229 |
case OP_CHARNC: |
case OP_CHARI: |
| 230 |
fprintf(f, " NC "); |
fprintf(f, " /i "); |
| 231 |
do |
do |
| 232 |
{ |
{ |
| 233 |
code++; |
code++; |
| 234 |
code += 1 + print_char(f, code, utf8); |
code += 1 + print_char(f, code, utf8); |
| 235 |
} |
} |
| 236 |
while (*code == OP_CHARNC); |
while (*code == OP_CHARI); |
| 237 |
fprintf(f, "\n"); |
fprintf(f, "\n"); |
| 238 |
continue; |
continue; |
| 239 |
|
|
| 240 |
case OP_CBRA: |
case OP_CBRA: |
| 241 |
|
case OP_CBRAPOS: |
| 242 |
case OP_SCBRA: |
case OP_SCBRA: |
| 243 |
|
case OP_SCBRAPOS: |
| 244 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 245 |
else fprintf(f, " "); |
else fprintf(f, " "); |
| 246 |
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); |
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); |
| 247 |
break; |
break; |
| 248 |
|
|
| 249 |
case OP_BRA: |
case OP_BRA: |
| 250 |
|
case OP_BRAPOS: |
| 251 |
case OP_SBRA: |
case OP_SBRA: |
| 252 |
|
case OP_SBRAPOS: |
| 253 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 254 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 255 |
|
case OP_KETRPOS: |
| 256 |
case OP_ALT: |
case OP_ALT: |
| 257 |
case OP_KET: |
case OP_KET: |
| 258 |
case OP_ASSERT: |
case OP_ASSERT: |
| 268 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", OP_names[*code]); |
| 269 |
break; |
break; |
| 270 |
|
|
| 271 |
|
case OP_CLOSE: |
| 272 |
|
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); |
| 273 |
|
break; |
| 274 |
|
|
| 275 |
case OP_CREF: |
case OP_CREF: |
| 276 |
|
case OP_NCREF: |
| 277 |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
| 278 |
break; |
break; |
| 279 |
|
|
| 285 |
fprintf(f, " Cond recurse %d", c); |
fprintf(f, " Cond recurse %d", c); |
| 286 |
break; |
break; |
| 287 |
|
|
| 288 |
|
case OP_NRREF: |
| 289 |
|
c = GET2(code, 1); |
| 290 |
|
if (c == RREF_ANY) |
| 291 |
|
fprintf(f, " Cond nrecurse any"); |
| 292 |
|
else |
| 293 |
|
fprintf(f, " Cond nrecurse %d", c); |
| 294 |
|
break; |
| 295 |
|
|
| 296 |
case OP_DEF: |
case OP_DEF: |
| 297 |
fprintf(f, " Cond def"); |
fprintf(f, " Cond def"); |
| 298 |
break; |
break; |
| 299 |
|
|
| 300 |
|
case OP_STARI: |
| 301 |
|
case OP_MINSTARI: |
| 302 |
|
case OP_POSSTARI: |
| 303 |
|
case OP_PLUSI: |
| 304 |
|
case OP_MINPLUSI: |
| 305 |
|
case OP_POSPLUSI: |
| 306 |
|
case OP_QUERYI: |
| 307 |
|
case OP_MINQUERYI: |
| 308 |
|
case OP_POSQUERYI: |
| 309 |
|
flag = "/i"; |
| 310 |
|
/* Fall through */ |
| 311 |
case OP_STAR: |
case OP_STAR: |
| 312 |
case OP_MINSTAR: |
case OP_MINSTAR: |
| 313 |
case OP_POSSTAR: |
case OP_POSSTAR: |
| 326 |
case OP_TYPEQUERY: |
case OP_TYPEQUERY: |
| 327 |
case OP_TYPEMINQUERY: |
case OP_TYPEMINQUERY: |
| 328 |
case OP_TYPEPOSQUERY: |
case OP_TYPEPOSQUERY: |
| 329 |
fprintf(f, " "); |
fprintf(f, " %s ", flag); |
| 330 |
if (*code >= OP_TYPESTAR) |
if (*code >= OP_TYPESTAR) |
| 331 |
{ |
{ |
| 332 |
fprintf(f, "%s", OP_names[code[1]]); |
fprintf(f, "%s", OP_names[code[1]]); |
| 340 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", OP_names[*code]); |
| 341 |
break; |
break; |
| 342 |
|
|
| 343 |
|
case OP_EXACTI: |
| 344 |
|
case OP_UPTOI: |
| 345 |
|
case OP_MINUPTOI: |
| 346 |
|
case OP_POSUPTOI: |
| 347 |
|
flag = "/i"; |
| 348 |
|
/* Fall through */ |
| 349 |
case OP_EXACT: |
case OP_EXACT: |
| 350 |
case OP_UPTO: |
case OP_UPTO: |
| 351 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 352 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 353 |
fprintf(f, " "); |
fprintf(f, " %s ", flag); |
| 354 |
extra = print_char(f, code+3, utf8); |
extra = print_char(f, code+3, utf8); |
| 355 |
fprintf(f, "{"); |
fprintf(f, "{"); |
| 356 |
if (*code != OP_EXACT) fprintf(f, "0,"); |
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); |
| 357 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
| 358 |
if (*code == OP_MINUPTO) fprintf(f, "?"); |
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); |
| 359 |
else if (*code == OP_POSUPTO) fprintf(f, "+"); |
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); |
| 360 |
break; |
break; |
| 361 |
|
|
| 362 |
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
| 376 |
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); |
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); |
| 377 |
break; |
break; |
| 378 |
|
|
| 379 |
|
case OP_NOTI: |
| 380 |
|
flag = "/i"; |
| 381 |
|
/* Fall through */ |
| 382 |
case OP_NOT: |
case OP_NOT: |
| 383 |
c = code[1]; |
c = code[1]; |
| 384 |
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); |
| 385 |
else fprintf(f, " [^\\x%02x]", c); |
else fprintf(f, " %s [^\\x%02x]", flag, c); |
| 386 |
break; |
break; |
| 387 |
|
|
| 388 |
|
case OP_NOTSTARI: |
| 389 |
|
case OP_NOTMINSTARI: |
| 390 |
|
case OP_NOTPOSSTARI: |
| 391 |
|
case OP_NOTPLUSI: |
| 392 |
|
case OP_NOTMINPLUSI: |
| 393 |
|
case OP_NOTPOSPLUSI: |
| 394 |
|
case OP_NOTQUERYI: |
| 395 |
|
case OP_NOTMINQUERYI: |
| 396 |
|
case OP_NOTPOSQUERYI: |
| 397 |
|
flag = "/i"; |
| 398 |
|
/* Fall through */ |
| 399 |
|
|
| 400 |
case OP_NOTSTAR: |
case OP_NOTSTAR: |
| 401 |
case OP_NOTMINSTAR: |
case OP_NOTMINSTAR: |
| 402 |
case OP_NOTPOSSTAR: |
case OP_NOTPOSSTAR: |
| 407 |
case OP_NOTMINQUERY: |
case OP_NOTMINQUERY: |
| 408 |
case OP_NOTPOSQUERY: |
case OP_NOTPOSQUERY: |
| 409 |
c = code[1]; |
c = code[1]; |
| 410 |
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); |
| 411 |
else fprintf(f, " [^\\x%02x]", c); |
else fprintf(f, " %s [^\\x%02x]", flag, c); |
| 412 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", OP_names[*code]); |
| 413 |
break; |
break; |
| 414 |
|
|
| 415 |
|
case OP_NOTEXACTI: |
| 416 |
|
case OP_NOTUPTOI: |
| 417 |
|
case OP_NOTMINUPTOI: |
| 418 |
|
case OP_NOTPOSUPTOI: |
| 419 |
|
flag = "/i"; |
| 420 |
|
/* Fall through */ |
| 421 |
|
|
| 422 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
| 423 |
case OP_NOTUPTO: |
case OP_NOTUPTO: |
| 424 |
case OP_NOTMINUPTO: |
case OP_NOTMINUPTO: |
| 425 |
case OP_NOTPOSUPTO: |
case OP_NOTPOSUPTO: |
| 426 |
c = code[3]; |
c = code[3]; |
| 427 |
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c); |
if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c); |
| 428 |
else fprintf(f, " [^\\x%02x]{", c); |
else fprintf(f, " %s [^\\x%02x]{", flag, c); |
| 429 |
if (*code != OP_NOTEXACT) fprintf(f, "0,"); |
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); |
| 430 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
| 431 |
if (*code == OP_NOTMINUPTO) fprintf(f, "?"); |
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); |
| 432 |
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+"); |
else |
| 433 |
|
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); |
| 434 |
break; |
break; |
| 435 |
|
|
| 436 |
case OP_RECURSE: |
case OP_RECURSE: |
| 439 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", OP_names[*code]); |
| 440 |
break; |
break; |
| 441 |
|
|
| 442 |
|
case OP_REFI: |
| 443 |
|
flag = "/i"; |
| 444 |
|
/* Fall through */ |
| 445 |
case OP_REF: |
case OP_REF: |
| 446 |
fprintf(f, " \\%d", GET2(code,1)); |
fprintf(f, " %s \\%d", flag, GET2(code,1)); |
| 447 |
ccode = code + _pcre_OP_lengths[*code]; |
ccode = code + _pcre_OP_lengths[*code]; |
| 448 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
| 449 |
|
|
| 579 |
} |
} |
| 580 |
break; |
break; |
| 581 |
|
|
| 582 |
/* Anything else is just an item with no data*/ |
case OP_MARK: |
| 583 |
|
case OP_PRUNE_ARG: |
| 584 |
|
case OP_SKIP_ARG: |
| 585 |
|
fprintf(f, " %s %s", OP_names[*code], code + 2); |
| 586 |
|
extra += code[1]; |
| 587 |
|
break; |
| 588 |
|
|
| 589 |
|
case OP_THEN: |
| 590 |
|
if (print_lengths) |
| 591 |
|
fprintf(f, " %s %d", OP_names[*code], GET(code, 1)); |
| 592 |
|
else |
| 593 |
|
fprintf(f, " %s", OP_names[*code]); |
| 594 |
|
break; |
| 595 |
|
|
| 596 |
|
case OP_THEN_ARG: |
| 597 |
|
if (print_lengths) |
| 598 |
|
fprintf(f, " %s %d %s", OP_names[*code], GET(code, 1), |
| 599 |
|
code + 2 + LINK_SIZE); |
| 600 |
|
else |
| 601 |
|
fprintf(f, " %s %s", OP_names[*code], code + 2 + LINK_SIZE); |
| 602 |
|
extra += code[1+LINK_SIZE]; |
| 603 |
|
break; |
| 604 |
|
|
| 605 |
|
case OP_CIRCM: |
| 606 |
|
case OP_DOLLM: |
| 607 |
|
flag = "/m"; |
| 608 |
|
/* Fall through */ |
| 609 |
|
|
| 610 |
|
/* Anything else is just an item with no data, but possibly a flag. */ |
| 611 |
|
|
| 612 |
default: |
default: |
| 613 |
fprintf(f, " %s", OP_names[*code]); |
fprintf(f, " %s %s", flag, OP_names[*code]); |
| 614 |
break; |
break; |
| 615 |
} |
} |
| 616 |
|
|