| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
| 8 |
Written by Philip Hazel |
Written by Philip Hazel |
| 9 |
Copyright (c) 1997-2005 University of Cambridge |
Copyright (c) 1997-2007 University of Cambridge |
| 10 |
|
|
| 11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 49 |
compiled regex for debugging purposes. */ |
compiled regex for debugging purposes. */ |
| 50 |
|
|
| 51 |
|
|
| 52 |
|
/* Macro that decides whether a character should be output as a literal or in |
| 53 |
|
hexadecimal. We don't use isprint() because that can vary from system to system |
| 54 |
|
(even without the use of locales) and we want the output always to be the same, |
| 55 |
|
for testing purposes. This macro is used in pcretest as well as in this file. */ |
| 56 |
|
|
| 57 |
|
#define PRINTABLE(c) ((c) >= 32 && (c) < 127) |
| 58 |
|
|
| 59 |
|
/* The table of operator names. */ |
| 60 |
|
|
| 61 |
static const char *OP_names[] = { OP_NAME_LIST }; |
static const char *OP_names[] = { OP_NAME_LIST }; |
| 62 |
|
|
| 63 |
|
|
| 64 |
|
|
| 65 |
/************************************************* |
/************************************************* |
| 66 |
* Print single- or multi-byte character * |
* Print single- or multi-byte character * |
| 67 |
*************************************************/ |
*************************************************/ |
| 71 |
{ |
{ |
| 72 |
int c = *ptr; |
int c = *ptr; |
| 73 |
|
|
| 74 |
|
#ifndef SUPPORT_UTF8 |
| 75 |
|
utf8 = utf8; /* Avoid compiler warning */ |
| 76 |
|
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
| 77 |
|
return 0; |
| 78 |
|
|
| 79 |
|
#else |
| 80 |
if (!utf8 || (c & 0xc0) != 0xc0) |
if (!utf8 || (c & 0xc0) != 0xc0) |
| 81 |
{ |
{ |
| 82 |
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
| 83 |
return 0; |
return 0; |
| 84 |
} |
} |
| 85 |
else |
else |
| 108 |
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); |
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); |
| 109 |
return a; |
return a; |
| 110 |
} |
} |
| 111 |
|
#endif |
| 112 |
} |
} |
| 113 |
|
|
| 114 |
|
|
| 118 |
*************************************************/ |
*************************************************/ |
| 119 |
|
|
| 120 |
/* Return the textual name of a Unicode property, given its type and value, by
searching the _pcre_utt table for an entry whose type and value both match.
If no entry matches, or if the code is compiled without SUPPORT_UCP, the
placeholder string "??" is returned.

Arguments:
  ptype       the property type to look for
  pvalue      the property value to look for

Returns:      pointer to the property name, or to "??" if it is not found
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan from the last entry down to the first. The scan must start at
_pcre_utt_size - 1: this assumes _pcre_utt_size is the number of entries in
_pcre_utt (its definition is not in this file), in which case index
_pcre_utt_size itself is one past the end of the table and must not be read. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
    return _pcre_utt[i].name;
  }
return "??";
#else
/* The arguments are unused without UCP support; the casts stop compilers
warning about that without doing any arithmetic on the values. */

(void)ptype;
(void)pvalue;
return "??";
#endif
}
| 136 |
|
|
| 141 |
*************************************************/ |
*************************************************/ |
| 142 |
|
|
| 143 |
/* Make this function work for a regex with integers in either byte order. |
/* Make this function work for a regex with integers in either byte order. |
| 144 |
However, we assume that what we are passed is a compiled regex. */ |
However, we assume that what we are passed is a compiled regex. The |
| 145 |
|
print_lengths flag controls whether offsets and lengths of items are printed. |
| 146 |
|
They can be turned off from pcretest so that automatic tests on bytecode can be |
| 147 |
|
written that do not depend on the value of LINK_SIZE. */ |
| 148 |
|
|
| 149 |
static void |
static void |
| 150 |
pcre_printint(pcre *external_re, FILE *f) |
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
| 151 |
{ |
{ |
| 152 |
real_pcre *re = (real_pcre *)external_re; |
real_pcre *re = (real_pcre *)external_re; |
| 153 |
uschar *codestart, *code; |
uschar *codestart, *code; |
| 178 |
int c; |
int c; |
| 179 |
int extra = 0; |
int extra = 0; |
| 180 |
|
|
| 181 |
fprintf(f, "%3d ", (int)(code - codestart)); |
if (print_lengths) |
| 182 |
|
fprintf(f, "%3d ", (int)(code - codestart)); |
| 183 |
if (*code >= OP_BRA) |
else |
| 184 |
{ |
fprintf(f, " "); |
|
if (*code - OP_BRA > EXTRACT_BASIC_MAX) |
|
|
fprintf(f, "%3d Bra extra\n", GET(code, 1)); |
|
|
else |
|
|
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA); |
|
|
code += _pcre_OP_lengths[OP_BRA]; |
|
|
continue; |
|
|
} |
|
| 185 |
|
|
| 186 |
switch(*code) |
switch(*code) |
| 187 |
{ |
{ |
| 195 |
break; |
break; |
| 196 |
|
|
| 197 |
case OP_CHAR: |
case OP_CHAR: |
| 198 |
|
fprintf(f, " "); |
| 199 |
|
do |
| 200 |
{ |
{ |
| 201 |
fprintf(f, " "); |
code++; |
| 202 |
do |
code += 1 + print_char(f, code, utf8); |
|
{ |
|
|
code++; |
|
|
code += 1 + print_char(f, code, utf8); |
|
|
} |
|
|
while (*code == OP_CHAR); |
|
|
fprintf(f, "\n"); |
|
|
continue; |
|
| 203 |
} |
} |
| 204 |
break; |
while (*code == OP_CHAR); |
| 205 |
|
fprintf(f, "\n"); |
| 206 |
|
continue; |
| 207 |
|
|
| 208 |
case OP_CHARNC: |
case OP_CHARNC: |
| 209 |
|
fprintf(f, " NC "); |
| 210 |
|
do |
| 211 |
{ |
{ |
| 212 |
fprintf(f, " NC "); |
code++; |
| 213 |
do |
code += 1 + print_char(f, code, utf8); |
|
{ |
|
|
code++; |
|
|
code += 1 + print_char(f, code, utf8); |
|
|
} |
|
|
while (*code == OP_CHARNC); |
|
|
fprintf(f, "\n"); |
|
|
continue; |
|
| 214 |
} |
} |
| 215 |
|
while (*code == OP_CHARNC); |
| 216 |
|
fprintf(f, "\n"); |
| 217 |
|
continue; |
| 218 |
|
|
| 219 |
|
case OP_CBRA: |
| 220 |
|
case OP_SCBRA: |
| 221 |
|
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 222 |
|
else fprintf(f, " "); |
| 223 |
|
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); |
| 224 |
break; |
break; |
| 225 |
|
|
| 226 |
|
case OP_BRA: |
| 227 |
|
case OP_SBRA: |
| 228 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 229 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 230 |
case OP_ALT: |
case OP_ALT: |
| 235 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
| 236 |
case OP_ONCE: |
case OP_ONCE: |
| 237 |
case OP_COND: |
case OP_COND: |
| 238 |
|
case OP_SCOND: |
| 239 |
case OP_REVERSE: |
case OP_REVERSE: |
| 240 |
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]); |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 241 |
|
else fprintf(f, " "); |
| 242 |
|
fprintf(f, "%s", OP_names[*code]); |
| 243 |
break; |
break; |
| 244 |
|
|
| 245 |
case OP_BRANUMBER: |
case OP_CREF: |
| 246 |
printf("%3d %s", GET2(code, 1), OP_names[*code]); |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
| 247 |
break; |
break; |
| 248 |
|
|
| 249 |
case OP_CREF: |
case OP_RREF: |
| 250 |
if (GET2(code, 1) == CREF_RECURSE) |
c = GET2(code, 1); |
| 251 |
fprintf(f, " Cond recurse"); |
if (c == RREF_ANY) |
| 252 |
|
fprintf(f, " Cond recurse any"); |
| 253 |
else |
else |
| 254 |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
fprintf(f, " Cond recurse %d", c); |
| 255 |
|
break; |
| 256 |
|
|
| 257 |
|
case OP_DEF: |
| 258 |
|
fprintf(f, " Cond def"); |
| 259 |
break; |
break; |
| 260 |
|
|
| 261 |
case OP_STAR: |
case OP_STAR: |
| 262 |
case OP_MINSTAR: |
case OP_MINSTAR: |
| 263 |
|
case OP_POSSTAR: |
| 264 |
case OP_PLUS: |
case OP_PLUS: |
| 265 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 266 |
|
case OP_POSPLUS: |
| 267 |
case OP_QUERY: |
case OP_QUERY: |
| 268 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 269 |
|
case OP_POSQUERY: |
| 270 |
case OP_TYPESTAR: |
case OP_TYPESTAR: |
| 271 |
case OP_TYPEMINSTAR: |
case OP_TYPEMINSTAR: |
| 272 |
|
case OP_TYPEPOSSTAR: |
| 273 |
case OP_TYPEPLUS: |
case OP_TYPEPLUS: |
| 274 |
case OP_TYPEMINPLUS: |
case OP_TYPEMINPLUS: |
| 275 |
|
case OP_TYPEPOSPLUS: |
| 276 |
case OP_TYPEQUERY: |
case OP_TYPEQUERY: |
| 277 |
case OP_TYPEMINQUERY: |
case OP_TYPEMINQUERY: |
| 278 |
|
case OP_TYPEPOSQUERY: |
| 279 |
fprintf(f, " "); |
fprintf(f, " "); |
| 280 |
if (*code >= OP_TYPESTAR) |
if (*code >= OP_TYPESTAR) |
| 281 |
{ |
{ |
| 282 |
fprintf(f, "%s", OP_names[code[1]]); |
fprintf(f, "%s", OP_names[code[1]]); |
| 283 |
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
| 284 |
{ |
{ |
| 285 |
fprintf(f, " %s ", get_ucpname(code[2])); |
fprintf(f, " %s ", get_ucpname(code[2], code[3])); |
| 286 |
extra = 1; |
extra = 2; |
| 287 |
} |
} |
| 288 |
} |
} |
| 289 |
else extra = print_char(f, code+1, utf8); |
else extra = print_char(f, code+1, utf8); |
| 293 |
case OP_EXACT: |
case OP_EXACT: |
| 294 |
case OP_UPTO: |
case OP_UPTO: |
| 295 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 296 |
|
case OP_POSUPTO: |
| 297 |
fprintf(f, " "); |
fprintf(f, " "); |
| 298 |
extra = print_char(f, code+3, utf8); |
extra = print_char(f, code+3, utf8); |
| 299 |
fprintf(f, "{"); |
fprintf(f, "{"); |
| 300 |
if (*code != OP_EXACT) fprintf(f, ","); |
if (*code != OP_EXACT) fprintf(f, "0,"); |
| 301 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
| 302 |
if (*code == OP_MINUPTO) fprintf(f, "?"); |
if (*code == OP_MINUPTO) fprintf(f, "?"); |
| 303 |
|
else if (*code == OP_POSUPTO) fprintf(f, "+"); |
| 304 |
break; |
break; |
| 305 |
|
|
| 306 |
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
| 307 |
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
| 308 |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
| 309 |
|
case OP_TYPEPOSUPTO: |
| 310 |
fprintf(f, " %s", OP_names[code[3]]); |
fprintf(f, " %s", OP_names[code[3]]); |
| 311 |
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) |
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) |
| 312 |
{ |
{ |
| 313 |
fprintf(f, " %s ", get_ucpname(code[4])); |
fprintf(f, " %s ", get_ucpname(code[4], code[5])); |
| 314 |
extra = 1; |
extra = 2; |
| 315 |
} |
} |
| 316 |
fprintf(f, "{"); |
fprintf(f, "{"); |
| 317 |
if (*code != OP_TYPEEXACT) fprintf(f, "0,"); |
if (*code != OP_TYPEEXACT) fprintf(f, "0,"); |
| 318 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
| 319 |
if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); |
if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); |
| 320 |
|
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); |
| 321 |
break; |
break; |
| 322 |
|
|
| 323 |
case OP_NOT: |
case OP_NOT: |
| 324 |
if (isprint(c = code[1])) fprintf(f, " [^%c]", c); |
c = code[1]; |
| 325 |
|
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
| 326 |
else fprintf(f, " [^\\x%02x]", c); |
else fprintf(f, " [^\\x%02x]", c); |
| 327 |
break; |
break; |
| 328 |
|
|
| 329 |
case OP_NOTSTAR: |
case OP_NOTSTAR: |
| 330 |
case OP_NOTMINSTAR: |
case OP_NOTMINSTAR: |
| 331 |
|
case OP_NOTPOSSTAR: |
| 332 |
case OP_NOTPLUS: |
case OP_NOTPLUS: |
| 333 |
case OP_NOTMINPLUS: |
case OP_NOTMINPLUS: |
| 334 |
|
case OP_NOTPOSPLUS: |
| 335 |
case OP_NOTQUERY: |
case OP_NOTQUERY: |
| 336 |
case OP_NOTMINQUERY: |
case OP_NOTMINQUERY: |
| 337 |
if (isprint(c = code[1])) fprintf(f, " [^%c]", c); |
case OP_NOTPOSQUERY: |
| 338 |
|
c = code[1]; |
| 339 |
|
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
| 340 |
else fprintf(f, " [^\\x%02x]", c); |
else fprintf(f, " [^\\x%02x]", c); |
| 341 |
fprintf(f, "%s", OP_names[*code]); |
fprintf(f, "%s", OP_names[*code]); |
| 342 |
break; |
break; |
| 344 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
| 345 |
case OP_NOTUPTO: |
case OP_NOTUPTO: |
| 346 |
case OP_NOTMINUPTO: |
case OP_NOTMINUPTO: |
| 347 |
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c); |
case OP_NOTPOSUPTO: |
| 348 |
|
c = code[3]; |
| 349 |
|
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c); |
| 350 |
else fprintf(f, " [^\\x%02x]{", c); |
else fprintf(f, " [^\\x%02x]{", c); |
| 351 |
if (*code != OP_NOTEXACT) fprintf(f, "0,"); |
if (*code != OP_NOTEXACT) fprintf(f, "0,"); |
| 352 |
fprintf(f, "%d}", GET2(code,1)); |
fprintf(f, "%d}", GET2(code,1)); |
| 353 |
if (*code == OP_NOTMINUPTO) fprintf(f, "?"); |
if (*code == OP_NOTMINUPTO) fprintf(f, "?"); |
| 354 |
|
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+"); |
| 355 |
break; |
break; |
| 356 |
|
|
| 357 |
case OP_RECURSE: |
case OP_RECURSE: |
| 358 |
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]); |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 359 |
|
else fprintf(f, " "); |
| 360 |
|
fprintf(f, "%s", OP_names[*code]); |
| 361 |
break; |
break; |
| 362 |
|
|
| 363 |
case OP_REF: |
case OP_REF: |
| 372 |
|
|
| 373 |
case OP_PROP: |
case OP_PROP: |
| 374 |
case OP_NOTPROP: |
case OP_NOTPROP: |
| 375 |
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1])); |
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2])); |
| 376 |
break; |
break; |
| 377 |
|
|
| 378 |
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in |
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in |
| 413 |
for (j = i+1; j < 256; j++) |
for (j = i+1; j < 256; j++) |
| 414 |
if ((ccode[j/8] & (1 << (j&7))) == 0) break; |
if ((ccode[j/8] & (1 << (j&7))) == 0) break; |
| 415 |
if (i == '-' || i == ']') fprintf(f, "\\"); |
if (i == '-' || i == ']') fprintf(f, "\\"); |
| 416 |
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i); |
if (PRINTABLE(i)) fprintf(f, "%c", i); |
| 417 |
|
else fprintf(f, "\\x%02x", i); |
| 418 |
if (--j > i) |
if (--j > i) |
| 419 |
{ |
{ |
| 420 |
if (j != i + 1) fprintf(f, "-"); |
if (j != i + 1) fprintf(f, "-"); |
| 421 |
if (j == '-' || j == ']') fprintf(f, "\\"); |
if (j == '-' || j == ']') fprintf(f, "\\"); |
| 422 |
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j); |
if (PRINTABLE(j)) fprintf(f, "%c", j); |
| 423 |
|
else fprintf(f, "\\x%02x", j); |
| 424 |
} |
} |
| 425 |
i = j; |
i = j; |
| 426 |
} |
} |
| 437 |
{ |
{ |
| 438 |
if (ch == XCL_PROP) |
if (ch == XCL_PROP) |
| 439 |
{ |
{ |
| 440 |
fprintf(f, "\\p{%s}", get_ucpname(*ccode++)); |
int ptype = *ccode++; |
| 441 |
|
int pvalue = *ccode++; |
| 442 |
|
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); |
| 443 |
} |
} |
| 444 |
else if (ch == XCL_NOTPROP) |
else if (ch == XCL_NOTPROP) |
| 445 |
{ |
{ |
| 446 |
fprintf(f, "\\P{%s}", get_ucpname(*ccode++)); |
int ptype = *ccode++; |
| 447 |
|
int pvalue = *ccode++; |
| 448 |
|
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); |
| 449 |
} |
} |
| 450 |
else |
else |
| 451 |
{ |
{ |
| 487 |
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
| 488 |
extra += _pcre_OP_lengths[*ccode]; |
extra += _pcre_OP_lengths[*ccode]; |
| 489 |
break; |
break; |
| 490 |
|
|
| 491 |
|
/* Do nothing if it's not a repeat; this code stops picky compilers |
| 492 |
|
warning about the lack of a default code path. */ |
| 493 |
|
|
| 494 |
|
default: |
| 495 |
|
break; |
| 496 |
} |
} |
| 497 |
} |
} |
| 498 |
break; |
break; |