| 1 |
ph10 |
805 |
/************************************************* |
| 2 |
|
|
* Perl-Compatible Regular Expressions * |
| 3 |
|
|
*************************************************/ |
| 4 |
|
|
|
| 5 |
|
|
/* PCRE is a library of functions to support regular expressions whose syntax |
| 6 |
|
|
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
|
| 8 |
|
|
Written by Philip Hazel |
| 9 |
ph10 |
830 |
Copyright (c) 1997-2012 University of Cambridge |
| 10 |
ph10 |
805 |
|
| 11 |
|
|
----------------------------------------------------------------------------- |
| 12 |
|
|
Redistribution and use in source and binary forms, with or without |
| 13 |
|
|
modification, are permitted provided that the following conditions are met: |
| 14 |
|
|
|
| 15 |
|
|
* Redistributions of source code must retain the above copyright notice, |
| 16 |
|
|
this list of conditions and the following disclaimer. |
| 17 |
|
|
|
| 18 |
|
|
* Redistributions in binary form must reproduce the above copyright |
| 19 |
|
|
notice, this list of conditions and the following disclaimer in the |
| 20 |
|
|
documentation and/or other materials provided with the distribution. |
| 21 |
|
|
|
| 22 |
|
|
* Neither the name of the University of Cambridge nor the names of its |
| 23 |
|
|
contributors may be used to endorse or promote products derived from |
| 24 |
|
|
this software without specific prior written permission. |
| 25 |
|
|
|
| 26 |
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 27 |
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 28 |
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 29 |
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 30 |
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 31 |
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 32 |
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 33 |
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 34 |
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 35 |
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 36 |
|
|
POSSIBILITY OF SUCH DAMAGE. |
| 37 |
|
|
----------------------------------------------------------------------------- |
| 38 |
|
|
*/ |
| 39 |
|
|
|
| 40 |
|
|
|
| 41 |
|
|
/* This module contains a PCRE private debugging function for printing out the |
| 42 |
|
|
internal form of a compiled regular expression, along with some supporting |
| 43 |
|
|
local functions. This source file is used in two places: |
| 44 |
|
|
|
| 45 |
|
|
(1) It is #included by pcre_compile.c when it is compiled in debugging mode |
| 46 |
|
|
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production |
| 47 |
zherczeg |
806 |
compiles. In this case PCRE_INCLUDED is defined. |
| 48 |
ph10 |
805 |
|
| 49 |
|
|
(2) It is also compiled separately and linked with pcretest.c, which can be |
| 50 |
|
|
asked to print out a compiled regex for debugging purposes. */ |
| 51 |
|
|
|
| 52 |
zherczeg |
806 |
#ifndef PCRE_INCLUDED |
| 53 |
|
|
|
| 54 |
ph10 |
805 |
#ifdef HAVE_CONFIG_H |
| 55 |
|
|
#include "config.h" |
| 56 |
|
|
#endif |
| 57 |
|
|
|
| 58 |
|
|
/* We have to include pcre_internal.h because we need the internal info for |
| 59 |
|
|
displaying the results of pcre_study() and we also need to know about the |
| 60 |
|
|
internal macros, structures, and other internal data values; pcretest has |
| 61 |
|
|
"inside information" compared to a program that strictly follows the PCRE API. |
| 62 |
|
|
|
| 63 |
|
|
Although pcre_internal.h does itself include pcre.h, we explicitly include it |
| 64 |
|
|
here before pcre_internal.h so that the PCRE_EXP_xxx macros get set |
| 65 |
|
|
appropriately for an application, not for building PCRE. */ |
| 66 |
|
|
|
| 67 |
|
|
#include "pcre.h" |
| 68 |
|
|
#include "pcre_internal.h" |
| 69 |
|
|
|
| 70 |
|
|
/* These are the functions that are contained within. It doesn't seem worth |
| 71 |
|
|
having a separate .h file just for this. */ |
| 72 |
|
|
|
| 73 |
zherczeg |
806 |
#endif /* PCRE_INCLUDED */ |
| 74 |
|
|
|
| 75 |
|
|
/* Forward declaration of the printing function that is defined below. It is
declared static when PCRE_INCLUDED is defined; its name is pcre_printint when
COMPILE_PCRE8 is defined and pcre16_printint otherwise. */
#ifdef PCRE_INCLUDED |
| 76 |
|
|
static /* Keep the following function as private. */ |
| 77 |
|
|
#endif |
| 78 |
ph10 |
805 |
#ifdef COMPILE_PCRE8 |
| 79 |
|
|
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths); |
| 80 |
zherczeg |
806 |
#else |
| 81 |
ph10 |
805 |
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths); |
| 82 |
|
|
#endif |
| 83 |
|
|
|
| 84 |
|
|
/* Macro that decides whether a character should be output as a literal or in |
| 85 |
|
|
hexadecimal. We don't use isprint() because that can vary from system to system |
| 86 |
|
|
(even without the use of locales) and we want the output always to be the same, |
| 87 |
|
|
for testing purposes. */ |
| 88 |
|
|
|
| 89 |
|
|
/* PRINTABLE(c) is non-zero when c is to be output as a literal character:
values 64..254 when EBCDIC is defined, values 32..126 otherwise. The argument
is evaluated twice, so it must not have side effects. */
#ifdef EBCDIC |
| 90 |
|
|
#define PRINTABLE(c) ((c) >= 64 && (c) < 255) |
| 91 |
|
|
#else |
| 92 |
|
|
#define PRINTABLE(c) ((c) >= 32 && (c) < 127) |
| 93 |
|
|
#endif |
| 94 |
|
|
|
| 95 |
|
|
/* The table of operator names. */ |
| 96 |
|
|
|
| 97 |
|
|
static const char *OP_names[] = { OP_NAME_LIST }; /* Indexed by opcode */ |
| 98 |
|
|
|
| 99 |
|
|
/* This table of operator lengths is not actually used by the working code, |
| 100 |
|
|
but its size is needed for a check that ensures it is the correct size for the |
| 101 |
|
|
number of opcodes (thus catching update omissions). */ |
| 102 |
|
|
|
| 103 |
|
|
static const pcre_uint8 OP_lengths[] = { OP_LENGTHS }; /* Only its sizeof() is used, in the opcode-count check below */ |
| 104 |
|
|
|
| 105 |
|
|
|
| 106 |
|
|
|
| 107 |
|
|
/************************************************* |
| 108 |
|
|
* Print single- or multi-byte character * |
| 109 |
|
|
*************************************************/ |
| 110 |
|
|
|
| 111 |
|
|
static int |
| 112 |
|
|
print_char(FILE *f, pcre_uchar *ptr, BOOL utf) |
| 113 |
|
|
{ |
| 114 |
|
|
int c = *ptr; |
| 115 |
|
|
|
| 116 |
|
|
#ifndef SUPPORT_UTF |
| 117 |
zherczeg |
826 |
|
| 118 |
ph10 |
805 |
(void)utf; /* Avoid compiler warning */ |
| 119 |
zherczeg |
826 |
if (PRINTABLE(c)) fprintf(f, "%c", c); |
| 120 |
|
|
else if (c <= 0xff) fprintf(f, "\\x%02x", c); |
| 121 |
|
|
else fprintf(f, "\\x{%x}", c); |
| 122 |
ph10 |
805 |
return 0; |
| 123 |
|
|
|
| 124 |
|
|
#else |
| 125 |
|
|
|
| 126 |
|
|
#ifdef COMPILE_PCRE8 |
| 127 |
|
|
|
| 128 |
|
|
if (!utf || (c & 0xc0) != 0xc0) |
| 129 |
|
|
{ |
| 130 |
|
|
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
| 131 |
|
|
return 0; |
| 132 |
|
|
} |
| 133 |
|
|
else |
| 134 |
|
|
{ |
| 135 |
|
|
int i; |
| 136 |
|
|
int a = PRIV(utf8_table4)[c & 0x3f]; /* Number of additional bytes */ |
| 137 |
|
|
int s = 6*a; |
| 138 |
|
|
c = (c & PRIV(utf8_table3)[a]) << s; |
| 139 |
|
|
for (i = 1; i <= a; i++) |
| 140 |
|
|
{ |
| 141 |
|
|
/* This is a check for malformed UTF-8; it should only occur if the sanity |
| 142 |
|
|
check has been turned off. Rather than swallow random bytes, just stop if |
| 143 |
|
|
we hit a bad one. Print it with \X instead of \x as an indication. */ |
| 144 |
|
|
|
| 145 |
|
|
if ((ptr[i] & 0xc0) != 0x80) |
| 146 |
|
|
{ |
| 147 |
|
|
fprintf(f, "\\X{%x}", c); |
| 148 |
|
|
return i - 1; |
| 149 |
|
|
} |
| 150 |
|
|
|
| 151 |
|
|
/* The byte is OK */ |
| 152 |
|
|
|
| 153 |
|
|
s -= 6; |
| 154 |
|
|
c |= (ptr[i] & 0x3f) << s; |
| 155 |
|
|
} |
| 156 |
|
|
fprintf(f, "\\x{%x}", c); |
| 157 |
|
|
return a; |
| 158 |
|
|
} |
| 159 |
|
|
|
| 160 |
|
|
#else |
| 161 |
|
|
|
| 162 |
|
|
#ifdef COMPILE_PCRE16 |
| 163 |
|
|
|
| 164 |
|
|
if (!utf || (c & 0xfc00) != 0xd800) |
| 165 |
|
|
{ |
| 166 |
|
|
if (PRINTABLE(c)) fprintf(f, "%c", c); |
| 167 |
|
|
else if (c <= 0xff) fprintf(f, "\\x%02x", c); |
| 168 |
|
|
else fprintf(f, "\\x{%x}", c); |
| 169 |
|
|
return 0; |
| 170 |
|
|
} |
| 171 |
|
|
else |
| 172 |
|
|
{ |
| 173 |
|
|
/* This is a check for malformed UTF-16; it should only occur if the sanity |
| 174 |
|
|
check has been turned off. Rather than swallow a low surrogate, just stop if |
| 175 |
|
|
we hit a bad one. Print it with \X instead of \x as an indication. */ |
| 176 |
|
|
|
| 177 |
|
|
if ((ptr[1] & 0xfc00) != 0xdc00) |
| 178 |
|
|
{ |
| 179 |
|
|
fprintf(f, "\\X{%x}", c); |
| 180 |
|
|
return 0; |
| 181 |
|
|
} |
| 182 |
|
|
|
| 183 |
|
|
c = (((c & 0x3ff) << 10) | (ptr[1] & 0x3ff)) + 0x10000; |
| 184 |
|
|
fprintf(f, "\\x{%x}", c); |
| 185 |
|
|
return 1; |
| 186 |
|
|
} |
| 187 |
|
|
|
| 188 |
|
|
#endif /* COMPILE_PCRE16 */ |
| 189 |
|
|
|
| 190 |
|
|
#endif /* COMPILE_PCRE8 */ |
| 191 |
|
|
|
| 192 |
|
|
#endif /* SUPPORT_UTF */ |
| 193 |
|
|
} |
| 194 |
|
|
|
| 195 |
|
|
/************************************************* |
| 196 |
|
|
* Print uchar string (regardless of utf) * |
| 197 |
|
|
*************************************************/ |
| 198 |
|
|
|
| 199 |
|
|
static void |
| 200 |
|
|
print_puchar(FILE *f, PCRE_PUCHAR ptr) |
| 201 |
|
|
{ |
| 202 |
|
|
while (*ptr != '\0') |
| 203 |
|
|
{ |
| 204 |
|
|
register int c = *ptr++; |
| 205 |
|
|
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c); |
| 206 |
|
|
} |
| 207 |
|
|
} |
| 208 |
|
|
|
| 209 |
|
|
/************************************************* |
| 210 |
|
|
* Find Unicode property name * |
| 211 |
|
|
*************************************************/ |
| 212 |
|
|
|
| 213 |
|
|
/* Look up the name of a Unicode property from its type and value.

When SUPPORT_UCP is defined, the property table is searched from its last
entry towards its first, and the name of the first entry found whose type and
value both match is returned. When SUPPORT_UCP is not defined there is no
table, and the result is always "??".

Arguments:
  ptype      the property type
  pvalue     the property value

Returns:     pointer to the property name, or to "??" if there is no match
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = PRIV(utt_size) - 1; i >= 0; i--)
  {
  if (ptype == PRIV(utt)[i].type && pvalue == PRIV(utt)[i].value)
    return PRIV(utt_names) + PRIV(utt)[i].name_offset;
  }
return "??";
#else
/* The arguments are not needed here. Cast them to void to avoid compiler
warnings about unused parameters; no arithmetic is done on them, so no
combination of values can cause signed integer overflow. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
| 229 |
|
|
|
| 230 |
|
|
|
| 231 |
|
|
|
| 232 |
|
|
/************************************************* |
| 233 |
|
|
* Print compiled regex * |
| 234 |
|
|
*************************************************/ |
| 235 |
|
|
|
| 236 |
|
|
/* Make this function work for a regex with integers either byte order. |
| 237 |
|
|
However, we assume that what we are passed is a compiled regex. The |
| 238 |
|
|
print_lengths flag controls whether offsets and lengths of items are printed. |
| 239 |
|
|
They can be turned off from pcretest so that automatic tests on bytecode can be |
| 240 |
|
|
written that do not depend on the value of LINK_SIZE. */ |
| 241 |
|
|
|
| 242 |
zherczeg |
806 |
#ifdef PCRE_INCLUDED |
| 243 |
|
|
static /* Keep the following function as private. */ |
| 244 |
|
|
#endif |
| 245 |
ph10 |
805 |
#ifdef COMPILE_PCRE8 |
| 246 |
|
|
void |
| 247 |
|
|
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
| 248 |
|
|
#else |
| 249 |
|
|
void |
| 250 |
|
|
pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
| 251 |
|
|
#endif |
| 252 |
|
|
{ |
| 253 |
zherczeg |
852 |
REAL_PCRE *re = (REAL_PCRE *)external_re; |
| 254 |
ph10 |
805 |
pcre_uchar *codestart, *code; |
| 255 |
|
|
BOOL utf; |
| 256 |
|
|
|
| 257 |
|
|
unsigned int options = re->options; |
| 258 |
|
|
int offset = re->name_table_offset; |
| 259 |
|
|
int count = re->name_count; |
| 260 |
|
|
int size = re->name_entry_size; |
| 261 |
|
|
|
| 262 |
|
|
if (re->magic_number != MAGIC_NUMBER) |
| 263 |
|
|
{ |
| 264 |
|
|
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); |
| 265 |
|
|
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); |
| 266 |
|
|
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); |
| 267 |
|
|
options = ((options << 24) & 0xff000000) | |
| 268 |
|
|
((options << 8) & 0x00ff0000) | |
| 269 |
|
|
((options >> 8) & 0x0000ff00) | |
| 270 |
|
|
((options >> 24) & 0x000000ff); |
| 271 |
|
|
} |
| 272 |
|
|
|
| 273 |
|
|
code = codestart = (pcre_uchar *)re + offset + count * size; |
| 274 |
|
|
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
| 275 |
|
|
utf = (options & PCRE_UTF8) != 0; |
| 276 |
|
|
|
| 277 |
|
|
for(;;) |
| 278 |
|
|
{ |
| 279 |
|
|
pcre_uchar *ccode; |
| 280 |
|
|
const char *flag = " "; |
| 281 |
|
|
int c; |
| 282 |
|
|
int extra = 0; |
| 283 |
|
|
|
| 284 |
|
|
if (print_lengths) |
| 285 |
|
|
fprintf(f, "%3d ", (int)(code - codestart)); |
| 286 |
|
|
else |
| 287 |
|
|
fprintf(f, " "); |
| 288 |
|
|
|
| 289 |
|
|
switch(*code) |
| 290 |
|
|
{ |
| 291 |
|
|
/* ========================================================================== */ |
| 292 |
|
|
/* These cases are never obeyed. This is a fudge that causes a compile- |
| 293 |
|
|
time error if the vectors OP_names or OP_lengths, which are indexed |
| 294 |
|
|
by opcode, are not the correct length. It seems to be the only way to do |
| 295 |
|
|
such a check at compile time, as the sizeof() operator does not work in |
| 296 |
|
|
the C preprocessor. */ |
| 297 |
|
|
|
| 298 |
|
|
case OP_TABLE_LENGTH: |
| 299 |
|
|
case OP_TABLE_LENGTH + |
| 300 |
|
|
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && |
| 301 |
|
|
(sizeof(OP_lengths) == OP_TABLE_LENGTH)): |
| 302 |
|
|
break; |
| 303 |
|
|
/* ========================================================================== */ |
| 304 |
|
|
|
| 305 |
|
|
case OP_END: |
| 306 |
|
|
fprintf(f, " %s\n", OP_names[*code]); |
| 307 |
|
|
fprintf(f, "------------------------------------------------------------------\n"); |
| 308 |
|
|
return; |
| 309 |
|
|
|
| 310 |
|
|
case OP_CHAR: |
| 311 |
|
|
fprintf(f, " "); |
| 312 |
|
|
do |
| 313 |
|
|
{ |
| 314 |
|
|
code++; |
| 315 |
|
|
code += 1 + print_char(f, code, utf); |
| 316 |
|
|
} |
| 317 |
|
|
while (*code == OP_CHAR); |
| 318 |
|
|
fprintf(f, "\n"); |
| 319 |
|
|
continue; |
| 320 |
|
|
|
| 321 |
|
|
case OP_CHARI: |
| 322 |
|
|
fprintf(f, " /i "); |
| 323 |
|
|
do |
| 324 |
|
|
{ |
| 325 |
|
|
code++; |
| 326 |
|
|
code += 1 + print_char(f, code, utf); |
| 327 |
|
|
} |
| 328 |
|
|
while (*code == OP_CHARI); |
| 329 |
|
|
fprintf(f, "\n"); |
| 330 |
|
|
continue; |
| 331 |
|
|
|
| 332 |
|
|
case OP_CBRA: |
| 333 |
|
|
case OP_CBRAPOS: |
| 334 |
|
|
case OP_SCBRA: |
| 335 |
|
|
case OP_SCBRAPOS: |
| 336 |
|
|
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 337 |
|
|
else fprintf(f, " "); |
| 338 |
|
|
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); |
| 339 |
|
|
break; |
| 340 |
|
|
|
| 341 |
|
|
case OP_BRA: |
| 342 |
|
|
case OP_BRAPOS: |
| 343 |
|
|
case OP_SBRA: |
| 344 |
|
|
case OP_SBRAPOS: |
| 345 |
|
|
case OP_KETRMAX: |
| 346 |
|
|
case OP_KETRMIN: |
| 347 |
|
|
case OP_KETRPOS: |
| 348 |
|
|
case OP_ALT: |
| 349 |
|
|
case OP_KET: |
| 350 |
|
|
case OP_ASSERT: |
| 351 |
|
|
case OP_ASSERT_NOT: |
| 352 |
|
|
case OP_ASSERTBACK: |
| 353 |
|
|
case OP_ASSERTBACK_NOT: |
| 354 |
|
|
case OP_ONCE: |
| 355 |
|
|
case OP_ONCE_NC: |
| 356 |
|
|
case OP_COND: |
| 357 |
|
|
case OP_SCOND: |
| 358 |
|
|
case OP_REVERSE: |
| 359 |
|
|
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 360 |
|
|
else fprintf(f, " "); |
| 361 |
|
|
fprintf(f, "%s", OP_names[*code]); |
| 362 |
|
|
break; |
| 363 |
|
|
|
| 364 |
|
|
case OP_CLOSE: |
| 365 |
|
|
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); |
| 366 |
|
|
break; |
| 367 |
|
|
|
| 368 |
|
|
case OP_CREF: |
| 369 |
|
|
case OP_NCREF: |
| 370 |
|
|
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
| 371 |
|
|
break; |
| 372 |
|
|
|
| 373 |
|
|
case OP_RREF: |
| 374 |
|
|
c = GET2(code, 1); |
| 375 |
|
|
if (c == RREF_ANY) |
| 376 |
|
|
fprintf(f, " Cond recurse any"); |
| 377 |
|
|
else |
| 378 |
|
|
fprintf(f, " Cond recurse %d", c); |
| 379 |
|
|
break; |
| 380 |
|
|
|
| 381 |
|
|
case OP_NRREF: |
| 382 |
|
|
c = GET2(code, 1); |
| 383 |
|
|
if (c == RREF_ANY) |
| 384 |
|
|
fprintf(f, " Cond nrecurse any"); |
| 385 |
|
|
else |
| 386 |
|
|
fprintf(f, " Cond nrecurse %d", c); |
| 387 |
|
|
break; |
| 388 |
|
|
|
| 389 |
|
|
case OP_DEF: |
| 390 |
|
|
fprintf(f, " Cond def"); |
| 391 |
|
|
break; |
| 392 |
|
|
|
| 393 |
|
|
case OP_STARI: |
| 394 |
|
|
case OP_MINSTARI: |
| 395 |
|
|
case OP_POSSTARI: |
| 396 |
|
|
case OP_PLUSI: |
| 397 |
|
|
case OP_MINPLUSI: |
| 398 |
|
|
case OP_POSPLUSI: |
| 399 |
|
|
case OP_QUERYI: |
| 400 |
|
|
case OP_MINQUERYI: |
| 401 |
|
|
case OP_POSQUERYI: |
| 402 |
|
|
flag = "/i"; |
| 403 |
|
|
/* Fall through */ |
| 404 |
|
|
case OP_STAR: |
| 405 |
|
|
case OP_MINSTAR: |
| 406 |
|
|
case OP_POSSTAR: |
| 407 |
|
|
case OP_PLUS: |
| 408 |
|
|
case OP_MINPLUS: |
| 409 |
|
|
case OP_POSPLUS: |
| 410 |
|
|
case OP_QUERY: |
| 411 |
|
|
case OP_MINQUERY: |
| 412 |
|
|
case OP_POSQUERY: |
| 413 |
|
|
case OP_TYPESTAR: |
| 414 |
|
|
case OP_TYPEMINSTAR: |
| 415 |
|
|
case OP_TYPEPOSSTAR: |
| 416 |
|
|
case OP_TYPEPLUS: |
| 417 |
|
|
case OP_TYPEMINPLUS: |
| 418 |
|
|
case OP_TYPEPOSPLUS: |
| 419 |
|
|
case OP_TYPEQUERY: |
| 420 |
|
|
case OP_TYPEMINQUERY: |
| 421 |
|
|
case OP_TYPEPOSQUERY: |
| 422 |
|
|
fprintf(f, " %s ", flag); |
| 423 |
|
|
if (*code >= OP_TYPESTAR) |
| 424 |
|
|
{ |
| 425 |
|
|
fprintf(f, "%s", OP_names[code[1]]); |
| 426 |
|
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
| 427 |
|
|
{ |
| 428 |
|
|
fprintf(f, " %s ", get_ucpname(code[2], code[3])); |
| 429 |
|
|
extra = 2; |
| 430 |
|
|
} |
| 431 |
|
|
} |
| 432 |
|
|
else extra = print_char(f, code+1, utf); |
| 433 |
|
|
fprintf(f, "%s", OP_names[*code]); |
| 434 |
|
|
break; |
| 435 |
|
|
|
| 436 |
|
|
case OP_EXACTI: |
| 437 |
|
|
case OP_UPTOI: |
| 438 |
|
|
case OP_MINUPTOI: |
| 439 |
|
|
case OP_POSUPTOI: |
| 440 |
|
|
flag = "/i"; |
| 441 |
|
|
/* Fall through */ |
| 442 |
|
|
case OP_EXACT: |
| 443 |
|
|
case OP_UPTO: |
| 444 |
|
|
case OP_MINUPTO: |
| 445 |
|
|
case OP_POSUPTO: |
| 446 |
|
|
fprintf(f, " %s ", flag); |
| 447 |
|
|
extra = print_char(f, code + 1 + IMM2_SIZE, utf); |
| 448 |
|
|
fprintf(f, "{"); |
| 449 |
|
|
if (*code != OP_EXACT && *code != OP_EXACTI) fprintf(f, "0,"); |
| 450 |
|
|
fprintf(f, "%d}", GET2(code,1)); |
| 451 |
|
|
if (*code == OP_MINUPTO || *code == OP_MINUPTOI) fprintf(f, "?"); |
| 452 |
|
|
else if (*code == OP_POSUPTO || *code == OP_POSUPTOI) fprintf(f, "+"); |
| 453 |
|
|
break; |
| 454 |
|
|
|
| 455 |
|
|
case OP_TYPEEXACT: |
| 456 |
|
|
case OP_TYPEUPTO: |
| 457 |
|
|
case OP_TYPEMINUPTO: |
| 458 |
|
|
case OP_TYPEPOSUPTO: |
| 459 |
|
|
fprintf(f, " %s", OP_names[code[1 + IMM2_SIZE]]); |
| 460 |
|
|
if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP) |
| 461 |
|
|
{ |
| 462 |
|
|
fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1], |
| 463 |
|
|
code[1 + IMM2_SIZE + 2])); |
| 464 |
|
|
extra = 2; |
| 465 |
|
|
} |
| 466 |
|
|
fprintf(f, "{"); |
| 467 |
|
|
if (*code != OP_TYPEEXACT) fprintf(f, "0,"); |
| 468 |
|
|
fprintf(f, "%d}", GET2(code,1)); |
| 469 |
|
|
if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); |
| 470 |
|
|
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); |
| 471 |
|
|
break; |
| 472 |
|
|
|
| 473 |
|
|
case OP_NOTI: |
| 474 |
|
|
flag = "/i"; |
| 475 |
|
|
/* Fall through */ |
| 476 |
|
|
case OP_NOT: |
| 477 |
|
|
c = code[1]; |
| 478 |
|
|
if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); |
| 479 |
ph10 |
810 |
else if (utf || c > 0xff) |
| 480 |
|
|
fprintf(f, " %s [^\\x{%02x}]", flag, c); |
| 481 |
ph10 |
842 |
else |
| 482 |
ph10 |
810 |
fprintf(f, " %s [^\\x%02x]", flag, c); |
| 483 |
ph10 |
805 |
break; |
| 484 |
|
|
|
| 485 |
|
|
case OP_NOTSTARI: |
| 486 |
|
|
case OP_NOTMINSTARI: |
| 487 |
|
|
case OP_NOTPOSSTARI: |
| 488 |
|
|
case OP_NOTPLUSI: |
| 489 |
|
|
case OP_NOTMINPLUSI: |
| 490 |
|
|
case OP_NOTPOSPLUSI: |
| 491 |
|
|
case OP_NOTQUERYI: |
| 492 |
|
|
case OP_NOTMINQUERYI: |
| 493 |
|
|
case OP_NOTPOSQUERYI: |
| 494 |
|
|
flag = "/i"; |
| 495 |
|
|
/* Fall through */ |
| 496 |
|
|
|
| 497 |
|
|
case OP_NOTSTAR: |
| 498 |
|
|
case OP_NOTMINSTAR: |
| 499 |
|
|
case OP_NOTPOSSTAR: |
| 500 |
|
|
case OP_NOTPLUS: |
| 501 |
|
|
case OP_NOTMINPLUS: |
| 502 |
|
|
case OP_NOTPOSPLUS: |
| 503 |
|
|
case OP_NOTQUERY: |
| 504 |
|
|
case OP_NOTMINQUERY: |
| 505 |
|
|
case OP_NOTPOSQUERY: |
| 506 |
|
|
c = code[1]; |
| 507 |
|
|
if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c); |
| 508 |
|
|
else fprintf(f, " %s [^\\x%02x]", flag, c); |
| 509 |
|
|
fprintf(f, "%s", OP_names[*code]); |
| 510 |
|
|
break; |
| 511 |
|
|
|
| 512 |
|
|
case OP_NOTEXACTI: |
| 513 |
|
|
case OP_NOTUPTOI: |
| 514 |
|
|
case OP_NOTMINUPTOI: |
| 515 |
|
|
case OP_NOTPOSUPTOI: |
| 516 |
|
|
flag = "/i"; |
| 517 |
|
|
/* Fall through */ |
| 518 |
|
|
|
| 519 |
|
|
case OP_NOTEXACT: |
| 520 |
|
|
case OP_NOTUPTO: |
| 521 |
|
|
case OP_NOTMINUPTO: |
| 522 |
|
|
case OP_NOTPOSUPTO: |
| 523 |
|
|
c = code[1 + IMM2_SIZE]; |
| 524 |
|
|
if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c); |
| 525 |
|
|
else fprintf(f, " %s [^\\x%02x]{", flag, c); |
| 526 |
|
|
if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,"); |
| 527 |
|
|
fprintf(f, "%d}", GET2(code,1)); |
| 528 |
|
|
if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?"); |
| 529 |
|
|
else |
| 530 |
|
|
if (*code == OP_NOTPOSUPTO || *code == OP_NOTPOSUPTOI) fprintf(f, "+"); |
| 531 |
|
|
break; |
| 532 |
|
|
|
| 533 |
|
|
case OP_RECURSE: |
| 534 |
|
|
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 535 |
|
|
else fprintf(f, " "); |
| 536 |
|
|
fprintf(f, "%s", OP_names[*code]); |
| 537 |
|
|
break; |
| 538 |
|
|
|
| 539 |
|
|
case OP_REFI: |
| 540 |
|
|
flag = "/i"; |
| 541 |
|
|
/* Fall through */ |
| 542 |
|
|
case OP_REF: |
| 543 |
|
|
fprintf(f, " %s \\%d", flag, GET2(code,1)); |
| 544 |
|
|
ccode = code + PRIV(OP_lengths)[*code]; |
| 545 |
|
|
goto CLASS_REF_REPEAT; |
| 546 |
|
|
|
| 547 |
|
|
case OP_CALLOUT: |
| 548 |
|
|
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), |
| 549 |
|
|
GET(code, 2 + LINK_SIZE)); |
| 550 |
|
|
break; |
| 551 |
|
|
|
| 552 |
|
|
case OP_PROP: |
| 553 |
|
|
case OP_NOTPROP: |
| 554 |
|
|
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2])); |
| 555 |
|
|
break; |
| 556 |
|
|
|
| 557 |
|
|
/* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no |
| 558 |
|
|
harm in having this code always here, and it makes it less messy without |
| 559 |
|
|
all those #ifdefs. */ |
| 560 |
|
|
|
| 561 |
|
|
case OP_CLASS: |
| 562 |
|
|
case OP_NCLASS: |
| 563 |
|
|
case OP_XCLASS: |
| 564 |
|
|
{ |
| 565 |
|
|
int i, min, max; |
| 566 |
|
|
BOOL printmap; |
| 567 |
|
|
pcre_uint8 *map; |
| 568 |
|
|
|
| 569 |
|
|
fprintf(f, " ["); |
| 570 |
|
|
|
| 571 |
|
|
if (*code == OP_XCLASS) |
| 572 |
|
|
{ |
| 573 |
|
|
extra = GET(code, 1); |
| 574 |
|
|
ccode = code + LINK_SIZE + 1; |
| 575 |
|
|
printmap = (*ccode & XCL_MAP) != 0; |
| 576 |
|
|
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); |
| 577 |
|
|
} |
| 578 |
|
|
else |
| 579 |
|
|
{ |
| 580 |
|
|
printmap = TRUE; |
| 581 |
|
|
ccode = code + 1; |
| 582 |
|
|
} |
| 583 |
|
|
|
| 584 |
|
|
/* Print a bit map */ |
| 585 |
|
|
|
| 586 |
|
|
if (printmap) |
| 587 |
|
|
{ |
| 588 |
|
|
map = (pcre_uint8 *)ccode; |
| 589 |
|
|
for (i = 0; i < 256; i++) |
| 590 |
|
|
{ |
| 591 |
|
|
if ((map[i/8] & (1 << (i&7))) != 0) |
| 592 |
|
|
{ |
| 593 |
|
|
int j; |
| 594 |
|
|
for (j = i+1; j < 256; j++) |
| 595 |
|
|
if ((map[j/8] & (1 << (j&7))) == 0) break; |
| 596 |
|
|
if (i == '-' || i == ']') fprintf(f, "\\"); |
| 597 |
|
|
if (PRINTABLE(i)) fprintf(f, "%c", i); |
| 598 |
|
|
else fprintf(f, "\\x%02x", i); |
| 599 |
|
|
if (--j > i) |
| 600 |
|
|
{ |
| 601 |
|
|
if (j != i + 1) fprintf(f, "-"); |
| 602 |
|
|
if (j == '-' || j == ']') fprintf(f, "\\"); |
| 603 |
|
|
if (PRINTABLE(j)) fprintf(f, "%c", j); |
| 604 |
|
|
else fprintf(f, "\\x%02x", j); |
| 605 |
|
|
} |
| 606 |
|
|
i = j; |
| 607 |
|
|
} |
| 608 |
|
|
} |
| 609 |
|
|
ccode += 32 / sizeof(pcre_uchar); |
| 610 |
|
|
} |
| 611 |
|
|
|
| 612 |
|
|
/* For an XCLASS there is always some additional data */ |
| 613 |
|
|
|
| 614 |
|
|
if (*code == OP_XCLASS) |
| 615 |
|
|
{ |
| 616 |
|
|
int ch; |
| 617 |
|
|
while ((ch = *ccode++) != XCL_END) |
| 618 |
|
|
{ |
| 619 |
|
|
if (ch == XCL_PROP) |
| 620 |
|
|
{ |
| 621 |
|
|
int ptype = *ccode++; |
| 622 |
|
|
int pvalue = *ccode++; |
| 623 |
|
|
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); |
| 624 |
|
|
} |
| 625 |
|
|
else if (ch == XCL_NOTPROP) |
| 626 |
|
|
{ |
| 627 |
|
|
int ptype = *ccode++; |
| 628 |
|
|
int pvalue = *ccode++; |
| 629 |
|
|
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); |
| 630 |
|
|
} |
| 631 |
|
|
else |
| 632 |
|
|
{ |
| 633 |
zherczeg |
833 |
ccode += 1 + print_char(f, ccode, utf); |
| 634 |
ph10 |
805 |
if (ch == XCL_RANGE) |
| 635 |
|
|
{ |
| 636 |
|
|
fprintf(f, "-"); |
| 637 |
zherczeg |
833 |
ccode += 1 + print_char(f, ccode, utf); |
| 638 |
ph10 |
805 |
} |
| 639 |
|
|
} |
| 640 |
|
|
} |
| 641 |
|
|
} |
| 642 |
|
|
|
| 643 |
|
|
/* Indicate a non-UTF class which was created by negation */ |
| 644 |
|
|
|
| 645 |
|
|
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); |
| 646 |
|
|
|
| 647 |
|
|
/* Handle repeats after a class or a back reference */ |
| 648 |
|
|
|
| 649 |
|
|
CLASS_REF_REPEAT: |
| 650 |
|
|
switch(*ccode) |
| 651 |
|
|
{ |
| 652 |
|
|
case OP_CRSTAR: |
| 653 |
|
|
case OP_CRMINSTAR: |
| 654 |
|
|
case OP_CRPLUS: |
| 655 |
|
|
case OP_CRMINPLUS: |
| 656 |
|
|
case OP_CRQUERY: |
| 657 |
|
|
case OP_CRMINQUERY: |
| 658 |
|
|
fprintf(f, "%s", OP_names[*ccode]); |
| 659 |
|
|
extra += PRIV(OP_lengths)[*ccode]; |
| 660 |
|
|
break; |
| 661 |
|
|
|
| 662 |
|
|
case OP_CRRANGE: |
| 663 |
|
|
case OP_CRMINRANGE: |
| 664 |
|
|
min = GET2(ccode,1); |
| 665 |
|
|
max = GET2(ccode,1 + IMM2_SIZE); |
| 666 |
|
|
if (max == 0) fprintf(f, "{%d,}", min); |
| 667 |
|
|
else fprintf(f, "{%d,%d}", min, max); |
| 668 |
|
|
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
| 669 |
|
|
extra += PRIV(OP_lengths)[*ccode]; |
| 670 |
|
|
break; |
| 671 |
|
|
|
| 672 |
|
|
/* Do nothing if it's not a repeat; this code stops picky compilers |
| 673 |
|
|
warning about the lack of a default code path. */ |
| 674 |
|
|
|
| 675 |
|
|
default: |
| 676 |
|
|
break; |
| 677 |
|
|
} |
| 678 |
|
|
} |
| 679 |
|
|
break; |
| 680 |
|
|
|
| 681 |
|
|
case OP_MARK: |
| 682 |
|
|
case OP_PRUNE_ARG: |
| 683 |
|
|
case OP_SKIP_ARG: |
| 684 |
|
|
case OP_THEN_ARG: |
| 685 |
|
|
fprintf(f, " %s ", OP_names[*code]); |
| 686 |
|
|
print_puchar(f, code + 2); |
| 687 |
|
|
extra += code[1]; |
| 688 |
|
|
break; |
| 689 |
|
|
|
| 690 |
|
|
case OP_THEN: |
| 691 |
|
|
fprintf(f, " %s", OP_names[*code]); |
| 692 |
|
|
break; |
| 693 |
|
|
|
| 694 |
|
|
case OP_CIRCM: |
| 695 |
|
|
case OP_DOLLM: |
| 696 |
|
|
flag = "/m"; |
| 697 |
|
|
/* Fall through */ |
| 698 |
|
|
|
| 699 |
|
|
/* Anything else is just an item with no data, but possibly a flag. */ |
| 700 |
|
|
|
| 701 |
|
|
default: |
| 702 |
|
|
fprintf(f, " %s %s", flag, OP_names[*code]); |
| 703 |
|
|
break; |
| 704 |
|
|
} |
| 705 |
|
|
|
| 706 |
|
|
code += PRIV(OP_lengths)[*code] + extra; |
| 707 |
|
|
fprintf(f, "\n"); |
| 708 |
|
|
} |
| 709 |
|
|
} |
| 710 |
|
|
|
| 711 |
|
|
/* End of pcre_printint.src */ |