| 1 |
nigel |
85 |
/************************************************* |
| 2 |
|
|
* Perl-Compatible Regular Expressions * |
| 3 |
|
|
*************************************************/ |
| 4 |
|
|
|
| 5 |
|
|
/* PCRE is a library of functions to support regular expressions whose syntax |
| 6 |
|
|
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
|
| 8 |
|
|
Written by Philip Hazel |
| 9 |
ph10 |
475 |
Copyright (c) 1997-2010 University of Cambridge |
| 10 |
nigel |
85 |
|
| 11 |
|
|
----------------------------------------------------------------------------- |
| 12 |
|
|
Redistribution and use in source and binary forms, with or without |
| 13 |
|
|
modification, are permitted provided that the following conditions are met: |
| 14 |
|
|
|
| 15 |
|
|
* Redistributions of source code must retain the above copyright notice, |
| 16 |
|
|
this list of conditions and the following disclaimer. |
| 17 |
|
|
|
| 18 |
|
|
* Redistributions in binary form must reproduce the above copyright |
| 19 |
|
|
notice, this list of conditions and the following disclaimer in the |
| 20 |
|
|
documentation and/or other materials provided with the distribution. |
| 21 |
|
|
|
| 22 |
|
|
* Neither the name of the University of Cambridge nor the names of its |
| 23 |
|
|
contributors may be used to endorse or promote products derived from |
| 24 |
|
|
this software without specific prior written permission. |
| 25 |
|
|
|
| 26 |
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 27 |
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 28 |
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 29 |
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 30 |
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 31 |
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 32 |
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 33 |
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 34 |
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 35 |
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 36 |
|
|
POSSIBILITY OF SUCH DAMAGE. |
| 37 |
|
|
----------------------------------------------------------------------------- |
| 38 |
|
|
*/ |
| 39 |
|
|
|
| 40 |
|
|
|
| 41 |
|
|
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(PCRE_DEBUG defined in pcre_internal.h). It is not included in production
compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
| 51 |
|
|
|
| 52 |
|
|
|
| 53 |
nigel |
93 |
/* PRINTABLE(c) chooses between writing a character literally and writing it
in hexadecimal. isprint() is deliberately avoided: its result can differ from
system to system (even when locales are not in use), and the output has to be
identical everywhere for testing purposes. pcretest uses this macro as well as
this file. Note that the argument is evaluated twice. */

#if !defined(EBCDIC)
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif
| 63 |
nigel |
93 |
|
| 64 |
|
|
/* The table of operator names. */ |
| 65 |
|
|
|
| 66 |
nigel |
85 |
static const char *OP_names[] = { OP_NAME_LIST }; |
| 67 |
|
|
|
| 68 |
|
|
|
| 69 |
nigel |
93 |
|
| 70 |
nigel |
85 |
/************************************************* |
| 71 |
|
|
* Print single- or multi-byte character * |
| 72 |
|
|
*************************************************/ |
| 73 |
|
|
|
| 74 |
|
|
static int |
| 75 |
|
|
print_char(FILE *f, uschar *ptr, BOOL utf8) |
| 76 |
|
|
{ |
| 77 |
|
|
int c = *ptr; |
| 78 |
|
|
|
| 79 |
ph10 |
107 |
#ifndef SUPPORT_UTF8 |
| 80 |
|
|
utf8 = utf8; /* Avoid compiler warning */ |
| 81 |
|
|
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
| 82 |
|
|
return 0; |
| 83 |
|
|
|
| 84 |
|
|
#else |
| 85 |
nigel |
85 |
if (!utf8 || (c & 0xc0) != 0xc0) |
| 86 |
|
|
{ |
| 87 |
nigel |
93 |
if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c); |
| 88 |
nigel |
85 |
return 0; |
| 89 |
|
|
} |
| 90 |
|
|
else |
| 91 |
|
|
{ |
| 92 |
|
|
int i; |
| 93 |
|
|
int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
| 94 |
|
|
int s = 6*a; |
| 95 |
|
|
c = (c & _pcre_utf8_table3[a]) << s; |
| 96 |
|
|
for (i = 1; i <= a; i++) |
| 97 |
|
|
{ |
| 98 |
|
|
/* This is a check for malformed UTF-8; it should only occur if the sanity |
| 99 |
|
|
check has been turned off. Rather than swallow random bytes, just stop if |
| 100 |
|
|
we hit a bad one. Print it with \X instead of \x as an indication. */ |
| 101 |
|
|
|
| 102 |
|
|
if ((ptr[i] & 0xc0) != 0x80) |
| 103 |
|
|
{ |
| 104 |
|
|
fprintf(f, "\\X{%x}", c); |
| 105 |
|
|
return i - 1; |
| 106 |
|
|
} |
| 107 |
|
|
|
| 108 |
|
|
/* The byte is OK */ |
| 109 |
|
|
|
| 110 |
|
|
s -= 6; |
| 111 |
|
|
c |= (ptr[i] & 0x3f) << s; |
| 112 |
|
|
} |
| 113 |
|
|
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c); |
| 114 |
|
|
return a; |
| 115 |
|
|
} |
| 116 |
ph10 |
111 |
#endif |
| 117 |
nigel |
85 |
} |
| 118 |
|
|
|
| 119 |
|
|
|
| 120 |
|
|
|
| 121 |
|
|
/************************************************* |
| 122 |
|
|
* Find Unicode property name * |
| 123 |
|
|
*************************************************/ |
| 124 |
|
|
|
| 125 |
|
|
/* Look up the name of a Unicode property.

When SUPPORT_UCP is defined, _pcre_utt is searched from its last entry to its
first for one whose type and value both match, and the corresponding string
inside _pcre_utt_names is returned. If nothing matches, or if SUPPORT_UCP is
not defined, the result is "??".

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    a pointer to the property name, or to "??"
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;
  for (i = _pcre_utt_size - 1; i >= 0; i--)
    {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
      return _pcre_utt_names + _pcre_utt[i].name_offset;
    }
#else
  /* The arguments are unused in this configuration; say so explicitly to
  keep compilers from warning about them. */
  (void)ptype;
  (void)pvalue;
#endif

  return "??";
}
| 141 |
|
|
|
| 142 |
|
|
|
| 143 |
|
|
|
| 144 |
|
|
/************************************************* |
| 145 |
|
|
* Print compiled regex * |
| 146 |
|
|
*************************************************/ |
| 147 |
|
|
|
| 148 |
|
|
/* Make this function work for a regex with integers either byte order. |
| 149 |
ph10 |
116 |
However, we assume that what we are passed is a compiled regex. The |
| 150 |
ph10 |
123 |
print_lengths flag controls whether offsets and lengths of items are printed. |
| 151 |
ph10 |
116 |
They can be turned off from pcretest so that automatic tests on bytecode can be |
| 152 |
|
|
written that do not depend on the value of LINK_SIZE. */ |
| 153 |
nigel |
85 |
|
| 154 |
|
|
static void |
| 155 |
ph10 |
116 |
pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths) |
| 156 |
nigel |
85 |
{ |
| 157 |
|
|
real_pcre *re = (real_pcre *)external_re; |
| 158 |
|
|
uschar *codestart, *code; |
| 159 |
|
|
BOOL utf8; |
| 160 |
|
|
|
| 161 |
|
|
unsigned int options = re->options; |
| 162 |
|
|
int offset = re->name_table_offset; |
| 163 |
|
|
int count = re->name_count; |
| 164 |
|
|
int size = re->name_entry_size; |
| 165 |
|
|
|
| 166 |
|
|
if (re->magic_number != MAGIC_NUMBER) |
| 167 |
|
|
{ |
| 168 |
|
|
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); |
| 169 |
|
|
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); |
| 170 |
|
|
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); |
| 171 |
|
|
options = ((options << 24) & 0xff000000) | |
| 172 |
|
|
((options << 8) & 0x00ff0000) | |
| 173 |
|
|
((options >> 8) & 0x0000ff00) | |
| 174 |
|
|
((options >> 24) & 0x000000ff); |
| 175 |
|
|
} |
| 176 |
|
|
|
| 177 |
|
|
code = codestart = (uschar *)re + offset + count * size; |
| 178 |
|
|
utf8 = (options & PCRE_UTF8) != 0; |
| 179 |
|
|
|
| 180 |
|
|
for(;;) |
| 181 |
|
|
{ |
| 182 |
|
|
uschar *ccode; |
| 183 |
|
|
int c; |
| 184 |
|
|
int extra = 0; |
| 185 |
|
|
|
| 186 |
ph10 |
116 |
if (print_lengths) |
| 187 |
|
|
fprintf(f, "%3d ", (int)(code - codestart)); |
| 188 |
|
|
else |
| 189 |
ph10 |
123 |
fprintf(f, " "); |
| 190 |
nigel |
85 |
|
| 191 |
|
|
switch(*code) |
| 192 |
|
|
{ |
| 193 |
ph10 |
498 |
/* ========================================================================== */ |
| 194 |
|
|
/* These cases are never obeyed. This is a fudge that causes a compile- |
| 195 |
|
|
time error if the vectors OP_names or _pcre_OP_lengths, which are indexed |
| 196 |
|
|
by opcode, are not the correct length. It seems to be the only way to do |
| 197 |
|
|
such a check at compile time, as the sizeof() operator does not work in |
| 198 |
|
|
the C preprocessor. We do this while compiling pcretest, because that |
| 199 |
|
|
#includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this |
| 200 |
|
|
when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then |
| 201 |
|
|
know the size of _pcre_OP_lengths. */ |
| 202 |
|
|
|
| 203 |
|
|
#ifdef COMPILING_PCRETEST |
| 204 |
|
|
case OP_TABLE_LENGTH: |
| 205 |
|
|
case OP_TABLE_LENGTH + |
| 206 |
|
|
((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) && |
| 207 |
|
|
(sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)): |
| 208 |
|
|
break; |
| 209 |
|
|
#endif |
| 210 |
|
|
/* ========================================================================== */ |
| 211 |
|
|
|
| 212 |
nigel |
85 |
case OP_END: |
| 213 |
|
|
fprintf(f, " %s\n", OP_names[*code]); |
| 214 |
|
|
fprintf(f, "------------------------------------------------------------------\n"); |
| 215 |
|
|
return; |
| 216 |
|
|
|
| 217 |
|
|
case OP_OPT: |
| 218 |
|
|
fprintf(f, " %.2x %s", code[1], OP_names[*code]); |
| 219 |
|
|
break; |
| 220 |
|
|
|
| 221 |
|
|
case OP_CHAR: |
| 222 |
nigel |
91 |
fprintf(f, " "); |
| 223 |
|
|
do |
| 224 |
nigel |
85 |
{ |
| 225 |
nigel |
91 |
code++; |
| 226 |
|
|
code += 1 + print_char(f, code, utf8); |
| 227 |
nigel |
85 |
} |
| 228 |
nigel |
91 |
while (*code == OP_CHAR); |
| 229 |
|
|
fprintf(f, "\n"); |
| 230 |
|
|
continue; |
| 231 |
nigel |
85 |
|
| 232 |
|
|
case OP_CHARNC: |
| 233 |
nigel |
91 |
fprintf(f, " NC "); |
| 234 |
|
|
do |
| 235 |
nigel |
85 |
{ |
| 236 |
nigel |
91 |
code++; |
| 237 |
|
|
code += 1 + print_char(f, code, utf8); |
| 238 |
nigel |
85 |
} |
| 239 |
nigel |
91 |
while (*code == OP_CHARNC); |
| 240 |
|
|
fprintf(f, "\n"); |
| 241 |
|
|
continue; |
| 242 |
nigel |
85 |
|
| 243 |
nigel |
93 |
case OP_CBRA: |
| 244 |
|
|
case OP_SCBRA: |
| 245 |
ph10 |
116 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 246 |
ph10 |
123 |
else fprintf(f, " "); |
| 247 |
ph10 |
116 |
fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE)); |
| 248 |
nigel |
93 |
break; |
| 249 |
|
|
|
| 250 |
|
|
case OP_BRA: |
| 251 |
|
|
case OP_SBRA: |
| 252 |
nigel |
85 |
case OP_KETRMAX: |
| 253 |
|
|
case OP_KETRMIN: |
| 254 |
|
|
case OP_ALT: |
| 255 |
|
|
case OP_KET: |
| 256 |
|
|
case OP_ASSERT: |
| 257 |
|
|
case OP_ASSERT_NOT: |
| 258 |
|
|
case OP_ASSERTBACK: |
| 259 |
|
|
case OP_ASSERTBACK_NOT: |
| 260 |
|
|
case OP_ONCE: |
| 261 |
|
|
case OP_COND: |
| 262 |
nigel |
93 |
case OP_SCOND: |
| 263 |
nigel |
85 |
case OP_REVERSE: |
| 264 |
ph10 |
116 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 265 |
ph10 |
123 |
else fprintf(f, " "); |
| 266 |
ph10 |
116 |
fprintf(f, "%s", OP_names[*code]); |
| 267 |
nigel |
85 |
break; |
| 268 |
ph10 |
461 |
|
| 269 |
ph10 |
447 |
case OP_CLOSE: |
| 270 |
|
|
fprintf(f, " %s %d", OP_names[*code], GET2(code, 1)); |
| 271 |
ph10 |
461 |
break; |
| 272 |
nigel |
85 |
|
| 273 |
nigel |
93 |
case OP_CREF: |
| 274 |
ph10 |
461 |
case OP_NCREF: |
| 275 |
nigel |
93 |
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]); |
| 276 |
nigel |
85 |
break; |
| 277 |
|
|
|
| 278 |
nigel |
93 |
case OP_RREF: |
| 279 |
|
|
c = GET2(code, 1); |
| 280 |
|
|
if (c == RREF_ANY) |
| 281 |
|
|
fprintf(f, " Cond recurse any"); |
| 282 |
nigel |
85 |
else |
| 283 |
nigel |
93 |
fprintf(f, " Cond recurse %d", c); |
| 284 |
nigel |
85 |
break; |
| 285 |
|
|
|
| 286 |
ph10 |
459 |
case OP_NRREF: |
| 287 |
|
|
c = GET2(code, 1); |
| 288 |
|
|
if (c == RREF_ANY) |
| 289 |
|
|
fprintf(f, " Cond nrecurse any"); |
| 290 |
|
|
else |
| 291 |
|
|
fprintf(f, " Cond nrecurse %d", c); |
| 292 |
|
|
break; |
| 293 |
|
|
|
| 294 |
nigel |
93 |
case OP_DEF: |
| 295 |
|
|
fprintf(f, " Cond def"); |
| 296 |
|
|
break; |
| 297 |
|
|
|
| 298 |
nigel |
85 |
case OP_STAR: |
| 299 |
|
|
case OP_MINSTAR: |
| 300 |
nigel |
93 |
case OP_POSSTAR: |
| 301 |
nigel |
85 |
case OP_PLUS: |
| 302 |
|
|
case OP_MINPLUS: |
| 303 |
nigel |
93 |
case OP_POSPLUS: |
| 304 |
nigel |
85 |
case OP_QUERY: |
| 305 |
|
|
case OP_MINQUERY: |
| 306 |
nigel |
93 |
case OP_POSQUERY: |
| 307 |
nigel |
85 |
case OP_TYPESTAR: |
| 308 |
|
|
case OP_TYPEMINSTAR: |
| 309 |
nigel |
93 |
case OP_TYPEPOSSTAR: |
| 310 |
nigel |
85 |
case OP_TYPEPLUS: |
| 311 |
|
|
case OP_TYPEMINPLUS: |
| 312 |
nigel |
93 |
case OP_TYPEPOSPLUS: |
| 313 |
nigel |
85 |
case OP_TYPEQUERY: |
| 314 |
|
|
case OP_TYPEMINQUERY: |
| 315 |
nigel |
93 |
case OP_TYPEPOSQUERY: |
| 316 |
nigel |
85 |
fprintf(f, " "); |
| 317 |
|
|
if (*code >= OP_TYPESTAR) |
| 318 |
|
|
{ |
| 319 |
|
|
fprintf(f, "%s", OP_names[code[1]]); |
| 320 |
|
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) |
| 321 |
|
|
{ |
| 322 |
nigel |
87 |
fprintf(f, " %s ", get_ucpname(code[2], code[3])); |
| 323 |
|
|
extra = 2; |
| 324 |
nigel |
85 |
} |
| 325 |
|
|
} |
| 326 |
|
|
else extra = print_char(f, code+1, utf8); |
| 327 |
|
|
fprintf(f, "%s", OP_names[*code]); |
| 328 |
|
|
break; |
| 329 |
|
|
|
| 330 |
|
|
case OP_EXACT: |
| 331 |
|
|
case OP_UPTO: |
| 332 |
|
|
case OP_MINUPTO: |
| 333 |
nigel |
93 |
case OP_POSUPTO: |
| 334 |
nigel |
85 |
fprintf(f, " "); |
| 335 |
|
|
extra = print_char(f, code+3, utf8); |
| 336 |
|
|
fprintf(f, "{"); |
| 337 |
nigel |
93 |
if (*code != OP_EXACT) fprintf(f, "0,"); |
| 338 |
nigel |
85 |
fprintf(f, "%d}", GET2(code,1)); |
| 339 |
|
|
if (*code == OP_MINUPTO) fprintf(f, "?"); |
| 340 |
nigel |
93 |
else if (*code == OP_POSUPTO) fprintf(f, "+"); |
| 341 |
nigel |
85 |
break; |
| 342 |
|
|
|
| 343 |
|
|
case OP_TYPEEXACT: |
| 344 |
|
|
case OP_TYPEUPTO: |
| 345 |
|
|
case OP_TYPEMINUPTO: |
| 346 |
nigel |
93 |
case OP_TYPEPOSUPTO: |
| 347 |
nigel |
85 |
fprintf(f, " %s", OP_names[code[3]]); |
| 348 |
|
|
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) |
| 349 |
|
|
{ |
| 350 |
nigel |
87 |
fprintf(f, " %s ", get_ucpname(code[4], code[5])); |
| 351 |
|
|
extra = 2; |
| 352 |
nigel |
85 |
} |
| 353 |
|
|
fprintf(f, "{"); |
| 354 |
|
|
if (*code != OP_TYPEEXACT) fprintf(f, "0,"); |
| 355 |
|
|
fprintf(f, "%d}", GET2(code,1)); |
| 356 |
|
|
if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); |
| 357 |
nigel |
93 |
else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+"); |
| 358 |
nigel |
85 |
break; |
| 359 |
|
|
|
| 360 |
|
|
case OP_NOT: |
| 361 |
nigel |
93 |
c = code[1]; |
| 362 |
|
|
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
| 363 |
nigel |
85 |
else fprintf(f, " [^\\x%02x]", c); |
| 364 |
|
|
break; |
| 365 |
|
|
|
| 366 |
|
|
case OP_NOTSTAR: |
| 367 |
|
|
case OP_NOTMINSTAR: |
| 368 |
nigel |
93 |
case OP_NOTPOSSTAR: |
| 369 |
nigel |
85 |
case OP_NOTPLUS: |
| 370 |
|
|
case OP_NOTMINPLUS: |
| 371 |
nigel |
93 |
case OP_NOTPOSPLUS: |
| 372 |
nigel |
85 |
case OP_NOTQUERY: |
| 373 |
|
|
case OP_NOTMINQUERY: |
| 374 |
nigel |
93 |
case OP_NOTPOSQUERY: |
| 375 |
|
|
c = code[1]; |
| 376 |
|
|
if (PRINTABLE(c)) fprintf(f, " [^%c]", c); |
| 377 |
nigel |
85 |
else fprintf(f, " [^\\x%02x]", c); |
| 378 |
|
|
fprintf(f, "%s", OP_names[*code]); |
| 379 |
|
|
break; |
| 380 |
|
|
|
| 381 |
|
|
case OP_NOTEXACT: |
| 382 |
|
|
case OP_NOTUPTO: |
| 383 |
|
|
case OP_NOTMINUPTO: |
| 384 |
nigel |
93 |
case OP_NOTPOSUPTO: |
| 385 |
|
|
c = code[3]; |
| 386 |
|
|
if (PRINTABLE(c)) fprintf(f, " [^%c]{", c); |
| 387 |
nigel |
85 |
else fprintf(f, " [^\\x%02x]{", c); |
| 388 |
|
|
if (*code != OP_NOTEXACT) fprintf(f, "0,"); |
| 389 |
|
|
fprintf(f, "%d}", GET2(code,1)); |
| 390 |
|
|
if (*code == OP_NOTMINUPTO) fprintf(f, "?"); |
| 391 |
nigel |
93 |
else if (*code == OP_NOTPOSUPTO) fprintf(f, "+"); |
| 392 |
nigel |
85 |
break; |
| 393 |
|
|
|
| 394 |
|
|
case OP_RECURSE: |
| 395 |
ph10 |
116 |
if (print_lengths) fprintf(f, "%3d ", GET(code, 1)); |
| 396 |
ph10 |
123 |
else fprintf(f, " "); |
| 397 |
ph10 |
116 |
fprintf(f, "%s", OP_names[*code]); |
| 398 |
nigel |
85 |
break; |
| 399 |
|
|
|
| 400 |
|
|
case OP_REF: |
| 401 |
|
|
fprintf(f, " \\%d", GET2(code,1)); |
| 402 |
|
|
ccode = code + _pcre_OP_lengths[*code]; |
| 403 |
|
|
goto CLASS_REF_REPEAT; |
| 404 |
|
|
|
| 405 |
|
|
case OP_CALLOUT: |
| 406 |
|
|
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), |
| 407 |
|
|
GET(code, 2 + LINK_SIZE)); |
| 408 |
|
|
break; |
| 409 |
|
|
|
| 410 |
|
|
case OP_PROP: |
| 411 |
|
|
case OP_NOTPROP: |
| 412 |
nigel |
87 |
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2])); |
| 413 |
nigel |
85 |
break; |
| 414 |
|
|
|
| 415 |
|
|
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in |
| 416 |
|
|
having this code always here, and it makes it less messy without all those |
| 417 |
|
|
#ifdefs. */ |
| 418 |
|
|
|
| 419 |
|
|
case OP_CLASS: |
| 420 |
|
|
case OP_NCLASS: |
| 421 |
|
|
case OP_XCLASS: |
| 422 |
|
|
{ |
| 423 |
|
|
int i, min, max; |
| 424 |
|
|
BOOL printmap; |
| 425 |
|
|
|
| 426 |
|
|
fprintf(f, " ["); |
| 427 |
|
|
|
| 428 |
|
|
if (*code == OP_XCLASS) |
| 429 |
|
|
{ |
| 430 |
|
|
extra = GET(code, 1); |
| 431 |
|
|
ccode = code + LINK_SIZE + 1; |
| 432 |
|
|
printmap = (*ccode & XCL_MAP) != 0; |
| 433 |
|
|
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^"); |
| 434 |
|
|
} |
| 435 |
|
|
else |
| 436 |
|
|
{ |
| 437 |
|
|
printmap = TRUE; |
| 438 |
|
|
ccode = code + 1; |
| 439 |
|
|
} |
| 440 |
|
|
|
| 441 |
|
|
/* Print a bit map */ |
| 442 |
|
|
|
| 443 |
|
|
if (printmap) |
| 444 |
|
|
{ |
| 445 |
|
|
for (i = 0; i < 256; i++) |
| 446 |
|
|
{ |
| 447 |
|
|
if ((ccode[i/8] & (1 << (i&7))) != 0) |
| 448 |
|
|
{ |
| 449 |
|
|
int j; |
| 450 |
|
|
for (j = i+1; j < 256; j++) |
| 451 |
|
|
if ((ccode[j/8] & (1 << (j&7))) == 0) break; |
| 452 |
|
|
if (i == '-' || i == ']') fprintf(f, "\\"); |
| 453 |
nigel |
93 |
if (PRINTABLE(i)) fprintf(f, "%c", i); |
| 454 |
|
|
else fprintf(f, "\\x%02x", i); |
| 455 |
nigel |
85 |
if (--j > i) |
| 456 |
|
|
{ |
| 457 |
|
|
if (j != i + 1) fprintf(f, "-"); |
| 458 |
|
|
if (j == '-' || j == ']') fprintf(f, "\\"); |
| 459 |
nigel |
93 |
if (PRINTABLE(j)) fprintf(f, "%c", j); |
| 460 |
|
|
else fprintf(f, "\\x%02x", j); |
| 461 |
nigel |
85 |
} |
| 462 |
|
|
i = j; |
| 463 |
|
|
} |
| 464 |
|
|
} |
| 465 |
|
|
ccode += 32; |
| 466 |
|
|
} |
| 467 |
|
|
|
| 468 |
|
|
/* For an XCLASS there is always some additional data */ |
| 469 |
|
|
|
| 470 |
|
|
if (*code == OP_XCLASS) |
| 471 |
|
|
{ |
| 472 |
|
|
int ch; |
| 473 |
|
|
while ((ch = *ccode++) != XCL_END) |
| 474 |
|
|
{ |
| 475 |
|
|
if (ch == XCL_PROP) |
| 476 |
|
|
{ |
| 477 |
nigel |
87 |
int ptype = *ccode++; |
| 478 |
|
|
int pvalue = *ccode++; |
| 479 |
|
|
fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue)); |
| 480 |
nigel |
85 |
} |
| 481 |
|
|
else if (ch == XCL_NOTPROP) |
| 482 |
|
|
{ |
| 483 |
nigel |
87 |
int ptype = *ccode++; |
| 484 |
|
|
int pvalue = *ccode++; |
| 485 |
|
|
fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue)); |
| 486 |
nigel |
85 |
} |
| 487 |
|
|
else |
| 488 |
|
|
{ |
| 489 |
|
|
ccode += 1 + print_char(f, ccode, TRUE); |
| 490 |
|
|
if (ch == XCL_RANGE) |
| 491 |
|
|
{ |
| 492 |
|
|
fprintf(f, "-"); |
| 493 |
|
|
ccode += 1 + print_char(f, ccode, TRUE); |
| 494 |
|
|
} |
| 495 |
|
|
} |
| 496 |
|
|
} |
| 497 |
|
|
} |
| 498 |
|
|
|
| 499 |
|
|
/* Indicate a non-UTF8 class which was created by negation */ |
| 500 |
|
|
|
| 501 |
|
|
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : ""); |
| 502 |
|
|
|
| 503 |
|
|
/* Handle repeats after a class or a back reference */ |
| 504 |
|
|
|
| 505 |
|
|
CLASS_REF_REPEAT: |
| 506 |
|
|
switch(*ccode) |
| 507 |
|
|
{ |
| 508 |
|
|
case OP_CRSTAR: |
| 509 |
|
|
case OP_CRMINSTAR: |
| 510 |
|
|
case OP_CRPLUS: |
| 511 |
|
|
case OP_CRMINPLUS: |
| 512 |
|
|
case OP_CRQUERY: |
| 513 |
|
|
case OP_CRMINQUERY: |
| 514 |
|
|
fprintf(f, "%s", OP_names[*ccode]); |
| 515 |
|
|
extra += _pcre_OP_lengths[*ccode]; |
| 516 |
|
|
break; |
| 517 |
|
|
|
| 518 |
|
|
case OP_CRRANGE: |
| 519 |
|
|
case OP_CRMINRANGE: |
| 520 |
|
|
min = GET2(ccode,1); |
| 521 |
|
|
max = GET2(ccode,3); |
| 522 |
|
|
if (max == 0) fprintf(f, "{%d,}", min); |
| 523 |
|
|
else fprintf(f, "{%d,%d}", min, max); |
| 524 |
|
|
if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); |
| 525 |
|
|
extra += _pcre_OP_lengths[*ccode]; |
| 526 |
|
|
break; |
| 527 |
nigel |
87 |
|
| 528 |
|
|
/* Do nothing if it's not a repeat; this code stops picky compilers |
| 529 |
|
|
warning about the lack of a default code path. */ |
| 530 |
|
|
|
| 531 |
|
|
default: |
| 532 |
|
|
break; |
| 533 |
nigel |
85 |
} |
| 534 |
|
|
} |
| 535 |
|
|
break; |
| 536 |
ph10 |
510 |
|
| 537 |
|
|
case OP_MARK: |
| 538 |
|
|
case OP_PRUNE_ARG: |
| 539 |
|
|
case OP_SKIP_ARG: |
| 540 |
|
|
case OP_THEN_ARG: |
| 541 |
|
|
fprintf(f, " %s %s", OP_names[*code], code + 2); |
| 542 |
|
|
extra += code[1]; |
| 543 |
|
|
break; |
| 544 |
nigel |
85 |
|
| 545 |
|
|
/* Anything else is just an item with no data*/ |
| 546 |
|
|
|
| 547 |
|
|
default: |
| 548 |
|
|
fprintf(f, " %s", OP_names[*code]); |
| 549 |
|
|
break; |
| 550 |
|
|
} |
| 551 |
|
|
|
| 552 |
|
|
code += _pcre_OP_lengths[*code] + extra; |
| 553 |
|
|
fprintf(f, "\n"); |
| 554 |
|
|
} |
| 555 |
|
|
} |
| 556 |
|
|
|
| 557 |
|
|
/* End of pcre_printint.src */ |