/[pcre]/code/branches/pcre16/pcre_printint.src
ViewVC logotype

Contents of /code/branches/pcre16/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 459 - (show annotations) (download) (as text)
Sun Oct 4 09:21:39 2009 UTC (4 years, 9 months ago) by ph10
Original Path: code/trunk/pcre_printint.src
File MIME type: application/x-wais-source
File size: 14757 byte(s)
Fix problems with conditional references to duplicate named subpatterns.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47
48 (2) It is always #included by pcretest.c, which can be asked to print out a
49 compiled regex for debugging purposes. */
50
51
52 /* Macro that decides whether a character should be output as a literal or in
53 hexadecimal. We don't use isprint() because that can vary from system to system
54 (even without the use of locales) and we want the output always to be the same,
55 for testing purposes. This macro is used in pcretest as well as in this file. */
56
/* PRINTABLE(c) is true when c lies in the half-open range of character codes
that are output literally: [64, 255) when EBCDIC is defined, [32, 127)
otherwise. The argument is evaluated twice. */

#ifdef EBCDIC
#define PRINTABLE_FIRST 64
#define PRINTABLE_LIMIT 255
#else
#define PRINTABLE_FIRST 32
#define PRINTABLE_LIMIT 127
#endif

#define PRINTABLE(c) ((c) >= PRINTABLE_FIRST && (c) < PRINTABLE_LIMIT)
62
63 /* The table of operator names. */
64
65 static const char *OP_names[] = { OP_NAME_LIST };
66
67
68
69 /*************************************************
70 * Print single- or multi-byte character *
71 *************************************************/
72
73 static int
74 print_char(FILE *f, uschar *ptr, BOOL utf8)
75 {
76 int c = *ptr;
77
78 #ifndef SUPPORT_UTF8
79 utf8 = utf8; /* Avoid compiler warning */
80 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
81 return 0;
82
83 #else
84 if (!utf8 || (c & 0xc0) != 0xc0)
85 {
86 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
87 return 0;
88 }
89 else
90 {
91 int i;
92 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
93 int s = 6*a;
94 c = (c & _pcre_utf8_table3[a]) << s;
95 for (i = 1; i <= a; i++)
96 {
97 /* This is a check for malformed UTF-8; it should only occur if the sanity
98 check has been turned off. Rather than swallow random bytes, just stop if
99 we hit a bad one. Print it with \X instead of \x as an indication. */
100
101 if ((ptr[i] & 0xc0) != 0x80)
102 {
103 fprintf(f, "\\X{%x}", c);
104 return i - 1;
105 }
106
107 /* The byte is OK */
108
109 s -= 6;
110 c |= (ptr[i] & 0x3f) << s;
111 }
112 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113 return a;
114 }
115 #endif
116 }
117
118
119
120 /*************************************************
121 * Find Unicode property name *
122 *************************************************/
123
/* Look up the name of a Unicode property.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns: when SUPPORT_UCP is defined, a pointer into _pcre_utt_names for the
         _pcre_utt entry whose type and value both match, or "??" if no entry
         matches. Without SUPPORT_UCP the result is always "??". */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;
  /* Scan the table from the last entry down to the first */
  for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
  return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
  /* The arguments are unused in this configuration. Cast them to void to
  silence compiler warnings without doing arithmetic on them (a signed
  multiplication of arbitrary ints can overflow). */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
140
141
142
143 /*************************************************
144 * Print compiled regex *
145 *************************************************/
146
147 /* Make this function work for a regex with integers either byte order.
148 However, we assume that what we are passed is a compiled regex. The
149 print_lengths flag controls whether offsets and lengths of items are printed.
150 They can be turned off from pcretest so that automatic tests on bytecode can be
151 written that do not depend on the value of LINK_SIZE. */
152
153 static void
154 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
155 {
156 real_pcre *re = (real_pcre *)external_re;
157 uschar *codestart, *code;
158 BOOL utf8;
159
160 unsigned int options = re->options;
161 int offset = re->name_table_offset;
162 int count = re->name_count;
163 int size = re->name_entry_size;
164
165 if (re->magic_number != MAGIC_NUMBER)
166 {
167 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
168 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
169 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
170 options = ((options << 24) & 0xff000000) |
171 ((options << 8) & 0x00ff0000) |
172 ((options >> 8) & 0x0000ff00) |
173 ((options >> 24) & 0x000000ff);
174 }
175
176 code = codestart = (uschar *)re + offset + count * size;
177 utf8 = (options & PCRE_UTF8) != 0;
178
179 for(;;)
180 {
181 uschar *ccode;
182 int c;
183 int extra = 0;
184
185 if (print_lengths)
186 fprintf(f, "%3d ", (int)(code - codestart));
187 else
188 fprintf(f, " ");
189
190 switch(*code)
191 {
192 case OP_END:
193 fprintf(f, " %s\n", OP_names[*code]);
194 fprintf(f, "------------------------------------------------------------------\n");
195 return;
196
197 case OP_OPT:
198 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
199 break;
200
201 case OP_CHAR:
202 fprintf(f, " ");
203 do
204 {
205 code++;
206 code += 1 + print_char(f, code, utf8);
207 }
208 while (*code == OP_CHAR);
209 fprintf(f, "\n");
210 continue;
211
212 case OP_CHARNC:
213 fprintf(f, " NC ");
214 do
215 {
216 code++;
217 code += 1 + print_char(f, code, utf8);
218 }
219 while (*code == OP_CHARNC);
220 fprintf(f, "\n");
221 continue;
222
223 case OP_CBRA:
224 case OP_SCBRA:
225 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
226 else fprintf(f, " ");
227 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
228 break;
229
230 case OP_BRA:
231 case OP_SBRA:
232 case OP_KETRMAX:
233 case OP_KETRMIN:
234 case OP_ALT:
235 case OP_KET:
236 case OP_ASSERT:
237 case OP_ASSERT_NOT:
238 case OP_ASSERTBACK:
239 case OP_ASSERTBACK_NOT:
240 case OP_ONCE:
241 case OP_COND:
242 case OP_SCOND:
243 case OP_REVERSE:
244 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
245 else fprintf(f, " ");
246 fprintf(f, "%s", OP_names[*code]);
247 break;
248
249 case OP_CLOSE:
250 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
251 break;
252
253 case OP_CREF:
254 case OP_NCREF:
255 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
256 break;
257
258 case OP_RREF:
259 c = GET2(code, 1);
260 if (c == RREF_ANY)
261 fprintf(f, " Cond recurse any");
262 else
263 fprintf(f, " Cond recurse %d", c);
264 break;
265
266 case OP_NRREF:
267 c = GET2(code, 1);
268 if (c == RREF_ANY)
269 fprintf(f, " Cond nrecurse any");
270 else
271 fprintf(f, " Cond nrecurse %d", c);
272 break;
273
274 case OP_DEF:
275 fprintf(f, " Cond def");
276 break;
277
278 case OP_STAR:
279 case OP_MINSTAR:
280 case OP_POSSTAR:
281 case OP_PLUS:
282 case OP_MINPLUS:
283 case OP_POSPLUS:
284 case OP_QUERY:
285 case OP_MINQUERY:
286 case OP_POSQUERY:
287 case OP_TYPESTAR:
288 case OP_TYPEMINSTAR:
289 case OP_TYPEPOSSTAR:
290 case OP_TYPEPLUS:
291 case OP_TYPEMINPLUS:
292 case OP_TYPEPOSPLUS:
293 case OP_TYPEQUERY:
294 case OP_TYPEMINQUERY:
295 case OP_TYPEPOSQUERY:
296 fprintf(f, " ");
297 if (*code >= OP_TYPESTAR)
298 {
299 fprintf(f, "%s", OP_names[code[1]]);
300 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
301 {
302 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
303 extra = 2;
304 }
305 }
306 else extra = print_char(f, code+1, utf8);
307 fprintf(f, "%s", OP_names[*code]);
308 break;
309
310 case OP_EXACT:
311 case OP_UPTO:
312 case OP_MINUPTO:
313 case OP_POSUPTO:
314 fprintf(f, " ");
315 extra = print_char(f, code+3, utf8);
316 fprintf(f, "{");
317 if (*code != OP_EXACT) fprintf(f, "0,");
318 fprintf(f, "%d}", GET2(code,1));
319 if (*code == OP_MINUPTO) fprintf(f, "?");
320 else if (*code == OP_POSUPTO) fprintf(f, "+");
321 break;
322
323 case OP_TYPEEXACT:
324 case OP_TYPEUPTO:
325 case OP_TYPEMINUPTO:
326 case OP_TYPEPOSUPTO:
327 fprintf(f, " %s", OP_names[code[3]]);
328 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
329 {
330 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
331 extra = 2;
332 }
333 fprintf(f, "{");
334 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
335 fprintf(f, "%d}", GET2(code,1));
336 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
337 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
338 break;
339
340 case OP_NOT:
341 c = code[1];
342 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
343 else fprintf(f, " [^\\x%02x]", c);
344 break;
345
346 case OP_NOTSTAR:
347 case OP_NOTMINSTAR:
348 case OP_NOTPOSSTAR:
349 case OP_NOTPLUS:
350 case OP_NOTMINPLUS:
351 case OP_NOTPOSPLUS:
352 case OP_NOTQUERY:
353 case OP_NOTMINQUERY:
354 case OP_NOTPOSQUERY:
355 c = code[1];
356 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
357 else fprintf(f, " [^\\x%02x]", c);
358 fprintf(f, "%s", OP_names[*code]);
359 break;
360
361 case OP_NOTEXACT:
362 case OP_NOTUPTO:
363 case OP_NOTMINUPTO:
364 case OP_NOTPOSUPTO:
365 c = code[3];
366 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
367 else fprintf(f, " [^\\x%02x]{", c);
368 if (*code != OP_NOTEXACT) fprintf(f, "0,");
369 fprintf(f, "%d}", GET2(code,1));
370 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
371 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
372 break;
373
374 case OP_RECURSE:
375 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
376 else fprintf(f, " ");
377 fprintf(f, "%s", OP_names[*code]);
378 break;
379
380 case OP_REF:
381 fprintf(f, " \\%d", GET2(code,1));
382 ccode = code + _pcre_OP_lengths[*code];
383 goto CLASS_REF_REPEAT;
384
385 case OP_CALLOUT:
386 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
387 GET(code, 2 + LINK_SIZE));
388 break;
389
390 case OP_PROP:
391 case OP_NOTPROP:
392 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
393 break;
394
395 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
396 having this code always here, and it makes it less messy without all those
397 #ifdefs. */
398
399 case OP_CLASS:
400 case OP_NCLASS:
401 case OP_XCLASS:
402 {
403 int i, min, max;
404 BOOL printmap;
405
406 fprintf(f, " [");
407
408 if (*code == OP_XCLASS)
409 {
410 extra = GET(code, 1);
411 ccode = code + LINK_SIZE + 1;
412 printmap = (*ccode & XCL_MAP) != 0;
413 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
414 }
415 else
416 {
417 printmap = TRUE;
418 ccode = code + 1;
419 }
420
421 /* Print a bit map */
422
423 if (printmap)
424 {
425 for (i = 0; i < 256; i++)
426 {
427 if ((ccode[i/8] & (1 << (i&7))) != 0)
428 {
429 int j;
430 for (j = i+1; j < 256; j++)
431 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
432 if (i == '-' || i == ']') fprintf(f, "\\");
433 if (PRINTABLE(i)) fprintf(f, "%c", i);
434 else fprintf(f, "\\x%02x", i);
435 if (--j > i)
436 {
437 if (j != i + 1) fprintf(f, "-");
438 if (j == '-' || j == ']') fprintf(f, "\\");
439 if (PRINTABLE(j)) fprintf(f, "%c", j);
440 else fprintf(f, "\\x%02x", j);
441 }
442 i = j;
443 }
444 }
445 ccode += 32;
446 }
447
448 /* For an XCLASS there is always some additional data */
449
450 if (*code == OP_XCLASS)
451 {
452 int ch;
453 while ((ch = *ccode++) != XCL_END)
454 {
455 if (ch == XCL_PROP)
456 {
457 int ptype = *ccode++;
458 int pvalue = *ccode++;
459 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
460 }
461 else if (ch == XCL_NOTPROP)
462 {
463 int ptype = *ccode++;
464 int pvalue = *ccode++;
465 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
466 }
467 else
468 {
469 ccode += 1 + print_char(f, ccode, TRUE);
470 if (ch == XCL_RANGE)
471 {
472 fprintf(f, "-");
473 ccode += 1 + print_char(f, ccode, TRUE);
474 }
475 }
476 }
477 }
478
479 /* Indicate a non-UTF8 class which was created by negation */
480
481 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
482
483 /* Handle repeats after a class or a back reference */
484
485 CLASS_REF_REPEAT:
486 switch(*ccode)
487 {
488 case OP_CRSTAR:
489 case OP_CRMINSTAR:
490 case OP_CRPLUS:
491 case OP_CRMINPLUS:
492 case OP_CRQUERY:
493 case OP_CRMINQUERY:
494 fprintf(f, "%s", OP_names[*ccode]);
495 extra += _pcre_OP_lengths[*ccode];
496 break;
497
498 case OP_CRRANGE:
499 case OP_CRMINRANGE:
500 min = GET2(ccode,1);
501 max = GET2(ccode,3);
502 if (max == 0) fprintf(f, "{%d,}", min);
503 else fprintf(f, "{%d,%d}", min, max);
504 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
505 extra += _pcre_OP_lengths[*ccode];
506 break;
507
508 /* Do nothing if it's not a repeat; this code stops picky compilers
509 warning about the lack of a default code path. */
510
511 default:
512 break;
513 }
514 }
515 break;
516
517 /* Anything else is just an item with no data*/
518
519 default:
520 fprintf(f, " %s", OP_names[*code]);
521 break;
522 }
523
524 code += _pcre_OP_lengths[*code] + extra;
525 fprintf(f, "\n");
526 }
527 }
528
529 /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12