/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 240 - (show annotations) (download) (as text)
Tue Sep 11 15:47:20 2007 UTC (7 years ago) by ph10
File MIME type: application/x-wais-source
File size: 14391 byte(s)
Refactoring to reduce the number of relocations in a shared library.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2007 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
50
51
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file.

The test is true for values 32..126 inclusive. The argument is evaluated twice,
so it must not have side effects. */

#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
58
59 /* The table of operator names. */
60
61 static const char *OP_names[] = { OP_NAME_LIST };
62
63
64
65 /*************************************************
66 * Print single- or multi-byte character *
67 *************************************************/
68
69 static int
70 print_char(FILE *f, uschar *ptr, BOOL utf8)
71 {
72 int c = *ptr;
73
74 #ifndef SUPPORT_UTF8
75 utf8 = utf8; /* Avoid compiler warning */
76 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
77 return 0;
78
79 #else
80 if (!utf8 || (c & 0xc0) != 0xc0)
81 {
82 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
83 return 0;
84 }
85 else
86 {
87 int i;
88 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
89 int s = 6*a;
90 c = (c & _pcre_utf8_table3[a]) << s;
91 for (i = 1; i <= a; i++)
92 {
93 /* This is a check for malformed UTF-8; it should only occur if the sanity
94 check has been turned off. Rather than swallow random bytes, just stop if
95 we hit a bad one. Print it with \X instead of \x as an indication. */
96
97 if ((ptr[i] & 0xc0) != 0x80)
98 {
99 fprintf(f, "\\X{%x}", c);
100 return i - 1;
101 }
102
103 /* The byte is OK */
104
105 s -= 6;
106 c |= (ptr[i] & 0x3f) << s;
107 }
108 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
109 return a;
110 }
111 #endif
112 }
113
114
115
/*************************************************
*          Find Unicode property name           *
*************************************************/

/* Looks up the printable name of a Unicode property from its type and value.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:  when built with SUPPORT_UCP, a pointer to the name of the table
          entry whose type and value both match, or "??" if there is none;
          without SUPPORT_UCP, always "??"
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan from the last entry downwards; i ends up as -1 if nothing matches. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* The arguments are not needed here. Casting them to void marks them as
deliberately unused without doing any arithmetic on them. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
136
137
138
139 /*************************************************
140 * Print compiled regex *
141 *************************************************/
142
143 /* Make this function work for a regex with integers either byte order.
144 However, we assume that what we are passed is a compiled regex. The
145 print_lengths flag controls whether offsets and lengths of items are printed.
146 They can be turned off from pcretest so that automatic tests on bytecode can be
147 written that do not depend on the value of LINK_SIZE. */
148
149 static void
150 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
151 {
152 real_pcre *re = (real_pcre *)external_re;
153 uschar *codestart, *code;
154 BOOL utf8;
155
156 unsigned int options = re->options;
157 int offset = re->name_table_offset;
158 int count = re->name_count;
159 int size = re->name_entry_size;
160
161 if (re->magic_number != MAGIC_NUMBER)
162 {
163 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
164 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
165 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
166 options = ((options << 24) & 0xff000000) |
167 ((options << 8) & 0x00ff0000) |
168 ((options >> 8) & 0x0000ff00) |
169 ((options >> 24) & 0x000000ff);
170 }
171
172 code = codestart = (uschar *)re + offset + count * size;
173 utf8 = (options & PCRE_UTF8) != 0;
174
175 for(;;)
176 {
177 uschar *ccode;
178 int c;
179 int extra = 0;
180
181 if (print_lengths)
182 fprintf(f, "%3d ", (int)(code - codestart));
183 else
184 fprintf(f, " ");
185
186 switch(*code)
187 {
188 case OP_END:
189 fprintf(f, " %s\n", OP_names[*code]);
190 fprintf(f, "------------------------------------------------------------------\n");
191 return;
192
193 case OP_OPT:
194 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
195 break;
196
197 case OP_CHAR:
198 fprintf(f, " ");
199 do
200 {
201 code++;
202 code += 1 + print_char(f, code, utf8);
203 }
204 while (*code == OP_CHAR);
205 fprintf(f, "\n");
206 continue;
207
208 case OP_CHARNC:
209 fprintf(f, " NC ");
210 do
211 {
212 code++;
213 code += 1 + print_char(f, code, utf8);
214 }
215 while (*code == OP_CHARNC);
216 fprintf(f, "\n");
217 continue;
218
219 case OP_CBRA:
220 case OP_SCBRA:
221 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
222 else fprintf(f, " ");
223 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
224 break;
225
226 case OP_BRA:
227 case OP_SBRA:
228 case OP_KETRMAX:
229 case OP_KETRMIN:
230 case OP_ALT:
231 case OP_KET:
232 case OP_ASSERT:
233 case OP_ASSERT_NOT:
234 case OP_ASSERTBACK:
235 case OP_ASSERTBACK_NOT:
236 case OP_ONCE:
237 case OP_COND:
238 case OP_SCOND:
239 case OP_REVERSE:
240 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
241 else fprintf(f, " ");
242 fprintf(f, "%s", OP_names[*code]);
243 break;
244
245 case OP_CREF:
246 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
247 break;
248
249 case OP_RREF:
250 c = GET2(code, 1);
251 if (c == RREF_ANY)
252 fprintf(f, " Cond recurse any");
253 else
254 fprintf(f, " Cond recurse %d", c);
255 break;
256
257 case OP_DEF:
258 fprintf(f, " Cond def");
259 break;
260
261 case OP_STAR:
262 case OP_MINSTAR:
263 case OP_POSSTAR:
264 case OP_PLUS:
265 case OP_MINPLUS:
266 case OP_POSPLUS:
267 case OP_QUERY:
268 case OP_MINQUERY:
269 case OP_POSQUERY:
270 case OP_TYPESTAR:
271 case OP_TYPEMINSTAR:
272 case OP_TYPEPOSSTAR:
273 case OP_TYPEPLUS:
274 case OP_TYPEMINPLUS:
275 case OP_TYPEPOSPLUS:
276 case OP_TYPEQUERY:
277 case OP_TYPEMINQUERY:
278 case OP_TYPEPOSQUERY:
279 fprintf(f, " ");
280 if (*code >= OP_TYPESTAR)
281 {
282 fprintf(f, "%s", OP_names[code[1]]);
283 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
284 {
285 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
286 extra = 2;
287 }
288 }
289 else extra = print_char(f, code+1, utf8);
290 fprintf(f, "%s", OP_names[*code]);
291 break;
292
293 case OP_EXACT:
294 case OP_UPTO:
295 case OP_MINUPTO:
296 case OP_POSUPTO:
297 fprintf(f, " ");
298 extra = print_char(f, code+3, utf8);
299 fprintf(f, "{");
300 if (*code != OP_EXACT) fprintf(f, "0,");
301 fprintf(f, "%d}", GET2(code,1));
302 if (*code == OP_MINUPTO) fprintf(f, "?");
303 else if (*code == OP_POSUPTO) fprintf(f, "+");
304 break;
305
306 case OP_TYPEEXACT:
307 case OP_TYPEUPTO:
308 case OP_TYPEMINUPTO:
309 case OP_TYPEPOSUPTO:
310 fprintf(f, " %s", OP_names[code[3]]);
311 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
312 {
313 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
314 extra = 2;
315 }
316 fprintf(f, "{");
317 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
318 fprintf(f, "%d}", GET2(code,1));
319 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
320 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
321 break;
322
323 case OP_NOT:
324 c = code[1];
325 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
326 else fprintf(f, " [^\\x%02x]", c);
327 break;
328
329 case OP_NOTSTAR:
330 case OP_NOTMINSTAR:
331 case OP_NOTPOSSTAR:
332 case OP_NOTPLUS:
333 case OP_NOTMINPLUS:
334 case OP_NOTPOSPLUS:
335 case OP_NOTQUERY:
336 case OP_NOTMINQUERY:
337 case OP_NOTPOSQUERY:
338 c = code[1];
339 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
340 else fprintf(f, " [^\\x%02x]", c);
341 fprintf(f, "%s", OP_names[*code]);
342 break;
343
344 case OP_NOTEXACT:
345 case OP_NOTUPTO:
346 case OP_NOTMINUPTO:
347 case OP_NOTPOSUPTO:
348 c = code[3];
349 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
350 else fprintf(f, " [^\\x%02x]{", c);
351 if (*code != OP_NOTEXACT) fprintf(f, "0,");
352 fprintf(f, "%d}", GET2(code,1));
353 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
354 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
355 break;
356
357 case OP_RECURSE:
358 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
359 else fprintf(f, " ");
360 fprintf(f, "%s", OP_names[*code]);
361 break;
362
363 case OP_REF:
364 fprintf(f, " \\%d", GET2(code,1));
365 ccode = code + _pcre_OP_lengths[*code];
366 goto CLASS_REF_REPEAT;
367
368 case OP_CALLOUT:
369 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
370 GET(code, 2 + LINK_SIZE));
371 break;
372
373 case OP_PROP:
374 case OP_NOTPROP:
375 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
376 break;
377
378 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
379 having this code always here, and it makes it less messy without all those
380 #ifdefs. */
381
382 case OP_CLASS:
383 case OP_NCLASS:
384 case OP_XCLASS:
385 {
386 int i, min, max;
387 BOOL printmap;
388
389 fprintf(f, " [");
390
391 if (*code == OP_XCLASS)
392 {
393 extra = GET(code, 1);
394 ccode = code + LINK_SIZE + 1;
395 printmap = (*ccode & XCL_MAP) != 0;
396 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
397 }
398 else
399 {
400 printmap = TRUE;
401 ccode = code + 1;
402 }
403
404 /* Print a bit map */
405
406 if (printmap)
407 {
408 for (i = 0; i < 256; i++)
409 {
410 if ((ccode[i/8] & (1 << (i&7))) != 0)
411 {
412 int j;
413 for (j = i+1; j < 256; j++)
414 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
415 if (i == '-' || i == ']') fprintf(f, "\\");
416 if (PRINTABLE(i)) fprintf(f, "%c", i);
417 else fprintf(f, "\\x%02x", i);
418 if (--j > i)
419 {
420 if (j != i + 1) fprintf(f, "-");
421 if (j == '-' || j == ']') fprintf(f, "\\");
422 if (PRINTABLE(j)) fprintf(f, "%c", j);
423 else fprintf(f, "\\x%02x", j);
424 }
425 i = j;
426 }
427 }
428 ccode += 32;
429 }
430
431 /* For an XCLASS there is always some additional data */
432
433 if (*code == OP_XCLASS)
434 {
435 int ch;
436 while ((ch = *ccode++) != XCL_END)
437 {
438 if (ch == XCL_PROP)
439 {
440 int ptype = *ccode++;
441 int pvalue = *ccode++;
442 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
443 }
444 else if (ch == XCL_NOTPROP)
445 {
446 int ptype = *ccode++;
447 int pvalue = *ccode++;
448 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
449 }
450 else
451 {
452 ccode += 1 + print_char(f, ccode, TRUE);
453 if (ch == XCL_RANGE)
454 {
455 fprintf(f, "-");
456 ccode += 1 + print_char(f, ccode, TRUE);
457 }
458 }
459 }
460 }
461
462 /* Indicate a non-UTF8 class which was created by negation */
463
464 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
465
466 /* Handle repeats after a class or a back reference */
467
468 CLASS_REF_REPEAT:
469 switch(*ccode)
470 {
471 case OP_CRSTAR:
472 case OP_CRMINSTAR:
473 case OP_CRPLUS:
474 case OP_CRMINPLUS:
475 case OP_CRQUERY:
476 case OP_CRMINQUERY:
477 fprintf(f, "%s", OP_names[*ccode]);
478 extra += _pcre_OP_lengths[*ccode];
479 break;
480
481 case OP_CRRANGE:
482 case OP_CRMINRANGE:
483 min = GET2(ccode,1);
484 max = GET2(ccode,3);
485 if (max == 0) fprintf(f, "{%d,}", min);
486 else fprintf(f, "{%d,%d}", min, max);
487 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
488 extra += _pcre_OP_lengths[*ccode];
489 break;
490
491 /* Do nothing if it's not a repeat; this code stops picky compilers
492 warning about the lack of a default code path. */
493
494 default:
495 break;
496 }
497 }
498 break;
499
500 /* Anything else is just an item with no data*/
501
502 default:
503 fprintf(f, " %s", OP_names[*code]);
504 break;
505 }
506
507 code += _pcre_OP_lengths[*code] + extra;
508 fprintf(f, "\n");
509 }
510 }
511
512 /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12