/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (show annotations) (download) (as text)
Sat Feb 24 21:41:21 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: application/x-wais-source
File size: 12832 byte(s)
Load pcre-6.5 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2005 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47
48 (2) It is always #included by pcretest.c, which can be asked to print out a
49 compiled regex for debugging purposes. */
50
51
52 static const char *OP_names[] = { OP_NAME_LIST };
53
54
55 /*************************************************
56 * Print single- or multi-byte character *
57 *************************************************/
58
59 static int
60 print_char(FILE *f, uschar *ptr, BOOL utf8)
61 {
62 int c = *ptr;
63
64 if (!utf8 || (c & 0xc0) != 0xc0)
65 {
66 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
67 return 0;
68 }
69 else
70 {
71 int i;
72 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
73 int s = 6*a;
74 c = (c & _pcre_utf8_table3[a]) << s;
75 for (i = 1; i <= a; i++)
76 {
77 /* This is a check for malformed UTF-8; it should only occur if the sanity
78 check has been turned off. Rather than swallow random bytes, just stop if
79 we hit a bad one. Print it with \X instead of \x as an indication. */
80
81 if ((ptr[i] & 0xc0) != 0x80)
82 {
83 fprintf(f, "\\X{%x}", c);
84 return i - 1;
85 }
86
87 /* The byte is OK */
88
89 s -= 6;
90 c |= (ptr[i] & 0x3f) << s;
91 }
92 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
93 return a;
94 }
95 }
96
97
98
99 /*************************************************
100 * Find Unicode property name *
101 *************************************************/
102
/* Find the name of a Unicode property.

Arguments:
  ptype     the property type, compared with the "type" field of each
            _pcre_utt entry
  pvalue    the property value, compared with the "value" field of each
            _pcre_utt entry

Returns:    the "name" field of the matching entry, or "??" if no entry
            matches or if SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan the table backwards, starting at the last entry.
NOTE(review): this assumes _pcre_utt_size is the number of entries in
_pcre_utt, so that the last valid index is _pcre_utt_size - 1; starting at
_pcre_utt_size would read one element beyond the end of the table. Confirm
against the definition of _pcre_utt_size. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }

/* i is -1 here if the loop ran off the front of the table without a match */

return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)ptype;    /* Avoid compiler warnings about unused arguments */
(void)pvalue;
return "??";
#endif
}
119
120
121
122 /*************************************************
123 * Print compiled regex *
124 *************************************************/
125
126 /* Make this function work for a regex with integers in either byte order.
127 However, we assume that what we are passed is a compiled regex. */
128
129 static void
130 pcre_printint(pcre *external_re, FILE *f)
131 {
132 real_pcre *re = (real_pcre *)external_re;
133 uschar *codestart, *code;
134 BOOL utf8;
135
136 unsigned int options = re->options;
137 int offset = re->name_table_offset;
138 int count = re->name_count;
139 int size = re->name_entry_size;
140
141 if (re->magic_number != MAGIC_NUMBER)
142 {
143 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
144 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
145 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
146 options = ((options << 24) & 0xff000000) |
147 ((options << 8) & 0x00ff0000) |
148 ((options >> 8) & 0x0000ff00) |
149 ((options >> 24) & 0x000000ff);
150 }
151
152 code = codestart = (uschar *)re + offset + count * size;
153 utf8 = (options & PCRE_UTF8) != 0;
154
155 for(;;)
156 {
157 uschar *ccode;
158 int c;
159 int extra = 0;
160
161 fprintf(f, "%3d ", (int)(code - codestart));
162
163 if (*code >= OP_BRA)
164 {
165 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
166 fprintf(f, "%3d Bra extra\n", GET(code, 1));
167 else
168 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
169 code += _pcre_OP_lengths[OP_BRA];
170 continue;
171 }
172
173 switch(*code)
174 {
175 case OP_END:
176 fprintf(f, " %s\n", OP_names[*code]);
177 fprintf(f, "------------------------------------------------------------------\n");
178 return;
179
180 case OP_OPT:
181 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
182 break;
183
184 case OP_CHAR:
185 {
186 fprintf(f, " ");
187 do
188 {
189 code++;
190 code += 1 + print_char(f, code, utf8);
191 }
192 while (*code == OP_CHAR);
193 fprintf(f, "\n");
194 continue;
195 }
196 break;
197
198 case OP_CHARNC:
199 {
200 fprintf(f, " NC ");
201 do
202 {
203 code++;
204 code += 1 + print_char(f, code, utf8);
205 }
206 while (*code == OP_CHARNC);
207 fprintf(f, "\n");
208 continue;
209 }
210 break;
211
212 case OP_KETRMAX:
213 case OP_KETRMIN:
214 case OP_ALT:
215 case OP_KET:
216 case OP_ASSERT:
217 case OP_ASSERT_NOT:
218 case OP_ASSERTBACK:
219 case OP_ASSERTBACK_NOT:
220 case OP_ONCE:
221 case OP_COND:
222 case OP_REVERSE:
223 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
224 break;
225
226 case OP_BRANUMBER:
227 printf("%3d %s", GET2(code, 1), OP_names[*code]);
228 break;
229
230 case OP_CREF:
231 if (GET2(code, 1) == CREF_RECURSE)
232 fprintf(f, " Cond recurse");
233 else
234 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
235 break;
236
237 case OP_STAR:
238 case OP_MINSTAR:
239 case OP_PLUS:
240 case OP_MINPLUS:
241 case OP_QUERY:
242 case OP_MINQUERY:
243 case OP_TYPESTAR:
244 case OP_TYPEMINSTAR:
245 case OP_TYPEPLUS:
246 case OP_TYPEMINPLUS:
247 case OP_TYPEQUERY:
248 case OP_TYPEMINQUERY:
249 fprintf(f, " ");
250 if (*code >= OP_TYPESTAR)
251 {
252 fprintf(f, "%s", OP_names[code[1]]);
253 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
254 {
255 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
256 extra = 2;
257 }
258 }
259 else extra = print_char(f, code+1, utf8);
260 fprintf(f, "%s", OP_names[*code]);
261 break;
262
263 case OP_EXACT:
264 case OP_UPTO:
265 case OP_MINUPTO:
266 fprintf(f, " ");
267 extra = print_char(f, code+3, utf8);
268 fprintf(f, "{");
269 if (*code != OP_EXACT) fprintf(f, ",");
270 fprintf(f, "%d}", GET2(code,1));
271 if (*code == OP_MINUPTO) fprintf(f, "?");
272 break;
273
274 case OP_TYPEEXACT:
275 case OP_TYPEUPTO:
276 case OP_TYPEMINUPTO:
277 fprintf(f, " %s", OP_names[code[3]]);
278 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
279 {
280 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
281 extra = 2;
282 }
283 fprintf(f, "{");
284 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
285 fprintf(f, "%d}", GET2(code,1));
286 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
287 break;
288
289 case OP_NOT:
290 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
291 else fprintf(f, " [^\\x%02x]", c);
292 break;
293
294 case OP_NOTSTAR:
295 case OP_NOTMINSTAR:
296 case OP_NOTPLUS:
297 case OP_NOTMINPLUS:
298 case OP_NOTQUERY:
299 case OP_NOTMINQUERY:
300 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
301 else fprintf(f, " [^\\x%02x]", c);
302 fprintf(f, "%s", OP_names[*code]);
303 break;
304
305 case OP_NOTEXACT:
306 case OP_NOTUPTO:
307 case OP_NOTMINUPTO:
308 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
309 else fprintf(f, " [^\\x%02x]{", c);
310 if (*code != OP_NOTEXACT) fprintf(f, "0,");
311 fprintf(f, "%d}", GET2(code,1));
312 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
313 break;
314
315 case OP_RECURSE:
316 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
317 break;
318
319 case OP_REF:
320 fprintf(f, " \\%d", GET2(code,1));
321 ccode = code + _pcre_OP_lengths[*code];
322 goto CLASS_REF_REPEAT;
323
324 case OP_CALLOUT:
325 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
326 GET(code, 2 + LINK_SIZE));
327 break;
328
329 case OP_PROP:
330 case OP_NOTPROP:
331 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
332 break;
333
334 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
335 having this code always here, and it makes it less messy without all those
336 #ifdefs. */
337
338 case OP_CLASS:
339 case OP_NCLASS:
340 case OP_XCLASS:
341 {
342 int i, min, max;
343 BOOL printmap;
344
345 fprintf(f, " [");
346
347 if (*code == OP_XCLASS)
348 {
349 extra = GET(code, 1);
350 ccode = code + LINK_SIZE + 1;
351 printmap = (*ccode & XCL_MAP) != 0;
352 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
353 }
354 else
355 {
356 printmap = TRUE;
357 ccode = code + 1;
358 }
359
360 /* Print a bit map */
361
362 if (printmap)
363 {
364 for (i = 0; i < 256; i++)
365 {
366 if ((ccode[i/8] & (1 << (i&7))) != 0)
367 {
368 int j;
369 for (j = i+1; j < 256; j++)
370 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
371 if (i == '-' || i == ']') fprintf(f, "\\");
372 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
373 if (--j > i)
374 {
375 if (j != i + 1) fprintf(f, "-");
376 if (j == '-' || j == ']') fprintf(f, "\\");
377 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
378 }
379 i = j;
380 }
381 }
382 ccode += 32;
383 }
384
385 /* For an XCLASS there is always some additional data */
386
387 if (*code == OP_XCLASS)
388 {
389 int ch;
390 while ((ch = *ccode++) != XCL_END)
391 {
392 if (ch == XCL_PROP)
393 {
394 int ptype = *ccode++;
395 int pvalue = *ccode++;
396 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
397 }
398 else if (ch == XCL_NOTPROP)
399 {
400 int ptype = *ccode++;
401 int pvalue = *ccode++;
402 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
403 }
404 else
405 {
406 ccode += 1 + print_char(f, ccode, TRUE);
407 if (ch == XCL_RANGE)
408 {
409 fprintf(f, "-");
410 ccode += 1 + print_char(f, ccode, TRUE);
411 }
412 }
413 }
414 }
415
416 /* Indicate a non-UTF8 class which was created by negation */
417
418 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
419
420 /* Handle repeats after a class or a back reference */
421
422 CLASS_REF_REPEAT:
423 switch(*ccode)
424 {
425 case OP_CRSTAR:
426 case OP_CRMINSTAR:
427 case OP_CRPLUS:
428 case OP_CRMINPLUS:
429 case OP_CRQUERY:
430 case OP_CRMINQUERY:
431 fprintf(f, "%s", OP_names[*ccode]);
432 extra += _pcre_OP_lengths[*ccode];
433 break;
434
435 case OP_CRRANGE:
436 case OP_CRMINRANGE:
437 min = GET2(ccode,1);
438 max = GET2(ccode,3);
439 if (max == 0) fprintf(f, "{%d,}", min);
440 else fprintf(f, "{%d,%d}", min, max);
441 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
442 extra += _pcre_OP_lengths[*ccode];
443 break;
444
445 /* Do nothing if it's not a repeat; this code stops picky compilers
446 warning about the lack of a default code path. */
447
448 default:
449 break;
450 }
451 }
452 break;
453
454 /* Anything else is just an item with no data*/
455
456 default:
457 fprintf(f, " %s", OP_names[*code]);
458 break;
459 }
460
461 code += _pcre_OP_lengths[*code] + extra;
462 fprintf(f, "\n");
463 }
464 }
465
466 /* End of pcre_printint.src */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12