/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 447 - (show annotations) (download) (as text)
Tue Sep 15 18:17:54 2009 UTC (4 years, 11 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14563 byte(s)
Capture data when (*ACCEPT) is inside capturing parentheses.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 Copyright (c) 1997-2009 University of Cambridge
10
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
17
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
21
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
25
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39
40
41 /* This module contains a PCRE private debugging function for printing out the
42 internal form of a compiled regular expression, along with some supporting
43 local functions. This source file is used in two places:
44
45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47
48 (2) It is always #included by pcretest.c, which can be asked to print out a
49 compiled regex for debugging purposes. */
50
51
/* PRINTABLE(c) is true when the character code c should be output as a
literal, and false when it should be shown in hexadecimal. isprint() is
deliberately not used: its result can vary from system to system (even without
the use of locales), and the output must always be the same for testing
purposes. This macro is used in pcretest as well as in this file. The argument
appears twice in the expansion, so it should be free of side effects. */

#if defined(EBCDIC)
#define PRINTABLE(c) (64 <= (c) && 255 > (c))
#else
#define PRINTABLE(c) (32 <= (c) && 127 > (c))
#endif
62
63 /* The table of operator names. */
64
65 static const char *OP_names[] = { OP_NAME_LIST };
66
67
68
69 /*************************************************
70 * Print single- or multi-byte character *
71 *************************************************/
72
73 static int
74 print_char(FILE *f, uschar *ptr, BOOL utf8)
75 {
76 int c = *ptr;
77
78 #ifndef SUPPORT_UTF8
79 utf8 = utf8; /* Avoid compiler warning */
80 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
81 return 0;
82
83 #else
84 if (!utf8 || (c & 0xc0) != 0xc0)
85 {
86 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
87 return 0;
88 }
89 else
90 {
91 int i;
92 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
93 int s = 6*a;
94 c = (c & _pcre_utf8_table3[a]) << s;
95 for (i = 1; i <= a; i++)
96 {
97 /* This is a check for malformed UTF-8; it should only occur if the sanity
98 check has been turned off. Rather than swallow random bytes, just stop if
99 we hit a bad one. Print it with \X instead of \x as an indication. */
100
101 if ((ptr[i] & 0xc0) != 0x80)
102 {
103 fprintf(f, "\\X{%x}", c);
104 return i - 1;
105 }
106
107 /* The byte is OK */
108
109 s -= 6;
110 c |= (ptr[i] & 0x3f) << s;
111 }
112 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113 return a;
114 }
115 #endif
116 }
117
118
119
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Look up the name of a Unicode property.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns:   when SUPPORT_UCP is defined, the name of the entry in _pcre_utt
           whose type and value both match, searching from the end of the
           table towards the start; "??" if no entry matches or if
           SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
    return _pcre_utt_names + _pcre_utt[i].name_offset;
  }
return "??";
#else
/* The arguments are not needed without UCP support. Mark them as used without
doing any arithmetic on them, so that no signed overflow can occur. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
140
141
142
143 /*************************************************
144 * Print compiled regex *
145 *************************************************/
146
147 /* Make this function work for a regex with integers either byte order.
148 However, we assume that what we are passed is a compiled regex. The
149 print_lengths flag controls whether offsets and lengths of items are printed.
150 They can be turned off from pcretest so that automatic tests on bytecode can be
151 written that do not depend on the value of LINK_SIZE. */
152
153 static void
154 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
155 {
156 real_pcre *re = (real_pcre *)external_re;
157 uschar *codestart, *code;
158 BOOL utf8;
159
160 unsigned int options = re->options;
161 int offset = re->name_table_offset;
162 int count = re->name_count;
163 int size = re->name_entry_size;
164
165 if (re->magic_number != MAGIC_NUMBER)
166 {
167 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
168 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
169 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
170 options = ((options << 24) & 0xff000000) |
171 ((options << 8) & 0x00ff0000) |
172 ((options >> 8) & 0x0000ff00) |
173 ((options >> 24) & 0x000000ff);
174 }
175
176 code = codestart = (uschar *)re + offset + count * size;
177 utf8 = (options & PCRE_UTF8) != 0;
178
179 for(;;)
180 {
181 uschar *ccode;
182 int c;
183 int extra = 0;
184
185 if (print_lengths)
186 fprintf(f, "%3d ", (int)(code - codestart));
187 else
188 fprintf(f, " ");
189
190 switch(*code)
191 {
192 case OP_END:
193 fprintf(f, " %s\n", OP_names[*code]);
194 fprintf(f, "------------------------------------------------------------------\n");
195 return;
196
197 case OP_OPT:
198 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
199 break;
200
201 case OP_CHAR:
202 fprintf(f, " ");
203 do
204 {
205 code++;
206 code += 1 + print_char(f, code, utf8);
207 }
208 while (*code == OP_CHAR);
209 fprintf(f, "\n");
210 continue;
211
212 case OP_CHARNC:
213 fprintf(f, " NC ");
214 do
215 {
216 code++;
217 code += 1 + print_char(f, code, utf8);
218 }
219 while (*code == OP_CHARNC);
220 fprintf(f, "\n");
221 continue;
222
223 case OP_CBRA:
224 case OP_SCBRA:
225 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
226 else fprintf(f, " ");
227 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
228 break;
229
230 case OP_BRA:
231 case OP_SBRA:
232 case OP_KETRMAX:
233 case OP_KETRMIN:
234 case OP_ALT:
235 case OP_KET:
236 case OP_ASSERT:
237 case OP_ASSERT_NOT:
238 case OP_ASSERTBACK:
239 case OP_ASSERTBACK_NOT:
240 case OP_ONCE:
241 case OP_COND:
242 case OP_SCOND:
243 case OP_REVERSE:
244 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
245 else fprintf(f, " ");
246 fprintf(f, "%s", OP_names[*code]);
247 break;
248
249 case OP_CLOSE:
250 fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
251 break;
252
253 case OP_CREF:
254 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
255 break;
256
257 case OP_RREF:
258 c = GET2(code, 1);
259 if (c == RREF_ANY)
260 fprintf(f, " Cond recurse any");
261 else
262 fprintf(f, " Cond recurse %d", c);
263 break;
264
265 case OP_DEF:
266 fprintf(f, " Cond def");
267 break;
268
269 case OP_STAR:
270 case OP_MINSTAR:
271 case OP_POSSTAR:
272 case OP_PLUS:
273 case OP_MINPLUS:
274 case OP_POSPLUS:
275 case OP_QUERY:
276 case OP_MINQUERY:
277 case OP_POSQUERY:
278 case OP_TYPESTAR:
279 case OP_TYPEMINSTAR:
280 case OP_TYPEPOSSTAR:
281 case OP_TYPEPLUS:
282 case OP_TYPEMINPLUS:
283 case OP_TYPEPOSPLUS:
284 case OP_TYPEQUERY:
285 case OP_TYPEMINQUERY:
286 case OP_TYPEPOSQUERY:
287 fprintf(f, " ");
288 if (*code >= OP_TYPESTAR)
289 {
290 fprintf(f, "%s", OP_names[code[1]]);
291 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
292 {
293 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
294 extra = 2;
295 }
296 }
297 else extra = print_char(f, code+1, utf8);
298 fprintf(f, "%s", OP_names[*code]);
299 break;
300
301 case OP_EXACT:
302 case OP_UPTO:
303 case OP_MINUPTO:
304 case OP_POSUPTO:
305 fprintf(f, " ");
306 extra = print_char(f, code+3, utf8);
307 fprintf(f, "{");
308 if (*code != OP_EXACT) fprintf(f, "0,");
309 fprintf(f, "%d}", GET2(code,1));
310 if (*code == OP_MINUPTO) fprintf(f, "?");
311 else if (*code == OP_POSUPTO) fprintf(f, "+");
312 break;
313
314 case OP_TYPEEXACT:
315 case OP_TYPEUPTO:
316 case OP_TYPEMINUPTO:
317 case OP_TYPEPOSUPTO:
318 fprintf(f, " %s", OP_names[code[3]]);
319 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
320 {
321 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
322 extra = 2;
323 }
324 fprintf(f, "{");
325 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
326 fprintf(f, "%d}", GET2(code,1));
327 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
328 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
329 break;
330
331 case OP_NOT:
332 c = code[1];
333 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
334 else fprintf(f, " [^\\x%02x]", c);
335 break;
336
337 case OP_NOTSTAR:
338 case OP_NOTMINSTAR:
339 case OP_NOTPOSSTAR:
340 case OP_NOTPLUS:
341 case OP_NOTMINPLUS:
342 case OP_NOTPOSPLUS:
343 case OP_NOTQUERY:
344 case OP_NOTMINQUERY:
345 case OP_NOTPOSQUERY:
346 c = code[1];
347 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
348 else fprintf(f, " [^\\x%02x]", c);
349 fprintf(f, "%s", OP_names[*code]);
350 break;
351
352 case OP_NOTEXACT:
353 case OP_NOTUPTO:
354 case OP_NOTMINUPTO:
355 case OP_NOTPOSUPTO:
356 c = code[3];
357 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
358 else fprintf(f, " [^\\x%02x]{", c);
359 if (*code != OP_NOTEXACT) fprintf(f, "0,");
360 fprintf(f, "%d}", GET2(code,1));
361 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
362 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
363 break;
364
365 case OP_RECURSE:
366 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
367 else fprintf(f, " ");
368 fprintf(f, "%s", OP_names[*code]);
369 break;
370
371 case OP_REF:
372 fprintf(f, " \\%d", GET2(code,1));
373 ccode = code + _pcre_OP_lengths[*code];
374 goto CLASS_REF_REPEAT;
375
376 case OP_CALLOUT:
377 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
378 GET(code, 2 + LINK_SIZE));
379 break;
380
381 case OP_PROP:
382 case OP_NOTPROP:
383 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
384 break;
385
386 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
387 having this code always here, and it makes it less messy without all those
388 #ifdefs. */
389
390 case OP_CLASS:
391 case OP_NCLASS:
392 case OP_XCLASS:
393 {
394 int i, min, max;
395 BOOL printmap;
396
397 fprintf(f, " [");
398
399 if (*code == OP_XCLASS)
400 {
401 extra = GET(code, 1);
402 ccode = code + LINK_SIZE + 1;
403 printmap = (*ccode & XCL_MAP) != 0;
404 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
405 }
406 else
407 {
408 printmap = TRUE;
409 ccode = code + 1;
410 }
411
412 /* Print a bit map */
413
414 if (printmap)
415 {
416 for (i = 0; i < 256; i++)
417 {
418 if ((ccode[i/8] & (1 << (i&7))) != 0)
419 {
420 int j;
421 for (j = i+1; j < 256; j++)
422 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
423 if (i == '-' || i == ']') fprintf(f, "\\");
424 if (PRINTABLE(i)) fprintf(f, "%c", i);
425 else fprintf(f, "\\x%02x", i);
426 if (--j > i)
427 {
428 if (j != i + 1) fprintf(f, "-");
429 if (j == '-' || j == ']') fprintf(f, "\\");
430 if (PRINTABLE(j)) fprintf(f, "%c", j);
431 else fprintf(f, "\\x%02x", j);
432 }
433 i = j;
434 }
435 }
436 ccode += 32;
437 }
438
439 /* For an XCLASS there is always some additional data */
440
441 if (*code == OP_XCLASS)
442 {
443 int ch;
444 while ((ch = *ccode++) != XCL_END)
445 {
446 if (ch == XCL_PROP)
447 {
448 int ptype = *ccode++;
449 int pvalue = *ccode++;
450 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
451 }
452 else if (ch == XCL_NOTPROP)
453 {
454 int ptype = *ccode++;
455 int pvalue = *ccode++;
456 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
457 }
458 else
459 {
460 ccode += 1 + print_char(f, ccode, TRUE);
461 if (ch == XCL_RANGE)
462 {
463 fprintf(f, "-");
464 ccode += 1 + print_char(f, ccode, TRUE);
465 }
466 }
467 }
468 }
469
470 /* Indicate a non-UTF8 class which was created by negation */
471
472 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
473
474 /* Handle repeats after a class or a back reference */
475
476 CLASS_REF_REPEAT:
477 switch(*ccode)
478 {
479 case OP_CRSTAR:
480 case OP_CRMINSTAR:
481 case OP_CRPLUS:
482 case OP_CRMINPLUS:
483 case OP_CRQUERY:
484 case OP_CRMINQUERY:
485 fprintf(f, "%s", OP_names[*ccode]);
486 extra += _pcre_OP_lengths[*ccode];
487 break;
488
489 case OP_CRRANGE:
490 case OP_CRMINRANGE:
491 min = GET2(ccode,1);
492 max = GET2(ccode,3);
493 if (max == 0) fprintf(f, "{%d,}", min);
494 else fprintf(f, "{%d,%d}", min, max);
495 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
496 extra += _pcre_OP_lengths[*ccode];
497 break;
498
499 /* Do nothing if it's not a repeat; this code stops picky compilers
500 warning about the lack of a default code path. */
501
502 default:
503 break;
504 }
505 }
506 break;
507
508 /* Anything else is just an item with no data*/
509
510 default:
511 fprintf(f, " %s", OP_names[*code]);
512 break;
513 }
514
515 code += _pcre_OP_lengths[*code] + extra;
516 fprintf(f, "\n");
517 }
518 }
519
520 /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12