/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory | Revision Log


Revision 459 - (hide annotations) (download) (as text)
Sun Oct 4 09:21:39 2009 UTC (3 years, 7 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14757 byte(s)
Fix problems with conditional references to duplicate named subpatterns.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 391 Copyright (c) 1997-2009 University of Cambridge
10 nigel 85
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */
50    
51    
/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file.

The argument is evaluated twice, so it must not have side effects. In an
EBCDIC build the printable range is taken to be 64..254; otherwise it is
32..126. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif
62 nigel 93
63     /* The table of operator names. */
64    
65 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
66    
67    
68 nigel 93
69 nigel 85 /*************************************************
70     * Print single- or multi-byte character *
71     *************************************************/
72    
73     static int
74     print_char(FILE *f, uschar *ptr, BOOL utf8)
75     {
76     int c = *ptr;
77    
78 ph10 107 #ifndef SUPPORT_UTF8
79     utf8 = utf8; /* Avoid compiler warning */
80     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
81     return 0;
82    
83     #else
84 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
85     {
86 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
87 nigel 85 return 0;
88     }
89     else
90     {
91     int i;
92     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
93     int s = 6*a;
94     c = (c & _pcre_utf8_table3[a]) << s;
95     for (i = 1; i <= a; i++)
96     {
97     /* This is a check for malformed UTF-8; it should only occur if the sanity
98     check has been turned off. Rather than swallow random bytes, just stop if
99     we hit a bad one. Print it with \X instead of \x as an indication. */
100    
101     if ((ptr[i] & 0xc0) != 0x80)
102     {
103     fprintf(f, "\\X{%x}", c);
104     return i - 1;
105     }
106    
107     /* The byte is OK */
108    
109     s -= 6;
110     c |= (ptr[i] & 0x3f) << s;
111     }
112     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113     return a;
114     }
115 ph10 111 #endif
116 nigel 85 }
117    
118    
119    
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Look up the name of a Unicode property from its type and value.

When SUPPORT_UCP is defined, the _pcre_utt table is scanned from its last
entry to its first for an entry whose type and value both match; the result
then points into _pcre_utt_names at that entry's name_offset. When there is no
match, or when SUPPORT_UCP is not defined, the fixed string "??" is returned.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    pointer to the property name, or to "??"
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* The arguments are unused in this configuration; say so explicitly rather
than doing dummy arithmetic on them to silence compiler warnings. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
140    
141    
142    
143     /*************************************************
144     * Print compiled regex *
145     *************************************************/
146    
147     /* Make this function work for a regex with integers either byte order.
148 ph10 116 However, we assume that what we are passed is a compiled regex. The
149 ph10 123 print_lengths flag controls whether offsets and lengths of items are printed.
150 ph10 116 They can be turned off from pcretest so that automatic tests on bytecode can be
151     written that do not depend on the value of LINK_SIZE. */
152 nigel 85
153     static void
154 ph10 116 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
155 nigel 85 {
156     real_pcre *re = (real_pcre *)external_re;
157     uschar *codestart, *code;
158     BOOL utf8;
159    
160     unsigned int options = re->options;
161     int offset = re->name_table_offset;
162     int count = re->name_count;
163     int size = re->name_entry_size;
164    
165     if (re->magic_number != MAGIC_NUMBER)
166     {
167     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
168     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
169     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
170     options = ((options << 24) & 0xff000000) |
171     ((options << 8) & 0x00ff0000) |
172     ((options >> 8) & 0x0000ff00) |
173     ((options >> 24) & 0x000000ff);
174     }
175    
176     code = codestart = (uschar *)re + offset + count * size;
177     utf8 = (options & PCRE_UTF8) != 0;
178    
179     for(;;)
180     {
181     uschar *ccode;
182     int c;
183     int extra = 0;
184    
185 ph10 116 if (print_lengths)
186     fprintf(f, "%3d ", (int)(code - codestart));
187     else
188 ph10 123 fprintf(f, " ");
189 nigel 85
190     switch(*code)
191     {
192     case OP_END:
193     fprintf(f, " %s\n", OP_names[*code]);
194     fprintf(f, "------------------------------------------------------------------\n");
195     return;
196    
197     case OP_OPT:
198     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
199     break;
200    
201     case OP_CHAR:
202 nigel 91 fprintf(f, " ");
203     do
204 nigel 85 {
205 nigel 91 code++;
206     code += 1 + print_char(f, code, utf8);
207 nigel 85 }
208 nigel 91 while (*code == OP_CHAR);
209     fprintf(f, "\n");
210     continue;
211 nigel 85
212     case OP_CHARNC:
213 nigel 91 fprintf(f, " NC ");
214     do
215 nigel 85 {
216 nigel 91 code++;
217     code += 1 + print_char(f, code, utf8);
218 nigel 85 }
219 nigel 91 while (*code == OP_CHARNC);
220     fprintf(f, "\n");
221     continue;
222 nigel 85
223 nigel 93 case OP_CBRA:
224     case OP_SCBRA:
225 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
226 ph10 123 else fprintf(f, " ");
227 ph10 116 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
228 nigel 93 break;
229    
230     case OP_BRA:
231     case OP_SBRA:
232 nigel 85 case OP_KETRMAX:
233     case OP_KETRMIN:
234     case OP_ALT:
235     case OP_KET:
236     case OP_ASSERT:
237     case OP_ASSERT_NOT:
238     case OP_ASSERTBACK:
239     case OP_ASSERTBACK_NOT:
240     case OP_ONCE:
241     case OP_COND:
242 nigel 93 case OP_SCOND:
243 nigel 85 case OP_REVERSE:
244 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
245 ph10 123 else fprintf(f, " ");
246 ph10 116 fprintf(f, "%s", OP_names[*code]);
247 nigel 85 break;
248 ph10 447
249     case OP_CLOSE:
250     fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
251     break;
252 nigel 85
253 nigel 93 case OP_CREF:
254 ph10 459 case OP_NCREF:
255 nigel 93 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
256 nigel 85 break;
257    
258 nigel 93 case OP_RREF:
259     c = GET2(code, 1);
260     if (c == RREF_ANY)
261     fprintf(f, " Cond recurse any");
262 nigel 85 else
263 nigel 93 fprintf(f, " Cond recurse %d", c);
264 nigel 85 break;
265    
266 ph10 459 case OP_NRREF:
267     c = GET2(code, 1);
268     if (c == RREF_ANY)
269     fprintf(f, " Cond nrecurse any");
270     else
271     fprintf(f, " Cond nrecurse %d", c);
272     break;
273    
274 nigel 93 case OP_DEF:
275     fprintf(f, " Cond def");
276     break;
277    
278 nigel 85 case OP_STAR:
279     case OP_MINSTAR:
280 nigel 93 case OP_POSSTAR:
281 nigel 85 case OP_PLUS:
282     case OP_MINPLUS:
283 nigel 93 case OP_POSPLUS:
284 nigel 85 case OP_QUERY:
285     case OP_MINQUERY:
286 nigel 93 case OP_POSQUERY:
287 nigel 85 case OP_TYPESTAR:
288     case OP_TYPEMINSTAR:
289 nigel 93 case OP_TYPEPOSSTAR:
290 nigel 85 case OP_TYPEPLUS:
291     case OP_TYPEMINPLUS:
292 nigel 93 case OP_TYPEPOSPLUS:
293 nigel 85 case OP_TYPEQUERY:
294     case OP_TYPEMINQUERY:
295 nigel 93 case OP_TYPEPOSQUERY:
296 nigel 85 fprintf(f, " ");
297     if (*code >= OP_TYPESTAR)
298     {
299     fprintf(f, "%s", OP_names[code[1]]);
300     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
301     {
302 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
303     extra = 2;
304 nigel 85 }
305     }
306     else extra = print_char(f, code+1, utf8);
307     fprintf(f, "%s", OP_names[*code]);
308     break;
309    
310     case OP_EXACT:
311     case OP_UPTO:
312     case OP_MINUPTO:
313 nigel 93 case OP_POSUPTO:
314 nigel 85 fprintf(f, " ");
315     extra = print_char(f, code+3, utf8);
316     fprintf(f, "{");
317 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
318 nigel 85 fprintf(f, "%d}", GET2(code,1));
319     if (*code == OP_MINUPTO) fprintf(f, "?");
320 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
321 nigel 85 break;
322    
323     case OP_TYPEEXACT:
324     case OP_TYPEUPTO:
325     case OP_TYPEMINUPTO:
326 nigel 93 case OP_TYPEPOSUPTO:
327 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
328     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
329     {
330 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
331     extra = 2;
332 nigel 85 }
333     fprintf(f, "{");
334     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
335     fprintf(f, "%d}", GET2(code,1));
336     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
337 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
338 nigel 85 break;
339    
340     case OP_NOT:
341 nigel 93 c = code[1];
342     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
343 nigel 85 else fprintf(f, " [^\\x%02x]", c);
344     break;
345    
346     case OP_NOTSTAR:
347     case OP_NOTMINSTAR:
348 nigel 93 case OP_NOTPOSSTAR:
349 nigel 85 case OP_NOTPLUS:
350     case OP_NOTMINPLUS:
351 nigel 93 case OP_NOTPOSPLUS:
352 nigel 85 case OP_NOTQUERY:
353     case OP_NOTMINQUERY:
354 nigel 93 case OP_NOTPOSQUERY:
355     c = code[1];
356     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
357 nigel 85 else fprintf(f, " [^\\x%02x]", c);
358     fprintf(f, "%s", OP_names[*code]);
359     break;
360    
361     case OP_NOTEXACT:
362     case OP_NOTUPTO:
363     case OP_NOTMINUPTO:
364 nigel 93 case OP_NOTPOSUPTO:
365     c = code[3];
366     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
367 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
368     if (*code != OP_NOTEXACT) fprintf(f, "0,");
369     fprintf(f, "%d}", GET2(code,1));
370     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
371 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
372 nigel 85 break;
373    
374     case OP_RECURSE:
375 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
376 ph10 123 else fprintf(f, " ");
377 ph10 116 fprintf(f, "%s", OP_names[*code]);
378 nigel 85 break;
379    
380     case OP_REF:
381     fprintf(f, " \\%d", GET2(code,1));
382     ccode = code + _pcre_OP_lengths[*code];
383     goto CLASS_REF_REPEAT;
384    
385     case OP_CALLOUT:
386     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
387     GET(code, 2 + LINK_SIZE));
388     break;
389    
390     case OP_PROP:
391     case OP_NOTPROP:
392 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
393 nigel 85 break;
394    
395     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
396     having this code always here, and it makes it less messy without all those
397     #ifdefs. */
398    
399     case OP_CLASS:
400     case OP_NCLASS:
401     case OP_XCLASS:
402     {
403     int i, min, max;
404     BOOL printmap;
405    
406     fprintf(f, " [");
407    
408     if (*code == OP_XCLASS)
409     {
410     extra = GET(code, 1);
411     ccode = code + LINK_SIZE + 1;
412     printmap = (*ccode & XCL_MAP) != 0;
413     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
414     }
415     else
416     {
417     printmap = TRUE;
418     ccode = code + 1;
419     }
420    
421     /* Print a bit map */
422    
423     if (printmap)
424     {
425     for (i = 0; i < 256; i++)
426     {
427     if ((ccode[i/8] & (1 << (i&7))) != 0)
428     {
429     int j;
430     for (j = i+1; j < 256; j++)
431     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
432     if (i == '-' || i == ']') fprintf(f, "\\");
433 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
434     else fprintf(f, "\\x%02x", i);
435 nigel 85 if (--j > i)
436     {
437     if (j != i + 1) fprintf(f, "-");
438     if (j == '-' || j == ']') fprintf(f, "\\");
439 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
440     else fprintf(f, "\\x%02x", j);
441 nigel 85 }
442     i = j;
443     }
444     }
445     ccode += 32;
446     }
447    
448     /* For an XCLASS there is always some additional data */
449    
450     if (*code == OP_XCLASS)
451     {
452     int ch;
453     while ((ch = *ccode++) != XCL_END)
454     {
455     if (ch == XCL_PROP)
456     {
457 nigel 87 int ptype = *ccode++;
458     int pvalue = *ccode++;
459     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
460 nigel 85 }
461     else if (ch == XCL_NOTPROP)
462     {
463 nigel 87 int ptype = *ccode++;
464     int pvalue = *ccode++;
465     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
466 nigel 85 }
467     else
468     {
469     ccode += 1 + print_char(f, ccode, TRUE);
470     if (ch == XCL_RANGE)
471     {
472     fprintf(f, "-");
473     ccode += 1 + print_char(f, ccode, TRUE);
474     }
475     }
476     }
477     }
478    
479     /* Indicate a non-UTF8 class which was created by negation */
480    
481     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
482    
483     /* Handle repeats after a class or a back reference */
484    
485     CLASS_REF_REPEAT:
486     switch(*ccode)
487     {
488     case OP_CRSTAR:
489     case OP_CRMINSTAR:
490     case OP_CRPLUS:
491     case OP_CRMINPLUS:
492     case OP_CRQUERY:
493     case OP_CRMINQUERY:
494     fprintf(f, "%s", OP_names[*ccode]);
495     extra += _pcre_OP_lengths[*ccode];
496     break;
497    
498     case OP_CRRANGE:
499     case OP_CRMINRANGE:
500     min = GET2(ccode,1);
501     max = GET2(ccode,3);
502     if (max == 0) fprintf(f, "{%d,}", min);
503     else fprintf(f, "{%d,%d}", min, max);
504     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
505     extra += _pcre_OP_lengths[*ccode];
506     break;
507 nigel 87
508     /* Do nothing if it's not a repeat; this code stops picky compilers
509     warning about the lack of a default code path. */
510    
511     default:
512     break;
513 nigel 85 }
514     }
515     break;
516    
517     /* Anything else is just an item with no data*/
518    
519     default:
520     fprintf(f, " %s", OP_names[*code]);
521     break;
522     }
523    
524     code += _pcre_OP_lengths[*code] + extra;
525     fprintf(f, "\n");
526     }
527     }
528    
/* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12