/[pcre]/code/tags/pcre-7.9/pcre_printint.src
ViewVC logotype

Contents of /code/tags/pcre-7.9/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 417 - (hide annotations) (download) (as text)
Sat Apr 11 16:44:43 2009 UTC (5 years, 5 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14464 byte(s)
Tag release 7.9.

/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2009 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains a PCRE private debugging function for printing out the
internal form of a compiled regular expression, along with some supporting
local functions. This source file is used in two places:

(1) It is #included by pcre_compile.c when it is compiled in debugging mode
(DEBUG defined in pcre_internal.h). It is not included in production compiles.

(2) It is always #included by pcretest.c, which can be asked to print out a
compiled regex for debugging purposes. */


/* Macro that decides whether a character should be output as a literal or in
hexadecimal. We don't use isprint() because that can vary from system to system
(even without the use of locales) and we want the output always to be the same,
for testing purposes. This macro is used in pcretest as well as in this file.

The argument is evaluated twice, so it must not have side effects. When EBCDIC
is defined the literal range is 64..254; otherwise it is 32..126. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

63     /* The table of operator names. */
64    
65 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
66    
67    
68 nigel 93
69 nigel 85 /*************************************************
70     * Print single- or multi-byte character *
71     *************************************************/
72    
73     static int
74     print_char(FILE *f, uschar *ptr, BOOL utf8)
75     {
76     int c = *ptr;
77    
78 ph10 107 #ifndef SUPPORT_UTF8
79     utf8 = utf8; /* Avoid compiler warning */
80     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
81     return 0;
82    
83     #else
84 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
85     {
86 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
87 nigel 85 return 0;
88     }
89     else
90     {
91     int i;
92     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
93     int s = 6*a;
94     c = (c & _pcre_utf8_table3[a]) << s;
95     for (i = 1; i <= a; i++)
96     {
97     /* This is a check for malformed UTF-8; it should only occur if the sanity
98     check has been turned off. Rather than swallow random bytes, just stop if
99     we hit a bad one. Print it with \X instead of \x as an indication. */
100    
101     if ((ptr[i] & 0xc0) != 0x80)
102     {
103     fprintf(f, "\\X{%x}", c);
104     return i - 1;
105     }
106    
107     /* The byte is OK */
108    
109     s -= 6;
110     c |= (ptr[i] & 0x3f) << s;
111     }
112     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113     return a;
114     }
115 ph10 111 #endif
116 nigel 85 }
117    
118    
119    
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Looks up the printable name of a Unicode property. When SUPPORT_UCP is
defined, _pcre_utt is searched from its last entry backwards for an entry
whose type and value both match, and the corresponding string inside
_pcre_utt_names is returned. If nothing matches, or if SUPPORT_UCP is not
defined, the string "??" is returned.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    pointer to the property name, or to "??"
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* The arguments are unused in this configuration; the casts keep compilers
quiet without doing any arithmetic on them. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}



143     /*************************************************
144     * Print compiled regex *
145     *************************************************/
146    
147     /* Make this function work for a regex with integers either byte order.
148 ph10 116 However, we assume that what we are passed is a compiled regex. The
149 ph10 123 print_lengths flag controls whether offsets and lengths of items are printed.
150 ph10 116 They can be turned off from pcretest so that automatic tests on bytecode can be
151     written that do not depend on the value of LINK_SIZE. */
152 nigel 85
153     static void
154 ph10 116 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
155 nigel 85 {
156     real_pcre *re = (real_pcre *)external_re;
157     uschar *codestart, *code;
158     BOOL utf8;
159    
160     unsigned int options = re->options;
161     int offset = re->name_table_offset;
162     int count = re->name_count;
163     int size = re->name_entry_size;
164    
165     if (re->magic_number != MAGIC_NUMBER)
166     {
167     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
168     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
169     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
170     options = ((options << 24) & 0xff000000) |
171     ((options << 8) & 0x00ff0000) |
172     ((options >> 8) & 0x0000ff00) |
173     ((options >> 24) & 0x000000ff);
174     }
175    
176     code = codestart = (uschar *)re + offset + count * size;
177     utf8 = (options & PCRE_UTF8) != 0;
178    
179     for(;;)
180     {
181     uschar *ccode;
182     int c;
183     int extra = 0;
184    
185 ph10 116 if (print_lengths)
186     fprintf(f, "%3d ", (int)(code - codestart));
187     else
188 ph10 123 fprintf(f, " ");
189 nigel 85
190     switch(*code)
191     {
192     case OP_END:
193     fprintf(f, " %s\n", OP_names[*code]);
194     fprintf(f, "------------------------------------------------------------------\n");
195     return;
196    
197     case OP_OPT:
198     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
199     break;
200    
201     case OP_CHAR:
202 nigel 91 fprintf(f, " ");
203     do
204 nigel 85 {
205 nigel 91 code++;
206     code += 1 + print_char(f, code, utf8);
207 nigel 85 }
208 nigel 91 while (*code == OP_CHAR);
209     fprintf(f, "\n");
210     continue;
211 nigel 85
212     case OP_CHARNC:
213 nigel 91 fprintf(f, " NC ");
214     do
215 nigel 85 {
216 nigel 91 code++;
217     code += 1 + print_char(f, code, utf8);
218 nigel 85 }
219 nigel 91 while (*code == OP_CHARNC);
220     fprintf(f, "\n");
221     continue;
222 nigel 85
223 nigel 93 case OP_CBRA:
224     case OP_SCBRA:
225 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
226 ph10 123 else fprintf(f, " ");
227 ph10 116 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
228 nigel 93 break;
229    
230     case OP_BRA:
231     case OP_SBRA:
232 nigel 85 case OP_KETRMAX:
233     case OP_KETRMIN:
234     case OP_ALT:
235     case OP_KET:
236     case OP_ASSERT:
237     case OP_ASSERT_NOT:
238     case OP_ASSERTBACK:
239     case OP_ASSERTBACK_NOT:
240     case OP_ONCE:
241     case OP_COND:
242 nigel 93 case OP_SCOND:
243 nigel 85 case OP_REVERSE:
244 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
245 ph10 123 else fprintf(f, " ");
246 ph10 116 fprintf(f, "%s", OP_names[*code]);
247 nigel 85 break;
248    
249 nigel 93 case OP_CREF:
250     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
251 nigel 85 break;
252    
253 nigel 93 case OP_RREF:
254     c = GET2(code, 1);
255     if (c == RREF_ANY)
256     fprintf(f, " Cond recurse any");
257 nigel 85 else
258 nigel 93 fprintf(f, " Cond recurse %d", c);
259 nigel 85 break;
260    
261 nigel 93 case OP_DEF:
262     fprintf(f, " Cond def");
263     break;
264    
265 nigel 85 case OP_STAR:
266     case OP_MINSTAR:
267 nigel 93 case OP_POSSTAR:
268 nigel 85 case OP_PLUS:
269     case OP_MINPLUS:
270 nigel 93 case OP_POSPLUS:
271 nigel 85 case OP_QUERY:
272     case OP_MINQUERY:
273 nigel 93 case OP_POSQUERY:
274 nigel 85 case OP_TYPESTAR:
275     case OP_TYPEMINSTAR:
276 nigel 93 case OP_TYPEPOSSTAR:
277 nigel 85 case OP_TYPEPLUS:
278     case OP_TYPEMINPLUS:
279 nigel 93 case OP_TYPEPOSPLUS:
280 nigel 85 case OP_TYPEQUERY:
281     case OP_TYPEMINQUERY:
282 nigel 93 case OP_TYPEPOSQUERY:
283 nigel 85 fprintf(f, " ");
284     if (*code >= OP_TYPESTAR)
285     {
286     fprintf(f, "%s", OP_names[code[1]]);
287     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
288     {
289 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
290     extra = 2;
291 nigel 85 }
292     }
293     else extra = print_char(f, code+1, utf8);
294     fprintf(f, "%s", OP_names[*code]);
295     break;
296    
297     case OP_EXACT:
298     case OP_UPTO:
299     case OP_MINUPTO:
300 nigel 93 case OP_POSUPTO:
301 nigel 85 fprintf(f, " ");
302     extra = print_char(f, code+3, utf8);
303     fprintf(f, "{");
304 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
305 nigel 85 fprintf(f, "%d}", GET2(code,1));
306     if (*code == OP_MINUPTO) fprintf(f, "?");
307 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
308 nigel 85 break;
309    
310     case OP_TYPEEXACT:
311     case OP_TYPEUPTO:
312     case OP_TYPEMINUPTO:
313 nigel 93 case OP_TYPEPOSUPTO:
314 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
315     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
316     {
317 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
318     extra = 2;
319 nigel 85 }
320     fprintf(f, "{");
321     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
322     fprintf(f, "%d}", GET2(code,1));
323     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
324 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
325 nigel 85 break;
326    
327     case OP_NOT:
328 nigel 93 c = code[1];
329     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
330 nigel 85 else fprintf(f, " [^\\x%02x]", c);
331     break;
332    
333     case OP_NOTSTAR:
334     case OP_NOTMINSTAR:
335 nigel 93 case OP_NOTPOSSTAR:
336 nigel 85 case OP_NOTPLUS:
337     case OP_NOTMINPLUS:
338 nigel 93 case OP_NOTPOSPLUS:
339 nigel 85 case OP_NOTQUERY:
340     case OP_NOTMINQUERY:
341 nigel 93 case OP_NOTPOSQUERY:
342     c = code[1];
343     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
344 nigel 85 else fprintf(f, " [^\\x%02x]", c);
345     fprintf(f, "%s", OP_names[*code]);
346     break;
347    
348     case OP_NOTEXACT:
349     case OP_NOTUPTO:
350     case OP_NOTMINUPTO:
351 nigel 93 case OP_NOTPOSUPTO:
352     c = code[3];
353     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
354 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
355     if (*code != OP_NOTEXACT) fprintf(f, "0,");
356     fprintf(f, "%d}", GET2(code,1));
357     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
358 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
359 nigel 85 break;
360    
361     case OP_RECURSE:
362 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
363 ph10 123 else fprintf(f, " ");
364 ph10 116 fprintf(f, "%s", OP_names[*code]);
365 nigel 85 break;
366    
367     case OP_REF:
368     fprintf(f, " \\%d", GET2(code,1));
369     ccode = code + _pcre_OP_lengths[*code];
370     goto CLASS_REF_REPEAT;
371    
372     case OP_CALLOUT:
373     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
374     GET(code, 2 + LINK_SIZE));
375     break;
376    
377     case OP_PROP:
378     case OP_NOTPROP:
379 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
380 nigel 85 break;
381    
382     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
383     having this code always here, and it makes it less messy without all those
384     #ifdefs. */
385    
386     case OP_CLASS:
387     case OP_NCLASS:
388     case OP_XCLASS:
389     {
390     int i, min, max;
391     BOOL printmap;
392    
393     fprintf(f, " [");
394    
395     if (*code == OP_XCLASS)
396     {
397     extra = GET(code, 1);
398     ccode = code + LINK_SIZE + 1;
399     printmap = (*ccode & XCL_MAP) != 0;
400     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
401     }
402     else
403     {
404     printmap = TRUE;
405     ccode = code + 1;
406     }
407    
408     /* Print a bit map */
409    
410     if (printmap)
411     {
412     for (i = 0; i < 256; i++)
413     {
414     if ((ccode[i/8] & (1 << (i&7))) != 0)
415     {
416     int j;
417     for (j = i+1; j < 256; j++)
418     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
419     if (i == '-' || i == ']') fprintf(f, "\\");
420 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
421     else fprintf(f, "\\x%02x", i);
422 nigel 85 if (--j > i)
423     {
424     if (j != i + 1) fprintf(f, "-");
425     if (j == '-' || j == ']') fprintf(f, "\\");
426 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
427     else fprintf(f, "\\x%02x", j);
428 nigel 85 }
429     i = j;
430     }
431     }
432     ccode += 32;
433     }
434    
435     /* For an XCLASS there is always some additional data */
436    
437     if (*code == OP_XCLASS)
438     {
439     int ch;
440     while ((ch = *ccode++) != XCL_END)
441     {
442     if (ch == XCL_PROP)
443     {
444 nigel 87 int ptype = *ccode++;
445     int pvalue = *ccode++;
446     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
447 nigel 85 }
448     else if (ch == XCL_NOTPROP)
449     {
450 nigel 87 int ptype = *ccode++;
451     int pvalue = *ccode++;
452     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
453 nigel 85 }
454     else
455     {
456     ccode += 1 + print_char(f, ccode, TRUE);
457     if (ch == XCL_RANGE)
458     {
459     fprintf(f, "-");
460     ccode += 1 + print_char(f, ccode, TRUE);
461     }
462     }
463     }
464     }
465    
466     /* Indicate a non-UTF8 class which was created by negation */
467    
468     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
469    
470     /* Handle repeats after a class or a back reference */
471    
472     CLASS_REF_REPEAT:
473     switch(*ccode)
474     {
475     case OP_CRSTAR:
476     case OP_CRMINSTAR:
477     case OP_CRPLUS:
478     case OP_CRMINPLUS:
479     case OP_CRQUERY:
480     case OP_CRMINQUERY:
481     fprintf(f, "%s", OP_names[*ccode]);
482     extra += _pcre_OP_lengths[*ccode];
483     break;
484    
485     case OP_CRRANGE:
486     case OP_CRMINRANGE:
487     min = GET2(ccode,1);
488     max = GET2(ccode,3);
489     if (max == 0) fprintf(f, "{%d,}", min);
490     else fprintf(f, "{%d,%d}", min, max);
491     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
492     extra += _pcre_OP_lengths[*ccode];
493     break;
494 nigel 87
495     /* Do nothing if it's not a repeat; this code stops picky compilers
496     warning about the lack of a default code path. */
497    
498     default:
499     break;
500 nigel 85 }
501     }
502     break;
503    
504     /* Anything else is just an item with no data*/
505    
506     default:
507     fprintf(f, " %s", OP_names[*code]);
508     break;
509     }
510    
511     code += _pcre_OP_lengths[*code] + extra;
512     fprintf(f, "\n");
513     }
514     }
515    
/* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12