/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 85 - (hide annotations) (download) (as text)
Sat Feb 24 21:41:13 2007 UTC (7 years, 5 months ago) by nigel
File MIME type: application/x-wais-source
File size: 12388 byte(s)
Load pcre-6.4 into code/trunk.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9     Copyright (c) 1997-2005 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
52     static const char *OP_names[] = { OP_NAME_LIST };
53    
54    
55     /*************************************************
56     * Print single- or multi-byte character *
57     *************************************************/
58    
59     static int
60     print_char(FILE *f, uschar *ptr, BOOL utf8)
61     {
62     int c = *ptr;
63    
64     if (!utf8 || (c & 0xc0) != 0xc0)
65     {
66     if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
67     return 0;
68     }
69     else
70     {
71     int i;
72     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
73     int s = 6*a;
74     c = (c & _pcre_utf8_table3[a]) << s;
75     for (i = 1; i <= a; i++)
76     {
77     /* This is a check for malformed UTF-8; it should only occur if the sanity
78     check has been turned off. Rather than swallow random bytes, just stop if
79     we hit a bad one. Print it with \X instead of \x as an indication. */
80    
81     if ((ptr[i] & 0xc0) != 0x80)
82     {
83     fprintf(f, "\\X{%x}", c);
84     return i - 1;
85     }
86    
87     /* The byte is OK */
88    
89     s -= 6;
90     c |= (ptr[i] & 0x3f) << s;
91     }
92     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
93     return a;
94     }
95     }
96    
97    
98    
99     /*************************************************
100     * Find Unicode property name *
101     *************************************************/
102    
/* Look up the name of a Unicode property by searching the _pcre_utt table
for an entry whose value field equals the given property code.

Argument:
  property   the property code to look up

Returns:     the name field of the matching table entry, or "??" if there is
             no match or if SUPPORT_UCP is not defined */

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
  int i;

  /* Scan from the last entry down to the first. _pcre_utt_size is taken to
  be the number of entries in _pcre_utt, so the last valid index is
  _pcre_utt_size - 1; starting at _pcre_utt_size itself would read one
  element past the end of the table. */

  for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
    if (property == _pcre_utt[i].value) break;
  }

  /* i is -1 here if the loop ran off the front without finding a match */

  return (i >= 0)? _pcre_utt[i].name : "??";
#else
  return "??";
#endif
}
117    
118    
119    
120     /*************************************************
121     * Print compiled regex *
122     *************************************************/
123    
124     /* Make this function work for a regex with integers either byte order.
125     However, we assume that what we are passed is a compiled regex. */
126    
127     static void
128     pcre_printint(pcre *external_re, FILE *f)
129     {
130     real_pcre *re = (real_pcre *)external_re;
131     uschar *codestart, *code;
132     BOOL utf8;
133    
134     unsigned int options = re->options;
135     int offset = re->name_table_offset;
136     int count = re->name_count;
137     int size = re->name_entry_size;
138    
139     if (re->magic_number != MAGIC_NUMBER)
140     {
141     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
142     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
143     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
144     options = ((options << 24) & 0xff000000) |
145     ((options << 8) & 0x00ff0000) |
146     ((options >> 8) & 0x0000ff00) |
147     ((options >> 24) & 0x000000ff);
148     }
149    
150     code = codestart = (uschar *)re + offset + count * size;
151     utf8 = (options & PCRE_UTF8) != 0;
152    
153     for(;;)
154     {
155     uschar *ccode;
156     int c;
157     int extra = 0;
158    
159     fprintf(f, "%3d ", (int)(code - codestart));
160    
161     if (*code >= OP_BRA)
162     {
163     if (*code - OP_BRA > EXTRACT_BASIC_MAX)
164     fprintf(f, "%3d Bra extra\n", GET(code, 1));
165     else
166     fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
167     code += _pcre_OP_lengths[OP_BRA];
168     continue;
169     }
170    
171     switch(*code)
172     {
173     case OP_END:
174     fprintf(f, " %s\n", OP_names[*code]);
175     fprintf(f, "------------------------------------------------------------------\n");
176     return;
177    
178     case OP_OPT:
179     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
180     break;
181    
182     case OP_CHAR:
183     {
184     fprintf(f, " ");
185     do
186     {
187     code++;
188     code += 1 + print_char(f, code, utf8);
189     }
190     while (*code == OP_CHAR);
191     fprintf(f, "\n");
192     continue;
193     }
194     break;
195    
196     case OP_CHARNC:
197     {
198     fprintf(f, " NC ");
199     do
200     {
201     code++;
202     code += 1 + print_char(f, code, utf8);
203     }
204     while (*code == OP_CHARNC);
205     fprintf(f, "\n");
206     continue;
207     }
208     break;
209    
210     case OP_KETRMAX:
211     case OP_KETRMIN:
212     case OP_ALT:
213     case OP_KET:
214     case OP_ASSERT:
215     case OP_ASSERT_NOT:
216     case OP_ASSERTBACK:
217     case OP_ASSERTBACK_NOT:
218     case OP_ONCE:
219     case OP_COND:
220     case OP_REVERSE:
221     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
222     break;
223    
224     case OP_BRANUMBER:
225     printf("%3d %s", GET2(code, 1), OP_names[*code]);
226     break;
227    
228     case OP_CREF:
229     if (GET2(code, 1) == CREF_RECURSE)
230     fprintf(f, " Cond recurse");
231     else
232     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
233     break;
234    
235     case OP_STAR:
236     case OP_MINSTAR:
237     case OP_PLUS:
238     case OP_MINPLUS:
239     case OP_QUERY:
240     case OP_MINQUERY:
241     case OP_TYPESTAR:
242     case OP_TYPEMINSTAR:
243     case OP_TYPEPLUS:
244     case OP_TYPEMINPLUS:
245     case OP_TYPEQUERY:
246     case OP_TYPEMINQUERY:
247     fprintf(f, " ");
248     if (*code >= OP_TYPESTAR)
249     {
250     fprintf(f, "%s", OP_names[code[1]]);
251     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
252     {
253     fprintf(f, " %s ", get_ucpname(code[2]));
254     extra = 1;
255     }
256     }
257     else extra = print_char(f, code+1, utf8);
258     fprintf(f, "%s", OP_names[*code]);
259     break;
260    
261     case OP_EXACT:
262     case OP_UPTO:
263     case OP_MINUPTO:
264     fprintf(f, " ");
265     extra = print_char(f, code+3, utf8);
266     fprintf(f, "{");
267     if (*code != OP_EXACT) fprintf(f, ",");
268     fprintf(f, "%d}", GET2(code,1));
269     if (*code == OP_MINUPTO) fprintf(f, "?");
270     break;
271    
272     case OP_TYPEEXACT:
273     case OP_TYPEUPTO:
274     case OP_TYPEMINUPTO:
275     fprintf(f, " %s", OP_names[code[3]]);
276     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
277     {
278     fprintf(f, " %s ", get_ucpname(code[4]));
279     extra = 1;
280     }
281     fprintf(f, "{");
282     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
283     fprintf(f, "%d}", GET2(code,1));
284     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
285     break;
286    
287     case OP_NOT:
288     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
289     else fprintf(f, " [^\\x%02x]", c);
290     break;
291    
292     case OP_NOTSTAR:
293     case OP_NOTMINSTAR:
294     case OP_NOTPLUS:
295     case OP_NOTMINPLUS:
296     case OP_NOTQUERY:
297     case OP_NOTMINQUERY:
298     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
299     else fprintf(f, " [^\\x%02x]", c);
300     fprintf(f, "%s", OP_names[*code]);
301     break;
302    
303     case OP_NOTEXACT:
304     case OP_NOTUPTO:
305     case OP_NOTMINUPTO:
306     if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
307     else fprintf(f, " [^\\x%02x]{", c);
308     if (*code != OP_NOTEXACT) fprintf(f, "0,");
309     fprintf(f, "%d}", GET2(code,1));
310     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
311     break;
312    
313     case OP_RECURSE:
314     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
315     break;
316    
317     case OP_REF:
318     fprintf(f, " \\%d", GET2(code,1));
319     ccode = code + _pcre_OP_lengths[*code];
320     goto CLASS_REF_REPEAT;
321    
322     case OP_CALLOUT:
323     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
324     GET(code, 2 + LINK_SIZE));
325     break;
326    
327     case OP_PROP:
328     case OP_NOTPROP:
329     fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
330     break;
331    
332     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
333     having this code always here, and it makes it less messy without all those
334     #ifdefs. */
335    
336     case OP_CLASS:
337     case OP_NCLASS:
338     case OP_XCLASS:
339     {
340     int i, min, max;
341     BOOL printmap;
342    
343     fprintf(f, " [");
344    
345     if (*code == OP_XCLASS)
346     {
347     extra = GET(code, 1);
348     ccode = code + LINK_SIZE + 1;
349     printmap = (*ccode & XCL_MAP) != 0;
350     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
351     }
352     else
353     {
354     printmap = TRUE;
355     ccode = code + 1;
356     }
357    
358     /* Print a bit map */
359    
360     if (printmap)
361     {
362     for (i = 0; i < 256; i++)
363     {
364     if ((ccode[i/8] & (1 << (i&7))) != 0)
365     {
366     int j;
367     for (j = i+1; j < 256; j++)
368     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
369     if (i == '-' || i == ']') fprintf(f, "\\");
370     if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
371     if (--j > i)
372     {
373     if (j != i + 1) fprintf(f, "-");
374     if (j == '-' || j == ']') fprintf(f, "\\");
375     if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
376     }
377     i = j;
378     }
379     }
380     ccode += 32;
381     }
382    
383     /* For an XCLASS there is always some additional data */
384    
385     if (*code == OP_XCLASS)
386     {
387     int ch;
388     while ((ch = *ccode++) != XCL_END)
389     {
390     if (ch == XCL_PROP)
391     {
392     fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
393     }
394     else if (ch == XCL_NOTPROP)
395     {
396     fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
397     }
398     else
399     {
400     ccode += 1 + print_char(f, ccode, TRUE);
401     if (ch == XCL_RANGE)
402     {
403     fprintf(f, "-");
404     ccode += 1 + print_char(f, ccode, TRUE);
405     }
406     }
407     }
408     }
409    
410     /* Indicate a non-UTF8 class which was created by negation */
411    
412     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
413    
414     /* Handle repeats after a class or a back reference */
415    
416     CLASS_REF_REPEAT:
417     switch(*ccode)
418     {
419     case OP_CRSTAR:
420     case OP_CRMINSTAR:
421     case OP_CRPLUS:
422     case OP_CRMINPLUS:
423     case OP_CRQUERY:
424     case OP_CRMINQUERY:
425     fprintf(f, "%s", OP_names[*ccode]);
426     extra += _pcre_OP_lengths[*ccode];
427     break;
428    
429     case OP_CRRANGE:
430     case OP_CRMINRANGE:
431     min = GET2(ccode,1);
432     max = GET2(ccode,3);
433     if (max == 0) fprintf(f, "{%d,}", min);
434     else fprintf(f, "{%d,%d}", min, max);
435     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
436     extra += _pcre_OP_lengths[*ccode];
437     break;
438     }
439     }
440     break;
441    
442     /* Anything else is just an item with no data*/
443    
444     default:
445     fprintf(f, " %s", OP_names[*code]);
446     break;
447     }
448    
449     code += _pcre_OP_lengths[*code] + extra;
450     fprintf(f, "\n");
451     }
452     }
453    
454     /* End of pcre_printint.src */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12