/[pcre]/code/tags/pcre-6.1/pcre_printint.c
ViewVC logotype

Contents of /code/tags/pcre-6.1/pcre_printint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 80 - (hide annotations) (download)
Sat Feb 24 21:40:54 2007 UTC (6 years, 3 months ago) by nigel
File MIME type: text/plain
File size: 12105 byte(s)
Tag code/trunk as code/tags/pcre-6.1.

1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9     Copyright (c) 1997-2005 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. */
44    
45    
46     #include "pcre_internal.h"
47    
48    
49     static const char *OP_names[] = { OP_NAME_LIST };
50    
51    
52     /*************************************************
53     * Print single- or multi-byte character *
54     *************************************************/
55    
56     static int
57     print_char(FILE *f, uschar *ptr, BOOL utf8)
58     {
59     int c = *ptr;
60    
61     if (!utf8 || (c & 0xc0) != 0xc0)
62     {
63     if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
64     return 0;
65     }
66     else
67     {
68     int i;
69     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
70     int s = 6*a;
71     c = (c & _pcre_utf8_table3[a]) << s;
72     for (i = 1; i <= a; i++)
73     {
74     /* This is a check for malformed UTF-8; it should only occur if the sanity
75     check has been turned off. Rather than swallow random bytes, just stop if
76     we hit a bad one. Print it with \X instead of \x as an indication. */
77    
78     if ((ptr[i] & 0xc0) != 0x80)
79     {
80     fprintf(f, "\\X{%x}", c);
81     return i - 1;
82     }
83    
84     /* The byte is OK */
85    
86     s -= 6;
87     c |= (ptr[i] & 0x3f) << s;
88     }
89     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
90     return a;
91     }
92     }
93    
94    
95    
96     /*************************************************
97     * Find Unicode property name *
98     *************************************************/
99    
/* Look up the name of a Unicode property.

Argument:
  property   the property value to look up

Returns:     the name from the _pcre_utt table whose value field matches, or
             "??" if there is no match or if the code was compiled without
             SUPPORT_UCP
*/

static const char *
get_ucpname(int property)
{
#ifdef SUPPORT_UCP
int i;

/* Search from the last entry downwards. Valid indexes run from 0 to
_pcre_utt_size - 1; starting at _pcre_utt_size itself would read one element
beyond the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (property == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)property;   /* Not used when there is no property support */
return "??";
#endif
}
114    
115    
116    
117     /*************************************************
118     * Print compiled regex *
119     *************************************************/
120    
121     /* Make this function work for a regex with integers in either byte order.
122     However, we assume that what we are passed is a compiled regex. */
123    
124     EXPORT void
125     _pcre_printint(pcre *external_re, FILE *f)
126     {
127     real_pcre *re = (real_pcre *)external_re;
128     uschar *codestart, *code;
129     BOOL utf8;
130    
131     unsigned int options = re->options;
132     int offset = re->name_table_offset;
133     int count = re->name_count;
134     int size = re->name_entry_size;
135    
136     if (re->magic_number != MAGIC_NUMBER)
137     {
138     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
139     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
140     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
141     options = ((options << 24) & 0xff000000) |
142     ((options << 8) & 0x00ff0000) |
143     ((options >> 8) & 0x0000ff00) |
144     ((options >> 24) & 0x000000ff);
145     }
146    
147     code = codestart = (uschar *)re + offset + count * size;
148     utf8 = (options & PCRE_UTF8) != 0;
149    
150     for(;;)
151     {
152     uschar *ccode;
153     int c;
154     int extra = 0;
155    
156     fprintf(f, "%3d ", (int)(code - codestart));
157    
158     if (*code >= OP_BRA)
159     {
160     if (*code - OP_BRA > EXTRACT_BASIC_MAX)
161     fprintf(f, "%3d Bra extra\n", GET(code, 1));
162     else
163     fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
164     code += _pcre_OP_lengths[OP_BRA];
165     continue;
166     }
167    
168     switch(*code)
169     {
170     case OP_END:
171     fprintf(f, " %s\n", OP_names[*code]);
172     fprintf(f, "------------------------------------------------------------------\n");
173     return;
174    
175     case OP_OPT:
176     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
177     break;
178    
179     case OP_CHAR:
180     {
181     fprintf(f, " ");
182     do
183     {
184     code++;
185     code += 1 + print_char(f, code, utf8);
186     }
187     while (*code == OP_CHAR);
188     fprintf(f, "\n");
189     continue;
190     }
191     break;
192    
193     case OP_CHARNC:
194     {
195     fprintf(f, " NC ");
196     do
197     {
198     code++;
199     code += 1 + print_char(f, code, utf8);
200     }
201     while (*code == OP_CHARNC);
202     fprintf(f, "\n");
203     continue;
204     }
205     break;
206    
207     case OP_KETRMAX:
208     case OP_KETRMIN:
209     case OP_ALT:
210     case OP_KET:
211     case OP_ASSERT:
212     case OP_ASSERT_NOT:
213     case OP_ASSERTBACK:
214     case OP_ASSERTBACK_NOT:
215     case OP_ONCE:
216     case OP_COND:
217     case OP_REVERSE:
218     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
219     break;
220    
221     case OP_BRANUMBER:
222     printf("%3d %s", GET2(code, 1), OP_names[*code]);
223     break;
224    
225     case OP_CREF:
226     if (GET2(code, 1) == CREF_RECURSE)
227     fprintf(f, " Cond recurse");
228     else
229     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
230     break;
231    
232     case OP_STAR:
233     case OP_MINSTAR:
234     case OP_PLUS:
235     case OP_MINPLUS:
236     case OP_QUERY:
237     case OP_MINQUERY:
238     case OP_TYPESTAR:
239     case OP_TYPEMINSTAR:
240     case OP_TYPEPLUS:
241     case OP_TYPEMINPLUS:
242     case OP_TYPEQUERY:
243     case OP_TYPEMINQUERY:
244     fprintf(f, " ");
245     if (*code >= OP_TYPESTAR)
246     {
247     fprintf(f, "%s", OP_names[code[1]]);
248     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
249     {
250     fprintf(f, " %s ", get_ucpname(code[2]));
251     extra = 1;
252     }
253     }
254     else extra = print_char(f, code+1, utf8);
255     fprintf(f, "%s", OP_names[*code]);
256     break;
257    
258     case OP_EXACT:
259     case OP_UPTO:
260     case OP_MINUPTO:
261     fprintf(f, " ");
262     extra = print_char(f, code+3, utf8);
263     fprintf(f, "{");
264     if (*code != OP_EXACT) fprintf(f, ",");
265     fprintf(f, "%d}", GET2(code,1));
266     if (*code == OP_MINUPTO) fprintf(f, "?");
267     break;
268    
269     case OP_TYPEEXACT:
270     case OP_TYPEUPTO:
271     case OP_TYPEMINUPTO:
272     fprintf(f, " %s", OP_names[code[3]]);
273     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
274     {
275     fprintf(f, " %s ", get_ucpname(code[4]));
276     extra = 1;
277     }
278     fprintf(f, "{");
279     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
280     fprintf(f, "%d}", GET2(code,1));
281     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
282     break;
283    
284     case OP_NOT:
285     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
286     else fprintf(f, " [^\\x%02x]", c);
287     break;
288    
289     case OP_NOTSTAR:
290     case OP_NOTMINSTAR:
291     case OP_NOTPLUS:
292     case OP_NOTMINPLUS:
293     case OP_NOTQUERY:
294     case OP_NOTMINQUERY:
295     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
296     else fprintf(f, " [^\\x%02x]", c);
297     fprintf(f, "%s", OP_names[*code]);
298     break;
299    
300     case OP_NOTEXACT:
301     case OP_NOTUPTO:
302     case OP_NOTMINUPTO:
303     if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
304     else fprintf(f, " [^\\x%02x]{", c);
305     if (*code != OP_NOTEXACT) fprintf(f, "0,");
306     fprintf(f, "%d}", GET2(code,1));
307     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
308     break;
309    
310     case OP_RECURSE:
311     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
312     break;
313    
314     case OP_REF:
315     fprintf(f, " \\%d", GET2(code,1));
316     ccode = code + _pcre_OP_lengths[*code];
317     goto CLASS_REF_REPEAT;
318    
319     case OP_CALLOUT:
320     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
321     GET(code, 2 + LINK_SIZE));
322     break;
323    
324     case OP_PROP:
325     case OP_NOTPROP:
326     fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
327     break;
328    
329     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
330     having this code always here, and it makes it less messy without all those
331     #ifdefs. */
332    
333     case OP_CLASS:
334     case OP_NCLASS:
335     case OP_XCLASS:
336     {
337     int i, min, max;
338     BOOL printmap;
339    
340     fprintf(f, " [");
341    
342     if (*code == OP_XCLASS)
343     {
344     extra = GET(code, 1);
345     ccode = code + LINK_SIZE + 1;
346     printmap = (*ccode & XCL_MAP) != 0;
347     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
348     }
349     else
350     {
351     printmap = TRUE;
352     ccode = code + 1;
353     }
354    
355     /* Print a bit map */
356    
357     if (printmap)
358     {
359     for (i = 0; i < 256; i++)
360     {
361     if ((ccode[i/8] & (1 << (i&7))) != 0)
362     {
363     int j;
364     for (j = i+1; j < 256; j++)
365     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
366     if (i == '-' || i == ']') fprintf(f, "\\");
367     if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
368     if (--j > i)
369     {
370     if (j != i + 1) fprintf(f, "-");
371     if (j == '-' || j == ']') fprintf(f, "\\");
372     if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
373     }
374     i = j;
375     }
376     }
377     ccode += 32;
378     }
379    
380     /* For an XCLASS there is always some additional data */
381    
382     if (*code == OP_XCLASS)
383     {
384     int ch;
385     while ((ch = *ccode++) != XCL_END)
386     {
387     if (ch == XCL_PROP)
388     {
389     fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
390     }
391     else if (ch == XCL_NOTPROP)
392     {
393     fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
394     }
395     else
396     {
397     ccode += 1 + print_char(f, ccode, TRUE);
398     if (ch == XCL_RANGE)
399     {
400     fprintf(f, "-");
401     ccode += 1 + print_char(f, ccode, TRUE);
402     }
403     }
404     }
405     }
406    
407     /* Indicate a non-UTF8 class which was created by negation */
408    
409     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
410    
411     /* Handle repeats after a class or a back reference */
412    
413     CLASS_REF_REPEAT:
414     switch(*ccode)
415     {
416     case OP_CRSTAR:
417     case OP_CRMINSTAR:
418     case OP_CRPLUS:
419     case OP_CRMINPLUS:
420     case OP_CRQUERY:
421     case OP_CRMINQUERY:
422     fprintf(f, "%s", OP_names[*ccode]);
423     extra += _pcre_OP_lengths[*ccode];
424     break;
425    
426     case OP_CRRANGE:
427     case OP_CRMINRANGE:
428     min = GET2(ccode,1);
429     max = GET2(ccode,3);
430     if (max == 0) fprintf(f, "{%d,}", min);
431     else fprintf(f, "{%d,%d}", min, max);
432     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
433     extra += _pcre_OP_lengths[*ccode];
434     break;
435     }
436     }
437     break;
438    
439     /* Anything else is just an item with no data*/
440    
441     default:
442     fprintf(f, " %s", OP_names[*code]);
443     break;
444     }
445    
446     code += _pcre_OP_lengths[*code] + extra;
447     fprintf(f, "\n");
448     }
449     }
450    
451     /* End of pcre_printint.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12