/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 87 - (hide annotations) (download) (as text)
Sat Feb 24 21:41:21 2007 UTC (6 years, 2 months ago) by nigel
File MIME type: application/x-wais-source
File size: 12832 byte(s)
Load pcre-6.5 into code/trunk.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9     Copyright (c) 1997-2005 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
/* Table of printable opcode names, indexed by opcode value (it is used below
as OP_names[*code] and OP_names[code[1]]). The initializer comes from the
OP_NAME_LIST macro, which is not defined in this file - presumably it lives in
pcre_internal.h; confirm. */
52     static const char *OP_names[] = { OP_NAME_LIST };
53    
54    
55     /*************************************************
56     * Print single- or multi-byte character *
57     *************************************************/
58    
59     static int
60     print_char(FILE *f, uschar *ptr, BOOL utf8)
61     {
62     int c = *ptr;
63    
64     if (!utf8 || (c & 0xc0) != 0xc0)
65     {
66     if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
67     return 0;
68     }
69     else
70     {
71     int i;
72     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
73     int s = 6*a;
74     c = (c & _pcre_utf8_table3[a]) << s;
75     for (i = 1; i <= a; i++)
76     {
77     /* This is a check for malformed UTF-8; it should only occur if the sanity
78     check has been turned off. Rather than swallow random bytes, just stop if
79     we hit a bad one. Print it with \X instead of \x as an indication. */
80    
81     if ((ptr[i] & 0xc0) != 0x80)
82     {
83     fprintf(f, "\\X{%x}", c);
84     return i - 1;
85     }
86    
87     /* The byte is OK */
88    
89     s -= 6;
90     c |= (ptr[i] & 0x3f) << s;
91     }
92     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
93     return a;
94     }
95     }
96    
97    
98    
99     /*************************************************
100     * Find Unicode property name *
101     *************************************************/
102    
/* Look up the name of a Unicode property.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns: the name field of the _pcre_utt entry whose type and value both
         match, or "??" if no entry matches or if the code was compiled
         without SUPPORT_UCP. */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan from the last entry down to the first. NOTE(review): this assumes that
_pcre_utt_size is the number of entries in _pcre_utt, so the last valid index
is _pcre_utt_size - 1. Starting the scan at _pcre_utt_size itself would read
one element beyond the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
(void)ptype;     /* Avoid compiler warnings about unused arguments */
(void)pvalue;
return "??";
#endif
}
119    
120    
121    
122     /*************************************************
123     * Print compiled regex *
124     *************************************************/
125    
126     /* Make this function work for a regex with integers in either byte order.
127     However, we assume that what we are passed is a compiled regex. */
128    
129     static void
130     pcre_printint(pcre *external_re, FILE *f)
131     {
132     real_pcre *re = (real_pcre *)external_re;
133     uschar *codestart, *code;
134     BOOL utf8;
135    
136     unsigned int options = re->options;
137     int offset = re->name_table_offset;
138     int count = re->name_count;
139     int size = re->name_entry_size;
140    
141     if (re->magic_number != MAGIC_NUMBER)
142     {
143     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
144     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
145     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
146     options = ((options << 24) & 0xff000000) |
147     ((options << 8) & 0x00ff0000) |
148     ((options >> 8) & 0x0000ff00) |
149     ((options >> 24) & 0x000000ff);
150     }
151    
152     code = codestart = (uschar *)re + offset + count * size;
153     utf8 = (options & PCRE_UTF8) != 0;
154    
155     for(;;)
156     {
157     uschar *ccode;
158     int c;
159     int extra = 0;
160    
161     fprintf(f, "%3d ", (int)(code - codestart));
162    
163     if (*code >= OP_BRA)
164     {
165     if (*code - OP_BRA > EXTRACT_BASIC_MAX)
166     fprintf(f, "%3d Bra extra\n", GET(code, 1));
167     else
168     fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
169     code += _pcre_OP_lengths[OP_BRA];
170     continue;
171     }
172    
173     switch(*code)
174     {
175     case OP_END:
176     fprintf(f, " %s\n", OP_names[*code]);
177     fprintf(f, "------------------------------------------------------------------\n");
178     return;
179    
180     case OP_OPT:
181     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
182     break;
183    
184     case OP_CHAR:
185     {
186     fprintf(f, " ");
187     do
188     {
189     code++;
190     code += 1 + print_char(f, code, utf8);
191     }
192     while (*code == OP_CHAR);
193     fprintf(f, "\n");
194     continue;
195     }
196     break;
197    
198     case OP_CHARNC:
199     {
200     fprintf(f, " NC ");
201     do
202     {
203     code++;
204     code += 1 + print_char(f, code, utf8);
205     }
206     while (*code == OP_CHARNC);
207     fprintf(f, "\n");
208     continue;
209     }
210     break;
211    
212     case OP_KETRMAX:
213     case OP_KETRMIN:
214     case OP_ALT:
215     case OP_KET:
216     case OP_ASSERT:
217     case OP_ASSERT_NOT:
218     case OP_ASSERTBACK:
219     case OP_ASSERTBACK_NOT:
220     case OP_ONCE:
221     case OP_COND:
222     case OP_REVERSE:
223     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
224     break;
225    
226     case OP_BRANUMBER:
227     printf("%3d %s", GET2(code, 1), OP_names[*code]);
228     break;
229    
230     case OP_CREF:
231     if (GET2(code, 1) == CREF_RECURSE)
232     fprintf(f, " Cond recurse");
233     else
234     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
235     break;
236    
237     case OP_STAR:
238     case OP_MINSTAR:
239     case OP_PLUS:
240     case OP_MINPLUS:
241     case OP_QUERY:
242     case OP_MINQUERY:
243     case OP_TYPESTAR:
244     case OP_TYPEMINSTAR:
245     case OP_TYPEPLUS:
246     case OP_TYPEMINPLUS:
247     case OP_TYPEQUERY:
248     case OP_TYPEMINQUERY:
249     fprintf(f, " ");
250     if (*code >= OP_TYPESTAR)
251     {
252     fprintf(f, "%s", OP_names[code[1]]);
253     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
254     {
255 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
256     extra = 2;
257 nigel 85 }
258     }
259     else extra = print_char(f, code+1, utf8);
260     fprintf(f, "%s", OP_names[*code]);
261     break;
262    
263     case OP_EXACT:
264     case OP_UPTO:
265     case OP_MINUPTO:
266     fprintf(f, " ");
267     extra = print_char(f, code+3, utf8);
268     fprintf(f, "{");
269     if (*code != OP_EXACT) fprintf(f, ",");
270     fprintf(f, "%d}", GET2(code,1));
271     if (*code == OP_MINUPTO) fprintf(f, "?");
272     break;
273    
274     case OP_TYPEEXACT:
275     case OP_TYPEUPTO:
276     case OP_TYPEMINUPTO:
277     fprintf(f, " %s", OP_names[code[3]]);
278     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
279     {
280 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
281     extra = 2;
282 nigel 85 }
283     fprintf(f, "{");
284     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
285     fprintf(f, "%d}", GET2(code,1));
286     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
287     break;
288    
289     case OP_NOT:
290     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
291     else fprintf(f, " [^\\x%02x]", c);
292     break;
293    
294     case OP_NOTSTAR:
295     case OP_NOTMINSTAR:
296     case OP_NOTPLUS:
297     case OP_NOTMINPLUS:
298     case OP_NOTQUERY:
299     case OP_NOTMINQUERY:
300     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
301     else fprintf(f, " [^\\x%02x]", c);
302     fprintf(f, "%s", OP_names[*code]);
303     break;
304    
305     case OP_NOTEXACT:
306     case OP_NOTUPTO:
307     case OP_NOTMINUPTO:
308     if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
309     else fprintf(f, " [^\\x%02x]{", c);
310     if (*code != OP_NOTEXACT) fprintf(f, "0,");
311     fprintf(f, "%d}", GET2(code,1));
312     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
313     break;
314    
315     case OP_RECURSE:
316     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
317     break;
318    
319     case OP_REF:
320     fprintf(f, " \\%d", GET2(code,1));
321     ccode = code + _pcre_OP_lengths[*code];
322     goto CLASS_REF_REPEAT;
323    
324     case OP_CALLOUT:
325     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
326     GET(code, 2 + LINK_SIZE));
327     break;
328    
329     case OP_PROP:
330     case OP_NOTPROP:
331 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
332 nigel 85 break;
333    
334     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
335     having this code always here, and it makes it less messy without all those
336     #ifdefs. */
337    
338     case OP_CLASS:
339     case OP_NCLASS:
340     case OP_XCLASS:
341     {
342     int i, min, max;
343     BOOL printmap;
344    
345     fprintf(f, " [");
346    
347     if (*code == OP_XCLASS)
348     {
349     extra = GET(code, 1);
350     ccode = code + LINK_SIZE + 1;
351     printmap = (*ccode & XCL_MAP) != 0;
352     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
353     }
354     else
355     {
356     printmap = TRUE;
357     ccode = code + 1;
358     }
359    
360     /* Print a bit map */
361    
362     if (printmap)
363     {
364     for (i = 0; i < 256; i++)
365     {
366     if ((ccode[i/8] & (1 << (i&7))) != 0)
367     {
368     int j;
369     for (j = i+1; j < 256; j++)
370     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
371     if (i == '-' || i == ']') fprintf(f, "\\");
372     if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
373     if (--j > i)
374     {
375     if (j != i + 1) fprintf(f, "-");
376     if (j == '-' || j == ']') fprintf(f, "\\");
377     if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
378     }
379     i = j;
380     }
381     }
382     ccode += 32;
383     }
384    
385     /* For an XCLASS there is always some additional data */
386    
387     if (*code == OP_XCLASS)
388     {
389     int ch;
390     while ((ch = *ccode++) != XCL_END)
391     {
392     if (ch == XCL_PROP)
393     {
394 nigel 87 int ptype = *ccode++;
395     int pvalue = *ccode++;
396     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
397 nigel 85 }
398     else if (ch == XCL_NOTPROP)
399     {
400 nigel 87 int ptype = *ccode++;
401     int pvalue = *ccode++;
402     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
403 nigel 85 }
404     else
405     {
406     ccode += 1 + print_char(f, ccode, TRUE);
407     if (ch == XCL_RANGE)
408     {
409     fprintf(f, "-");
410     ccode += 1 + print_char(f, ccode, TRUE);
411     }
412     }
413     }
414     }
415    
416     /* Indicate a non-UTF8 class which was created by negation */
417    
418     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
419    
420     /* Handle repeats after a class or a back reference */
421    
422     CLASS_REF_REPEAT:
423     switch(*ccode)
424     {
425     case OP_CRSTAR:
426     case OP_CRMINSTAR:
427     case OP_CRPLUS:
428     case OP_CRMINPLUS:
429     case OP_CRQUERY:
430     case OP_CRMINQUERY:
431     fprintf(f, "%s", OP_names[*ccode]);
432     extra += _pcre_OP_lengths[*ccode];
433     break;
434    
435     case OP_CRRANGE:
436     case OP_CRMINRANGE:
437     min = GET2(ccode,1);
438     max = GET2(ccode,3);
439     if (max == 0) fprintf(f, "{%d,}", min);
440     else fprintf(f, "{%d,%d}", min, max);
441     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
442     extra += _pcre_OP_lengths[*ccode];
443     break;
444 nigel 87
445     /* Do nothing if it's not a repeat; this code stops picky compilers
446     warning about the lack of a default code path. */
447    
448     default:
449     break;
450 nigel 85 }
451     }
452     break;
453    
454     /* Anything else is just an item with no data*/
455    
456     default:
457     fprintf(f, " %s", OP_names[*code]);
458     break;
459     }
460    
461     code += _pcre_OP_lengths[*code] + extra;
462     fprintf(f, "\n");
463     }
464     }
465    
466     /* End of pcre_printint.src */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12