/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 91 - (hide annotations) (download) (as text)
Sat Feb 24 21:41:34 2007 UTC (7 years, 8 months ago) by nigel
File MIME type: application/x-wais-source
File size: 12800 byte(s)
Load pcre-6.7 into code/trunk.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9     Copyright (c) 1997-2005 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
/* Table of opcode names, indexed by opcode value (it is used below as
OP_names[*code]). The entries are supplied by the OP_NAME_LIST macro, which is
defined outside this file. */
52     static const char *OP_names[] = { OP_NAME_LIST };
53    
54    
55     /*************************************************
56     * Print single- or multi-byte character *
57     *************************************************/
58    
59     static int
60     print_char(FILE *f, uschar *ptr, BOOL utf8)
61     {
62     int c = *ptr;
63    
64     if (!utf8 || (c & 0xc0) != 0xc0)
65     {
66     if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
67     return 0;
68     }
69     else
70     {
71     int i;
72     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
73     int s = 6*a;
74     c = (c & _pcre_utf8_table3[a]) << s;
75     for (i = 1; i <= a; i++)
76     {
77     /* This is a check for malformed UTF-8; it should only occur if the sanity
78     check has been turned off. Rather than swallow random bytes, just stop if
79     we hit a bad one. Print it with \X instead of \x as an indication. */
80    
81     if ((ptr[i] & 0xc0) != 0x80)
82     {
83     fprintf(f, "\\X{%x}", c);
84     return i - 1;
85     }
86    
87     /* The byte is OK */
88    
89     s -= 6;
90     c |= (ptr[i] & 0x3f) << s;
91     }
92     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
93     return a;
94     }
95     }
96    
97    
98    
99     /*************************************************
100     * Find Unicode property name *
101     *************************************************/
102    
/* Look up the name of a Unicode property.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns:   the name from the matching _pcre_utt entry, or "??" if no entry
           matches or if SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan the table from its last entry downwards. The starting index must be
_pcre_utt_size - 1: starting at _pcre_utt_size itself would read one element
beyond the end of the table (this assumes _pcre_utt_size is the number of
entries in _pcre_utt). */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* Without UCP support there is no table to search. The casts mark the
arguments as deliberately unused, which avoids compiler warnings without
doing any arithmetic on them. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
119    
120    
121    
122     /*************************************************
123     * Print compiled regex *
124     *************************************************/
125    
126     /* Make this function work for a regex with integers in either byte order.
127     However, we assume that what we are passed is a compiled regex. */
128    
129     static void
130     pcre_printint(pcre *external_re, FILE *f)
131     {
132     real_pcre *re = (real_pcre *)external_re;
133     uschar *codestart, *code;
134     BOOL utf8;
135    
136     unsigned int options = re->options;
137     int offset = re->name_table_offset;
138     int count = re->name_count;
139     int size = re->name_entry_size;
140    
141     if (re->magic_number != MAGIC_NUMBER)
142     {
143     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
144     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
145     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
146     options = ((options << 24) & 0xff000000) |
147     ((options << 8) & 0x00ff0000) |
148     ((options >> 8) & 0x0000ff00) |
149     ((options >> 24) & 0x000000ff);
150     }
151    
152     code = codestart = (uschar *)re + offset + count * size;
153     utf8 = (options & PCRE_UTF8) != 0;
154    
155     for(;;)
156     {
157     uschar *ccode;
158     int c;
159     int extra = 0;
160    
161     fprintf(f, "%3d ", (int)(code - codestart));
162    
163     if (*code >= OP_BRA)
164     {
165     if (*code - OP_BRA > EXTRACT_BASIC_MAX)
166     fprintf(f, "%3d Bra extra\n", GET(code, 1));
167     else
168     fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
169     code += _pcre_OP_lengths[OP_BRA];
170     continue;
171     }
172    
173     switch(*code)
174     {
175     case OP_END:
176     fprintf(f, " %s\n", OP_names[*code]);
177     fprintf(f, "------------------------------------------------------------------\n");
178     return;
179    
180     case OP_OPT:
181     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
182     break;
183    
184     case OP_CHAR:
185 nigel 91 fprintf(f, " ");
186     do
187 nigel 85 {
188 nigel 91 code++;
189     code += 1 + print_char(f, code, utf8);
190 nigel 85 }
191 nigel 91 while (*code == OP_CHAR);
192     fprintf(f, "\n");
193     continue;
194 nigel 85
195     case OP_CHARNC:
196 nigel 91 fprintf(f, " NC ");
197     do
198 nigel 85 {
199 nigel 91 code++;
200     code += 1 + print_char(f, code, utf8);
201 nigel 85 }
202 nigel 91 while (*code == OP_CHARNC);
203     fprintf(f, "\n");
204     continue;
205 nigel 85
206     case OP_KETRMAX:
207     case OP_KETRMIN:
208     case OP_ALT:
209     case OP_KET:
210     case OP_ASSERT:
211     case OP_ASSERT_NOT:
212     case OP_ASSERTBACK:
213     case OP_ASSERTBACK_NOT:
214     case OP_ONCE:
215     case OP_COND:
216     case OP_REVERSE:
217     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
218     break;
219    
220     case OP_BRANUMBER:
221     printf("%3d %s", GET2(code, 1), OP_names[*code]);
222     break;
223    
224     case OP_CREF:
225     if (GET2(code, 1) == CREF_RECURSE)
226     fprintf(f, " Cond recurse");
227     else
228     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
229     break;
230    
231     case OP_STAR:
232     case OP_MINSTAR:
233     case OP_PLUS:
234     case OP_MINPLUS:
235     case OP_QUERY:
236     case OP_MINQUERY:
237     case OP_TYPESTAR:
238     case OP_TYPEMINSTAR:
239     case OP_TYPEPLUS:
240     case OP_TYPEMINPLUS:
241     case OP_TYPEQUERY:
242     case OP_TYPEMINQUERY:
243     fprintf(f, " ");
244     if (*code >= OP_TYPESTAR)
245     {
246     fprintf(f, "%s", OP_names[code[1]]);
247     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
248     {
249 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
250     extra = 2;
251 nigel 85 }
252     }
253     else extra = print_char(f, code+1, utf8);
254     fprintf(f, "%s", OP_names[*code]);
255     break;
256    
257     case OP_EXACT:
258     case OP_UPTO:
259     case OP_MINUPTO:
260     fprintf(f, " ");
261     extra = print_char(f, code+3, utf8);
262     fprintf(f, "{");
263     if (*code != OP_EXACT) fprintf(f, ",");
264     fprintf(f, "%d}", GET2(code,1));
265     if (*code == OP_MINUPTO) fprintf(f, "?");
266     break;
267    
268     case OP_TYPEEXACT:
269     case OP_TYPEUPTO:
270     case OP_TYPEMINUPTO:
271     fprintf(f, " %s", OP_names[code[3]]);
272     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
273     {
274 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
275     extra = 2;
276 nigel 85 }
277     fprintf(f, "{");
278     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
279     fprintf(f, "%d}", GET2(code,1));
280     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
281     break;
282    
283     case OP_NOT:
284     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
285     else fprintf(f, " [^\\x%02x]", c);
286     break;
287    
288     case OP_NOTSTAR:
289     case OP_NOTMINSTAR:
290     case OP_NOTPLUS:
291     case OP_NOTMINPLUS:
292     case OP_NOTQUERY:
293     case OP_NOTMINQUERY:
294     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
295     else fprintf(f, " [^\\x%02x]", c);
296     fprintf(f, "%s", OP_names[*code]);
297     break;
298    
299     case OP_NOTEXACT:
300     case OP_NOTUPTO:
301     case OP_NOTMINUPTO:
302     if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
303     else fprintf(f, " [^\\x%02x]{", c);
304     if (*code != OP_NOTEXACT) fprintf(f, "0,");
305     fprintf(f, "%d}", GET2(code,1));
306     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
307     break;
308    
309     case OP_RECURSE:
310     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
311     break;
312    
313     case OP_REF:
314     fprintf(f, " \\%d", GET2(code,1));
315     ccode = code + _pcre_OP_lengths[*code];
316     goto CLASS_REF_REPEAT;
317    
318     case OP_CALLOUT:
319     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
320     GET(code, 2 + LINK_SIZE));
321     break;
322    
323     case OP_PROP:
324     case OP_NOTPROP:
325 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
326 nigel 85 break;
327    
328     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
329     having this code always here, and it makes it less messy without all those
330     #ifdefs. */
331    
332     case OP_CLASS:
333     case OP_NCLASS:
334     case OP_XCLASS:
335     {
336     int i, min, max;
337     BOOL printmap;
338    
339     fprintf(f, " [");
340    
341     if (*code == OP_XCLASS)
342     {
343     extra = GET(code, 1);
344     ccode = code + LINK_SIZE + 1;
345     printmap = (*ccode & XCL_MAP) != 0;
346     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
347     }
348     else
349     {
350     printmap = TRUE;
351     ccode = code + 1;
352     }
353    
354     /* Print a bit map */
355    
356     if (printmap)
357     {
358     for (i = 0; i < 256; i++)
359     {
360     if ((ccode[i/8] & (1 << (i&7))) != 0)
361     {
362     int j;
363     for (j = i+1; j < 256; j++)
364     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
365     if (i == '-' || i == ']') fprintf(f, "\\");
366     if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
367     if (--j > i)
368     {
369     if (j != i + 1) fprintf(f, "-");
370     if (j == '-' || j == ']') fprintf(f, "\\");
371     if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
372     }
373     i = j;
374     }
375     }
376     ccode += 32;
377     }
378    
379     /* For an XCLASS there is always some additional data */
380    
381     if (*code == OP_XCLASS)
382     {
383     int ch;
384     while ((ch = *ccode++) != XCL_END)
385     {
386     if (ch == XCL_PROP)
387     {
388 nigel 87 int ptype = *ccode++;
389     int pvalue = *ccode++;
390     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
391 nigel 85 }
392     else if (ch == XCL_NOTPROP)
393     {
394 nigel 87 int ptype = *ccode++;
395     int pvalue = *ccode++;
396     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
397 nigel 85 }
398     else
399     {
400     ccode += 1 + print_char(f, ccode, TRUE);
401     if (ch == XCL_RANGE)
402     {
403     fprintf(f, "-");
404     ccode += 1 + print_char(f, ccode, TRUE);
405     }
406     }
407     }
408     }
409    
410     /* Indicate a non-UTF8 class which was created by negation */
411    
412     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
413    
414     /* Handle repeats after a class or a back reference */
415    
416     CLASS_REF_REPEAT:
417     switch(*ccode)
418     {
419     case OP_CRSTAR:
420     case OP_CRMINSTAR:
421     case OP_CRPLUS:
422     case OP_CRMINPLUS:
423     case OP_CRQUERY:
424     case OP_CRMINQUERY:
425     fprintf(f, "%s", OP_names[*ccode]);
426     extra += _pcre_OP_lengths[*ccode];
427     break;
428    
429     case OP_CRRANGE:
430     case OP_CRMINRANGE:
431     min = GET2(ccode,1);
432     max = GET2(ccode,3);
433     if (max == 0) fprintf(f, "{%d,}", min);
434     else fprintf(f, "{%d,%d}", min, max);
435     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
436     extra += _pcre_OP_lengths[*ccode];
437     break;
438 nigel 87
439     /* Do nothing if it's not a repeat; this code stops picky compilers
440     warning about the lack of a default code path. */
441    
442     default:
443     break;
444 nigel 85 }
445     }
446     break;
447    
448     /* Anything else is just an item with no data*/
449    
450     default:
451     fprintf(f, " %s", OP_names[*code]);
452     break;
453     }
454    
455     code += _pcre_OP_lengths[*code] + extra;
456     fprintf(f, "\n");
457     }
458     }
459    
460     /* End of pcre_printint.src */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12