/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 123 - (hide annotations) (download) (as text)
Mon Mar 12 15:19:06 2007 UTC (7 years, 4 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14362 byte(s)
Removal of trailing spaces.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 117 Copyright (c) 1997-2007 University of Cambridge
10 nigel 85
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
/* Decide whether a character is written literally or as a hexadecimal escape.
isprint() is deliberately not used: its answer can differ from system to system
(even when no locale is in use), and the output has to be identical everywhere
so that test results can be compared. pcretest uses this macro as well as this
file. Characters from 32 to 126 inclusive are treated as printable. */

#define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
58    
59     /* The table of operator names. */
60    
61 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
62    
63    
64 nigel 93
65 nigel 85 /*************************************************
66     * Print single- or multi-byte character *
67     *************************************************/
68    
69     static int
70     print_char(FILE *f, uschar *ptr, BOOL utf8)
71     {
72     int c = *ptr;
73    
74 ph10 107 #ifndef SUPPORT_UTF8
75     utf8 = utf8; /* Avoid compiler warning */
76     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
77     return 0;
78    
79     #else
80 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
81     {
82 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
83 nigel 85 return 0;
84     }
85     else
86     {
87     int i;
88     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
89     int s = 6*a;
90     c = (c & _pcre_utf8_table3[a]) << s;
91     for (i = 1; i <= a; i++)
92     {
93     /* This is a check for malformed UTF-8; it should only occur if the sanity
94     check has been turned off. Rather than swallow random bytes, just stop if
95     we hit a bad one. Print it with \X instead of \x as an indication. */
96    
97     if ((ptr[i] & 0xc0) != 0x80)
98     {
99     fprintf(f, "\\X{%x}", c);
100     return i - 1;
101     }
102    
103     /* The byte is OK */
104    
105     s -= 6;
106     c |= (ptr[i] & 0x3f) << s;
107     }
108     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
109     return a;
110     }
111 ph10 111 #endif
112 nigel 85 }
113    
114    
115    
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Look up the printable name of a Unicode property by searching the _pcre_utt
table for an entry whose type and value both match.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    the name from the matching table entry, or "??" if no entry
            matches or if the code was compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* _pcre_utt_size is the number of entries in _pcre_utt, so the search has to
start at the last valid index, _pcre_utt_size - 1. Starting at _pcre_utt_size
itself would read one element beyond the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* Without Unicode property support there is no table to search. The casts do
nothing except mark the arguments as used, to avoid compiler warnings. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
136    
137    
138    
139     /*************************************************
140     * Print compiled regex *
141     *************************************************/
142    
143     /* Make this function work for a regex with integers either byte order.
144 ph10 116 However, we assume that what we are passed is a compiled regex. The
145 ph10 123 print_lengths flag controls whether offsets and lengths of items are printed.
146 ph10 116 They can be turned off from pcretest so that automatic tests on bytecode can be
147     written that do not depend on the value of LINK_SIZE. */
148 nigel 85
149     static void
150 ph10 116 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
151 nigel 85 {
152     real_pcre *re = (real_pcre *)external_re;
153     uschar *codestart, *code;
154     BOOL utf8;
155    
156     unsigned int options = re->options;
157     int offset = re->name_table_offset;
158     int count = re->name_count;
159     int size = re->name_entry_size;
160    
161     if (re->magic_number != MAGIC_NUMBER)
162     {
163     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
164     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
165     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
166     options = ((options << 24) & 0xff000000) |
167     ((options << 8) & 0x00ff0000) |
168     ((options >> 8) & 0x0000ff00) |
169     ((options >> 24) & 0x000000ff);
170     }
171    
172     code = codestart = (uschar *)re + offset + count * size;
173     utf8 = (options & PCRE_UTF8) != 0;
174    
175     for(;;)
176     {
177     uschar *ccode;
178     int c;
179     int extra = 0;
180    
181 ph10 116 if (print_lengths)
182     fprintf(f, "%3d ", (int)(code - codestart));
183     else
184 ph10 123 fprintf(f, " ");
185 nigel 85
186     switch(*code)
187     {
188     case OP_END:
189     fprintf(f, " %s\n", OP_names[*code]);
190     fprintf(f, "------------------------------------------------------------------\n");
191     return;
192    
193     case OP_OPT:
194     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
195     break;
196    
197     case OP_CHAR:
198 nigel 91 fprintf(f, " ");
199     do
200 nigel 85 {
201 nigel 91 code++;
202     code += 1 + print_char(f, code, utf8);
203 nigel 85 }
204 nigel 91 while (*code == OP_CHAR);
205     fprintf(f, "\n");
206     continue;
207 nigel 85
208     case OP_CHARNC:
209 nigel 91 fprintf(f, " NC ");
210     do
211 nigel 85 {
212 nigel 91 code++;
213     code += 1 + print_char(f, code, utf8);
214 nigel 85 }
215 nigel 91 while (*code == OP_CHARNC);
216     fprintf(f, "\n");
217     continue;
218 nigel 85
219 nigel 93 case OP_CBRA:
220     case OP_SCBRA:
221 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
222 ph10 123 else fprintf(f, " ");
223 ph10 116 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
224 nigel 93 break;
225    
226     case OP_BRA:
227     case OP_SBRA:
228 nigel 85 case OP_KETRMAX:
229     case OP_KETRMIN:
230     case OP_ALT:
231     case OP_KET:
232     case OP_ASSERT:
233     case OP_ASSERT_NOT:
234     case OP_ASSERTBACK:
235     case OP_ASSERTBACK_NOT:
236     case OP_ONCE:
237     case OP_COND:
238 nigel 93 case OP_SCOND:
239 nigel 85 case OP_REVERSE:
240 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
241 ph10 123 else fprintf(f, " ");
242 ph10 116 fprintf(f, "%s", OP_names[*code]);
243 nigel 85 break;
244    
245 nigel 93 case OP_CREF:
246     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
247 nigel 85 break;
248    
249 nigel 93 case OP_RREF:
250     c = GET2(code, 1);
251     if (c == RREF_ANY)
252     fprintf(f, " Cond recurse any");
253 nigel 85 else
254 nigel 93 fprintf(f, " Cond recurse %d", c);
255 nigel 85 break;
256    
257 nigel 93 case OP_DEF:
258     fprintf(f, " Cond def");
259     break;
260    
261 nigel 85 case OP_STAR:
262     case OP_MINSTAR:
263 nigel 93 case OP_POSSTAR:
264 nigel 85 case OP_PLUS:
265     case OP_MINPLUS:
266 nigel 93 case OP_POSPLUS:
267 nigel 85 case OP_QUERY:
268     case OP_MINQUERY:
269 nigel 93 case OP_POSQUERY:
270 nigel 85 case OP_TYPESTAR:
271     case OP_TYPEMINSTAR:
272 nigel 93 case OP_TYPEPOSSTAR:
273 nigel 85 case OP_TYPEPLUS:
274     case OP_TYPEMINPLUS:
275 nigel 93 case OP_TYPEPOSPLUS:
276 nigel 85 case OP_TYPEQUERY:
277     case OP_TYPEMINQUERY:
278 nigel 93 case OP_TYPEPOSQUERY:
279 nigel 85 fprintf(f, " ");
280     if (*code >= OP_TYPESTAR)
281     {
282     fprintf(f, "%s", OP_names[code[1]]);
283     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
284     {
285 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
286     extra = 2;
287 nigel 85 }
288     }
289     else extra = print_char(f, code+1, utf8);
290     fprintf(f, "%s", OP_names[*code]);
291     break;
292    
293     case OP_EXACT:
294     case OP_UPTO:
295     case OP_MINUPTO:
296 nigel 93 case OP_POSUPTO:
297 nigel 85 fprintf(f, " ");
298     extra = print_char(f, code+3, utf8);
299     fprintf(f, "{");
300 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
301 nigel 85 fprintf(f, "%d}", GET2(code,1));
302     if (*code == OP_MINUPTO) fprintf(f, "?");
303 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
304 nigel 85 break;
305    
306     case OP_TYPEEXACT:
307     case OP_TYPEUPTO:
308     case OP_TYPEMINUPTO:
309 nigel 93 case OP_TYPEPOSUPTO:
310 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
311     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
312     {
313 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
314     extra = 2;
315 nigel 85 }
316     fprintf(f, "{");
317     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
318     fprintf(f, "%d}", GET2(code,1));
319     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
320 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
321 nigel 85 break;
322    
323     case OP_NOT:
324 nigel 93 c = code[1];
325     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
326 nigel 85 else fprintf(f, " [^\\x%02x]", c);
327     break;
328    
329     case OP_NOTSTAR:
330     case OP_NOTMINSTAR:
331 nigel 93 case OP_NOTPOSSTAR:
332 nigel 85 case OP_NOTPLUS:
333     case OP_NOTMINPLUS:
334 nigel 93 case OP_NOTPOSPLUS:
335 nigel 85 case OP_NOTQUERY:
336     case OP_NOTMINQUERY:
337 nigel 93 case OP_NOTPOSQUERY:
338     c = code[1];
339     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
340 nigel 85 else fprintf(f, " [^\\x%02x]", c);
341     fprintf(f, "%s", OP_names[*code]);
342     break;
343    
344     case OP_NOTEXACT:
345     case OP_NOTUPTO:
346     case OP_NOTMINUPTO:
347 nigel 93 case OP_NOTPOSUPTO:
348     c = code[3];
349     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
350 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
351     if (*code != OP_NOTEXACT) fprintf(f, "0,");
352     fprintf(f, "%d}", GET2(code,1));
353     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
354 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
355 nigel 85 break;
356    
357     case OP_RECURSE:
358 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
359 ph10 123 else fprintf(f, " ");
360 ph10 116 fprintf(f, "%s", OP_names[*code]);
361 nigel 85 break;
362    
363     case OP_REF:
364     fprintf(f, " \\%d", GET2(code,1));
365     ccode = code + _pcre_OP_lengths[*code];
366     goto CLASS_REF_REPEAT;
367    
368     case OP_CALLOUT:
369     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
370     GET(code, 2 + LINK_SIZE));
371     break;
372    
373     case OP_PROP:
374     case OP_NOTPROP:
375 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
376 nigel 85 break;
377    
378     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
379     having this code always here, and it makes it less messy without all those
380     #ifdefs. */
381    
382     case OP_CLASS:
383     case OP_NCLASS:
384     case OP_XCLASS:
385     {
386     int i, min, max;
387     BOOL printmap;
388    
389     fprintf(f, " [");
390    
391     if (*code == OP_XCLASS)
392     {
393     extra = GET(code, 1);
394     ccode = code + LINK_SIZE + 1;
395     printmap = (*ccode & XCL_MAP) != 0;
396     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
397     }
398     else
399     {
400     printmap = TRUE;
401     ccode = code + 1;
402     }
403    
404     /* Print a bit map */
405    
406     if (printmap)
407     {
408     for (i = 0; i < 256; i++)
409     {
410     if ((ccode[i/8] & (1 << (i&7))) != 0)
411     {
412     int j;
413     for (j = i+1; j < 256; j++)
414     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
415     if (i == '-' || i == ']') fprintf(f, "\\");
416 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
417     else fprintf(f, "\\x%02x", i);
418 nigel 85 if (--j > i)
419     {
420     if (j != i + 1) fprintf(f, "-");
421     if (j == '-' || j == ']') fprintf(f, "\\");
422 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
423     else fprintf(f, "\\x%02x", j);
424 nigel 85 }
425     i = j;
426     }
427     }
428     ccode += 32;
429     }
430    
431     /* For an XCLASS there is always some additional data */
432    
433     if (*code == OP_XCLASS)
434     {
435     int ch;
436     while ((ch = *ccode++) != XCL_END)
437     {
438     if (ch == XCL_PROP)
439     {
440 nigel 87 int ptype = *ccode++;
441     int pvalue = *ccode++;
442     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
443 nigel 85 }
444     else if (ch == XCL_NOTPROP)
445     {
446 nigel 87 int ptype = *ccode++;
447     int pvalue = *ccode++;
448     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
449 nigel 85 }
450     else
451     {
452     ccode += 1 + print_char(f, ccode, TRUE);
453     if (ch == XCL_RANGE)
454     {
455     fprintf(f, "-");
456     ccode += 1 + print_char(f, ccode, TRUE);
457     }
458     }
459     }
460     }
461    
462     /* Indicate a non-UTF8 class which was created by negation */
463    
464     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
465    
466     /* Handle repeats after a class or a back reference */
467    
468     CLASS_REF_REPEAT:
469     switch(*ccode)
470     {
471     case OP_CRSTAR:
472     case OP_CRMINSTAR:
473     case OP_CRPLUS:
474     case OP_CRMINPLUS:
475     case OP_CRQUERY:
476     case OP_CRMINQUERY:
477     fprintf(f, "%s", OP_names[*ccode]);
478     extra += _pcre_OP_lengths[*ccode];
479     break;
480    
481     case OP_CRRANGE:
482     case OP_CRMINRANGE:
483     min = GET2(ccode,1);
484     max = GET2(ccode,3);
485     if (max == 0) fprintf(f, "{%d,}", min);
486     else fprintf(f, "{%d,%d}", min, max);
487     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
488     extra += _pcre_OP_lengths[*ccode];
489     break;
490 nigel 87
491     /* Do nothing if it's not a repeat; this code stops picky compilers
492     warning about the lack of a default code path. */
493    
494     default:
495     break;
496 nigel 85 }
497     }
498     break;
499    
500     /* Anything else is just an item with no data*/
501    
502     default:
503     fprintf(f, " %s", OP_names[*code]);
504     break;
505     }
506    
507     code += _pcre_OP_lengths[*code] + extra;
508     fprintf(f, "\n");
509     }
510     }
511    
512     /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12