/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 475 - (hide annotations) (download) (as text)
Sat Jan 2 18:21:30 2010 UTC (4 years, 11 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14754 byte(s)
Tidies to allow easier embedded compilation; avoid (double) where possible.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 475 Copyright (c) 1997-2010 University of Cambridge
10 nigel 85
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 ph10 475 (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
47     compiles.
48 nigel 85
49     (2) It is always #included by pcretest.c, which can be asked to print out a
50     compiled regex for debugging purposes. */
51    
52    
53 nigel 93 /* Macro that decides whether a character should be output as a literal or in
54     hexadecimal. We don't use isprint() because that can vary from system to system
55     (even without the use of locales) and we want the output always to be the same,
56     for testing purposes. This macro is used in pcretest as well as in this file.
/* (continued) The argument is expanded twice, so it must not have side
effects. With EBCDIC defined the accepted range is 64..254 inclusive;
otherwise it is 32..126 inclusive. */
57    
58 ph10 391 #ifdef EBCDIC
59     #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
60     #else
61 nigel 93 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
62 ph10 391 #endif
63 nigel 93
64     /* The table of operator names. It is indexed by opcode value (see the
OP_names[*code] uses in pcre_printint below); the initialisers come from the
OP_NAME_LIST macro, which is not defined in this file. */
65    
66 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
67    
68    
69 nigel 93
70 nigel 85 /*************************************************
71     * Print single- or multi-byte character *
72     *************************************************/
73    
74     static int
75     print_char(FILE *f, uschar *ptr, BOOL utf8)
76     {
77     int c = *ptr;
78    
79 ph10 107 #ifndef SUPPORT_UTF8
80     utf8 = utf8; /* Avoid compiler warning */
81     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
82     return 0;
83    
84     #else
85 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
86     {
87 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
88 nigel 85 return 0;
89     }
90     else
91     {
92     int i;
93     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
94     int s = 6*a;
95     c = (c & _pcre_utf8_table3[a]) << s;
96     for (i = 1; i <= a; i++)
97     {
98     /* This is a check for malformed UTF-8; it should only occur if the sanity
99     check has been turned off. Rather than swallow random bytes, just stop if
100     we hit a bad one. Print it with \X instead of \x as an indication. */
101    
102     if ((ptr[i] & 0xc0) != 0x80)
103     {
104     fprintf(f, "\\X{%x}", c);
105     return i - 1;
106     }
107    
108     /* The byte is OK */
109    
110     s -= 6;
111     c |= (ptr[i] & 0x3f) << s;
112     }
113     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
114     return a;
115     }
116 ph10 111 #endif
117 nigel 85 }
118    
119    
120    
121     /*************************************************
122     * Find Unicode property name *
123     *************************************************/
124    
/* Look up the name of a Unicode property.

Arguments:
  ptype     the property type
  pvalue    the property value

Returns:    the name of the matching entry in the _pcre_utt table, or the
            string "??" if no entry matches or if the code was compiled
            without SUPPORT_UCP */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Search from the last table entry down; i ends up as -1 if nothing matches. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* Without property support every lookup fails. The arguments are cast to void
to mark them as deliberately unused; doing arithmetic on them for that purpose
risks signed overflow for large values. */

(void)ptype;
(void)pvalue;
return "??";
#endif
}
141    
142    
143    
144     /*************************************************
145     * Print compiled regex *
146     *************************************************/
147    
148     /* Make this function work for a regex with integers in either byte order.
149 ph10 116 However, we assume that what we are passed is a compiled regex. The
150 ph10 123 print_lengths flag controls whether offsets and lengths of items are printed.
151 ph10 116 They can be turned off from pcretest so that automatic tests on bytecode can be
152     written that do not depend on the value of LINK_SIZE. */
153 nigel 85
/* Print a listing of a compiled pattern, one item per output line.

Arguments:
  external_re     the compiled pattern; it is cast to real_pcre * to reach
                    the header fields
  f               stream that receives the listing
  print_lengths   if TRUE, each line starts with the item's byte offset from
                    the start of the code, and the bracket-type items and
                    OP_RECURSE also show their GET(code, 1) value

Returns:          nothing; the function returns after printing OP_END */

154     static void
155 ph10 116 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
156 nigel 85 {
157     real_pcre *re = (real_pcre *)external_re;
158     uschar *codestart, *code;
159     BOOL utf8;
160    
161     unsigned int options = re->options;
162     int offset = re->name_table_offset;
163     int count = re->name_count;
164     int size = re->name_entry_size;
165    
/* A magic number that does not match is taken to mean that the header was
written with the opposite byte order: swap the two bytes of each of the three
name-table fields and reverse the four bytes of the options word. */

166     if (re->magic_number != MAGIC_NUMBER)
167     {
168     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
169     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
170     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
171     options = ((options << 24) & 0xff000000) |
172     ((options << 8) & 0x00ff0000) |
173     ((options >> 8) & 0x0000ff00) |
174     ((options >> 24) & 0x000000ff);
175     }
176    
/* The code is taken to start immediately after the name table, that is,
count entries of size bytes each beyond the table's offset. */

177     code = codestart = (uschar *)re + offset + count * size;
178     utf8 = (options & PCRE_UTF8) != 0;
179    
/* Each pass of this loop prints one item. Cases that end in "break" are
advanced by the common code at the bottom of the loop; cases that end in
"continue" have already advanced code themselves. */

180     for(;;)
181     {
182     uschar *ccode;
183     int c;
/* Bytes to skip in addition to the table length of the opcode. */
184     int extra = 0;
185    
186 ph10 116 if (print_lengths)
187     fprintf(f, "%3d ", (int)(code - codestart));
188     else
189 ph10 123 fprintf(f, " ");
190 nigel 85
191     switch(*code)
192     {
193     case OP_END:
194     fprintf(f, " %s\n", OP_names[*code]);
195     fprintf(f, "------------------------------------------------------------------\n");
196     return;
197    
198     case OP_OPT:
199     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
200     break;
201    
/* A run of consecutive OP_CHAR items is printed on a single line. Each pass
steps over the opcode, then over the character that print_char() printed. */

202     case OP_CHAR:
203 nigel 91 fprintf(f, " ");
204     do
205 nigel 85 {
206 nigel 91 code++;
207     code += 1 + print_char(f, code, utf8);
208 nigel 85 }
209 nigel 91 while (*code == OP_CHAR);
210     fprintf(f, "\n");
211     continue;
212 nigel 85
/* As for OP_CHAR, but the line is marked with "NC". */

213     case OP_CHARNC:
214 nigel 91 fprintf(f, " NC ");
215     do
216 nigel 85 {
217 nigel 91 code++;
218     code += 1 + print_char(f, code, utf8);
219 nigel 85 }
220 nigel 91 while (*code == OP_CHARNC);
221     fprintf(f, "\n");
222     continue;
223 nigel 85
/* These two print the opcode name followed by the 2-byte value that is read
at offset 1+LINK_SIZE. */

224 nigel 93 case OP_CBRA:
225     case OP_SCBRA:
226 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
227 ph10 123 else fprintf(f, " ");
228 ph10 116 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
229 nigel 93 break;
230    
/* For these only the GET(code, 1) value (when print_lengths is set) and the
opcode name are printed. */

231     case OP_BRA:
232     case OP_SBRA:
233 nigel 85 case OP_KETRMAX:
234     case OP_KETRMIN:
235     case OP_ALT:
236     case OP_KET:
237     case OP_ASSERT:
238     case OP_ASSERT_NOT:
239     case OP_ASSERTBACK:
240     case OP_ASSERTBACK_NOT:
241     case OP_ONCE:
242     case OP_COND:
243 nigel 93 case OP_SCOND:
244 nigel 85 case OP_REVERSE:
245 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
246 ph10 123 else fprintf(f, " ");
247 ph10 116 fprintf(f, "%s", OP_names[*code]);
248 nigel 85 break;
249 ph10 461
250 ph10 447 case OP_CLOSE:
251     fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
252 ph10 461 break;
253 nigel 85
254 nigel 93 case OP_CREF:
255 ph10 461 case OP_NCREF:
256 nigel 93 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
257 nigel 85 break;
258    
/* The value RREF_ANY is printed as "any"; other values are printed as a
number. */

259 nigel 93 case OP_RREF:
260     c = GET2(code, 1);
261     if (c == RREF_ANY)
262     fprintf(f, " Cond recurse any");
263 nigel 85 else
264 nigel 93 fprintf(f, " Cond recurse %d", c);
265 nigel 85 break;
266    
267 ph10 459 case OP_NRREF:
268     c = GET2(code, 1);
269     if (c == RREF_ANY)
270     fprintf(f, " Cond nrecurse any");
271     else
272     fprintf(f, " Cond nrecurse %d", c);
273     break;
274    
275 nigel 93 case OP_DEF:
276     fprintf(f, " Cond def");
277     break;
278    
/* Single-item repeats. When the opcode value is at least OP_TYPESTAR the
repeated item is named by the opcode in code[1], and OP_PROP/OP_NOTPROP carry
two further bytes that are passed to get_ucpname(). Otherwise the repeated item
is a character printed by print_char(). NOTE(review): this depends on the
numeric ordering of the opcodes, which is not visible in this file. */

279 nigel 85 case OP_STAR:
280     case OP_MINSTAR:
281 nigel 93 case OP_POSSTAR:
282 nigel 85 case OP_PLUS:
283     case OP_MINPLUS:
284 nigel 93 case OP_POSPLUS:
285 nigel 85 case OP_QUERY:
286     case OP_MINQUERY:
287 nigel 93 case OP_POSQUERY:
288 nigel 85 case OP_TYPESTAR:
289     case OP_TYPEMINSTAR:
290 nigel 93 case OP_TYPEPOSSTAR:
291 nigel 85 case OP_TYPEPLUS:
292     case OP_TYPEMINPLUS:
293 nigel 93 case OP_TYPEPOSPLUS:
294 nigel 85 case OP_TYPEQUERY:
295     case OP_TYPEMINQUERY:
296 nigel 93 case OP_TYPEPOSQUERY:
297 nigel 85 fprintf(f, " ");
298     if (*code >= OP_TYPESTAR)
299     {
300     fprintf(f, "%s", OP_names[code[1]]);
301     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
302     {
303 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
304     extra = 2;
305 nigel 85 }
306     }
307     else extra = print_char(f, code+1, utf8);
308     fprintf(f, "%s", OP_names[*code]);
309     break;
310    
/* Counted repeats of a character: the count is the 2-byte value at offset 1
and the character starts at offset 3. All but OP_EXACT are printed with a
lower bound of 0. */

311     case OP_EXACT:
312     case OP_UPTO:
313     case OP_MINUPTO:
314 nigel 93 case OP_POSUPTO:
315 nigel 85 fprintf(f, " ");
316     extra = print_char(f, code+3, utf8);
317     fprintf(f, "{");
318 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
319 nigel 85 fprintf(f, "%d}", GET2(code,1));
320     if (*code == OP_MINUPTO) fprintf(f, "?");
321 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
322 nigel 85 break;
323    
/* Counted repeats of an item named by the opcode in code[3]; for
OP_PROP/OP_NOTPROP two further bytes follow it. */

324     case OP_TYPEEXACT:
325     case OP_TYPEUPTO:
326     case OP_TYPEMINUPTO:
327 nigel 93 case OP_TYPEPOSUPTO:
328 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
329     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
330     {
331 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
332     extra = 2;
333 nigel 85 }
334     fprintf(f, "{");
335     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
336     fprintf(f, "%d}", GET2(code,1));
337     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
338 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
339 nigel 85 break;
340    
/* The OP_NOT family prints the single byte that follows as a negated
one-character class; print_char() is not used here. */

341     case OP_NOT:
342 nigel 93 c = code[1];
343     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
344 nigel 85 else fprintf(f, " [^\\x%02x]", c);
345     break;
346    
347     case OP_NOTSTAR:
348     case OP_NOTMINSTAR:
349 nigel 93 case OP_NOTPOSSTAR:
350 nigel 85 case OP_NOTPLUS:
351     case OP_NOTMINPLUS:
352 nigel 93 case OP_NOTPOSPLUS:
353 nigel 85 case OP_NOTQUERY:
354     case OP_NOTMINQUERY:
355 nigel 93 case OP_NOTPOSQUERY:
356     c = code[1];
357     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
358 nigel 85 else fprintf(f, " [^\\x%02x]", c);
359     fprintf(f, "%s", OP_names[*code]);
360     break;
361    
362     case OP_NOTEXACT:
363     case OP_NOTUPTO:
364     case OP_NOTMINUPTO:
365 nigel 93 case OP_NOTPOSUPTO:
366     c = code[3];
367     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
368 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
369     if (*code != OP_NOTEXACT) fprintf(f, "0,");
370     fprintf(f, "%d}", GET2(code,1));
371     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
372 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
373 nigel 85 break;
374    
375     case OP_RECURSE:
376 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
377 ph10 123 else fprintf(f, " ");
378 ph10 116 fprintf(f, "%s", OP_names[*code]);
379 nigel 85 break;
380    
/* A back reference may be followed by a repeat item. Point ccode at whatever
follows the reference and jump into the class-handling block below, which
contains the repeat-printing code. */

381     case OP_REF:
382     fprintf(f, " \\%d", GET2(code,1));
383     ccode = code + _pcre_OP_lengths[*code];
384     goto CLASS_REF_REPEAT;
385    
386     case OP_CALLOUT:
387     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
388     GET(code, 2 + LINK_SIZE));
389     break;
390    
391     case OP_PROP:
392     case OP_NOTPROP:
393 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
394 nigel 85 break;
395    
396     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
397     having this code always here, and it makes it less messy without all those
398     #ifdefs. */
399    
400     case OP_CLASS:
401     case OP_NCLASS:
402     case OP_XCLASS:
403     {
404     int i, min, max;
405     BOOL printmap;
406    
407     fprintf(f, " [");
408    
/* For OP_XCLASS, extra is set from the value at offset 1 and is later added
to the opcode's table length when advancing (presumably that value is the
total length of the item - TODO confirm). The byte after it holds flags:
XCL_MAP says whether a bit map is present and XCL_NOT marks a negated class.
OP_CLASS and OP_NCLASS always have a bit map, directly after the opcode. */

409     if (*code == OP_XCLASS)
410     {
411     extra = GET(code, 1);
412     ccode = code + LINK_SIZE + 1;
413     printmap = (*ccode & XCL_MAP) != 0;
414     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
415     }
416     else
417     {
418     printmap = TRUE;
419     ccode = code + 1;
420     }
421    
422     /* Print a bit map */
423    
/* The map holds 256 bits in 32 bytes. For each set bit i, j is advanced to
the first clear bit after it and then stepped back, so i..j is a run of set
bits. A run of one prints just i; a run of two prints i and j with no
separator; a longer run prints i-j. '-' and ']' are escaped with a
backslash. The scan resumes after j. */

424     if (printmap)
425     {
426     for (i = 0; i < 256; i++)
427     {
428     if ((ccode[i/8] & (1 << (i&7))) != 0)
429     {
430     int j;
431     for (j = i+1; j < 256; j++)
432     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
433     if (i == '-' || i == ']') fprintf(f, "\\");
434 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
435     else fprintf(f, "\\x%02x", i);
436 nigel 85 if (--j > i)
437     {
438     if (j != i + 1) fprintf(f, "-");
439     if (j == '-' || j == ']') fprintf(f, "\\");
440 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
441     else fprintf(f, "\\x%02x", j);
442 nigel 85 }
443     i = j;
444     }
445     }
446     ccode += 32;
447     }
448    
449     /* For an XCLASS there is always some additional data */
450    
/* The additional data is a list of items terminated by XCL_END. A property
item has a type byte and a value byte; any other item is one character,
or two characters printed as a range when the item is XCL_RANGE. The
characters here are always printed as UTF-8. */

451     if (*code == OP_XCLASS)
452     {
453     int ch;
454     while ((ch = *ccode++) != XCL_END)
455     {
456     if (ch == XCL_PROP)
457     {
458 nigel 87 int ptype = *ccode++;
459     int pvalue = *ccode++;
460     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
461 nigel 85 }
462     else if (ch == XCL_NOTPROP)
463     {
464 nigel 87 int ptype = *ccode++;
465     int pvalue = *ccode++;
466     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
467 nigel 85 }
468     else
469     {
470     ccode += 1 + print_char(f, ccode, TRUE);
471     if (ch == XCL_RANGE)
472     {
473     fprintf(f, "-");
474     ccode += 1 + print_char(f, ccode, TRUE);
475     }
476     }
477     }
478     }
479    
480     /* Indicate a non-UTF8 class which was created by negation */
481    
482     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
483    
484     /* Handle repeats after a class or a back reference */
485    
/* This label is also the target of the goto in the OP_REF case. At this point
ccode addresses the item after the class or reference. If that item is a
repeat it is printed on the same line and its table length is added to extra,
so that the advance at the bottom of the loop steps over it too. */

486     CLASS_REF_REPEAT:
487     switch(*ccode)
488     {
489     case OP_CRSTAR:
490     case OP_CRMINSTAR:
491     case OP_CRPLUS:
492     case OP_CRMINPLUS:
493     case OP_CRQUERY:
494     case OP_CRMINQUERY:
495     fprintf(f, "%s", OP_names[*ccode]);
496     extra += _pcre_OP_lengths[*ccode];
497     break;
498    
/* A maximum of zero is printed as an open-ended range, "{min,}". */

499     case OP_CRRANGE:
500     case OP_CRMINRANGE:
501     min = GET2(ccode,1);
502     max = GET2(ccode,3);
503     if (max == 0) fprintf(f, "{%d,}", min);
504     else fprintf(f, "{%d,%d}", min, max);
505     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
506     extra += _pcre_OP_lengths[*ccode];
507     break;
508 nigel 87
509     /* Do nothing if it's not a repeat; this code stops picky compilers
510     warning about the lack of a default code path. */
511    
512     default:
513     break;
514 nigel 85 }
515     }
516     break;
517    
518     /* Anything else is just an item with no data. */
519    
520     default:
521     fprintf(f, " %s", OP_names[*code]);
522     break;
523     }
524    
/* Advance by the table length of the opcode plus any additional bytes that
the case above recorded in extra, then end the output line. */

525     code += _pcre_OP_lengths[*code] + extra;
526     fprintf(f, "\n");
527     }
528     }
529    
530     /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12