/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 447 - (hide annotations) (download) (as text)
Tue Sep 15 18:17:54 2009 UTC (3 years, 8 months ago) by ph10
File MIME type: application/x-wais-source
File size: 14563 byte(s)
Capture data when (*ACCEPT) is inside capturing parentheses.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 391 Copyright (c) 1997-2009 University of Cambridge
10 nigel 85
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
/* PRINTABLE(c) decides whether character c is written as a literal or as a
hexadecimal escape. isprint() is deliberately not used: its answer can vary
from system to system (even when locales are not in use), and the output must
always be the same for testing purposes. pcretest uses this macro as well as
this file. */

#ifndef EBCDIC
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#else
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#endif
62 nigel 93
63     /* The table of operator names. */
64    
65 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
66    
67    
68 nigel 93
69 nigel 85 /*************************************************
70     * Print single- or multi-byte character *
71     *************************************************/
72    
73     static int
74     print_char(FILE *f, uschar *ptr, BOOL utf8)
75     {
76     int c = *ptr;
77    
78 ph10 107 #ifndef SUPPORT_UTF8
79     utf8 = utf8; /* Avoid compiler warning */
80     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
81     return 0;
82    
83     #else
84 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
85     {
86 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
87 nigel 85 return 0;
88     }
89     else
90     {
91     int i;
92     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
93     int s = 6*a;
94     c = (c & _pcre_utf8_table3[a]) << s;
95     for (i = 1; i <= a; i++)
96     {
97     /* This is a check for malformed UTF-8; it should only occur if the sanity
98     check has been turned off. Rather than swallow random bytes, just stop if
99     we hit a bad one. Print it with \X instead of \x as an indication. */
100    
101     if ((ptr[i] & 0xc0) != 0x80)
102     {
103     fprintf(f, "\\X{%x}", c);
104     return i - 1;
105     }
106    
107     /* The byte is OK */
108    
109     s -= 6;
110     c |= (ptr[i] & 0x3f) << s;
111     }
112     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
113     return a;
114     }
115 ph10 111 #endif
116 nigel 85 }
117    
118    
119    
/*************************************************
*          Find Unicode property name            *
*************************************************/

/* Looks up the name of a Unicode property from its type and value by
searching the _pcre_utt table from its last entry towards its first.

Arguments:
  ptype    the property type to look for
  pvalue   the property value to look for

Returns:   the name of the matching table entry, or "??" if there is no
           matching entry or SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;
for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
/* Without UCP support there is no table to consult. The casts mark the
arguments as deliberately unused; no arithmetic is done on them, so no
argument value can cause a signed overflow. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
140    
141    
142    
143     /*************************************************
144     * Print compiled regex *
145     *************************************************/
146    
147     /* Make this function work for a regex with integers in either byte order.
148 ph10 116 However, we assume that what we are passed is a compiled regex. The
149 ph10 123 print_lengths flag controls whether offsets and lengths of items are printed.
150 ph10 116 They can be turned off from pcretest so that automatic tests on bytecode can be
151     written that do not depend on the value of LINK_SIZE. */
152 nigel 85
/* Writes a listing of the compiled pattern external_re to f, one line per
item (a run of consecutive OP_CHAR or OP_CHARNC items shares a line). The
function returns after printing the OP_END item and a line of dashes.

Arguments:
  external_re    the compiled pattern; it is cast to real_pcre and only its
                 magic number is examined before the fields are used
  f              the stream that receives the listing
  print_lengths  if TRUE, each line starts with the item's offset from the
                 start of the code, and bracket-like items also show the
                 value read from their link field; if FALSE, blanks are
                 written in place of those numbers

Returns:         nothing
*/
153     static void
154 ph10 116 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
155 nigel 85 {
156     real_pcre *re = (real_pcre *)external_re;
157     uschar *codestart, *code;
158     BOOL utf8;
159    
160     unsigned int options = re->options;
161     int offset = re->name_table_offset;
162     int count = re->name_count;
163     int size = re->name_entry_size;
164    
/* A magic number that does not match is treated as meaning that the header
fields are in the opposite byte order. The copies taken above are swapped:
offset, count and size as two-byte values, options as a four-byte value. */
165     if (re->magic_number != MAGIC_NUMBER)
166     {
167     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
168     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
169     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
170     options = ((options << 24) & 0xff000000) |
171     ((options << 8) & 0x00ff0000) |
172     ((options >> 8) & 0x0000ff00) |
173     ((options >> 24) & 0x000000ff);
174     }
175    
/* The code is taken to start directly after the name table, that is, offset
bytes from the start of the block plus count entries of size bytes each. */
176     code = codestart = (uschar *)re + offset + count * size;
177     utf8 = (options & PCRE_UTF8) != 0;
178    
/* Each pass of this loop prints one item. The only way out is the return in
the OP_END case. */
179     for(;;)
180     {
181     uschar *ccode;
182     int c;
/* extra collects the number of bytes to skip beyond the opcode's entry in
_pcre_OP_lengths; it is added to code at the bottom of the loop. */
183     int extra = 0;
184    
185 ph10 116 if (print_lengths)
186     fprintf(f, "%3d ", (int)(code - codestart));
187     else
188 ph10 123 fprintf(f, " ");
189 nigel 85
190     switch(*code)
191     {
192     case OP_END:
193     fprintf(f, " %s\n", OP_names[*code]);
194     fprintf(f, "------------------------------------------------------------------\n");
195     return;
196    
197     case OP_OPT:
198     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
199     break;
200    
/* Consecutive OP_CHAR items are printed together on one line. This loop
advances code itself (opcode byte, first character byte, and any additional
bytes reported by print_char), so "continue" is used to bypass the generic
advance at the bottom of the main loop. */
201     case OP_CHAR:
202 nigel 91 fprintf(f, " ");
203     do
204 nigel 85 {
205 nigel 91 code++;
206     code += 1 + print_char(f, code, utf8);
207 nigel 85 }
208 nigel 91 while (*code == OP_CHAR);
209     fprintf(f, "\n");
210     continue;
211 nigel 85
/* The same treatment as OP_CHAR, with "NC" written before the characters. */
212     case OP_CHARNC:
213 nigel 91 fprintf(f, " NC ");
214     do
215 nigel 85 {
216 nigel 91 code++;
217     code += 1 + print_char(f, code, utf8);
218 nigel 85 }
219 nigel 91 while (*code == OP_CHARNC);
220     fprintf(f, "\n");
221     continue;
222 nigel 85
/* These two print the link value (when print_lengths is set), the opcode
name, and the 2-byte value that follows the link field - presumably the
capture group number; confirm against the compiler. */
223 nigel 93 case OP_CBRA:
224     case OP_SCBRA:
225 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
226 ph10 123 else fprintf(f, " ");
227 ph10 116 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
228 nigel 93 break;
229    
/* Items printed as the link value (when print_lengths is set) followed by
the opcode name only. */
230     case OP_BRA:
231     case OP_SBRA:
232 nigel 85 case OP_KETRMAX:
233     case OP_KETRMIN:
234     case OP_ALT:
235     case OP_KET:
236     case OP_ASSERT:
237     case OP_ASSERT_NOT:
238     case OP_ASSERTBACK:
239     case OP_ASSERTBACK_NOT:
240     case OP_ONCE:
241     case OP_COND:
242 nigel 93 case OP_SCOND:
243 nigel 85 case OP_REVERSE:
244 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
245 ph10 123 else fprintf(f, " ");
246 ph10 116 fprintf(f, "%s", OP_names[*code]);
247 nigel 85 break;
248 ph10 447
249     case OP_CLOSE:
250     fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
251     break;
252 nigel 85
/* The 2-byte value after the opcode is printed unconditionally here; it does
not depend on print_lengths. */
253 nigel 93 case OP_CREF:
254     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
255 nigel 85 break;
256    
257 nigel 93 case OP_RREF:
258     c = GET2(code, 1);
259     if (c == RREF_ANY)
260     fprintf(f, " Cond recurse any");
261 nigel 85 else
262 nigel 93 fprintf(f, " Cond recurse %d", c);
263 nigel 85 break;
264    
265 nigel 93 case OP_DEF:
266     fprintf(f, " Cond def");
267     break;
268    
/* Single-item repeats. What is repeated is printed first, then the name of
the repeat opcode. */
269 nigel 85 case OP_STAR:
270     case OP_MINSTAR:
271 nigel 93 case OP_POSSTAR:
272 nigel 85 case OP_PLUS:
273     case OP_MINPLUS:
274 nigel 93 case OP_POSPLUS:
275 nigel 85 case OP_QUERY:
276     case OP_MINQUERY:
277 nigel 93 case OP_POSQUERY:
278 nigel 85 case OP_TYPESTAR:
279     case OP_TYPEMINSTAR:
280 nigel 93 case OP_TYPEPOSSTAR:
281 nigel 85 case OP_TYPEPLUS:
282     case OP_TYPEMINPLUS:
283 nigel 93 case OP_TYPEPOSPLUS:
284 nigel 85 case OP_TYPEQUERY:
285     case OP_TYPEMINQUERY:
286 nigel 93 case OP_TYPEPOSQUERY:
287 nigel 85 fprintf(f, " ");
/* This comparison separates the OP_TYPExxx cases from the character cases
listed above. It assumes the OP_TYPExxx opcodes are the ones numbered at or
above OP_TYPESTAR - the numbering is defined elsewhere; confirm there. */
288     if (*code >= OP_TYPESTAR)
289     {
290     fprintf(f, "%s", OP_names[code[1]]);
291     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
292     {
/* A property item is followed by two bytes, which are passed to get_ucpname
as type and value; extra makes the generic advance skip them. */
293 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
294     extra = 2;
295 nigel 85 }
296     }
/* Otherwise the repeated item is a character; print_char returns how many
bytes it used beyond the first. */
297     else extra = print_char(f, code+1, utf8);
298     fprintf(f, "%s", OP_names[*code]);
299     break;
300    
/* Counted character repeats: the 2-byte count is at code+1 and the character
starts at code+3. All but OP_EXACT are shown with a lower bound of 0. */
301     case OP_EXACT:
302     case OP_UPTO:
303     case OP_MINUPTO:
304 nigel 93 case OP_POSUPTO:
305 nigel 85 fprintf(f, " ");
306     extra = print_char(f, code+3, utf8);
307     fprintf(f, "{");
308 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
309 nigel 85 fprintf(f, "%d}", GET2(code,1));
310     if (*code == OP_MINUPTO) fprintf(f, "?");
311 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
312 nigel 85 break;
313    
/* Counted repeats of a type item: the 2-byte count is at code+1 and the type
opcode at code[3], with two property bytes after it for OP_PROP/OP_NOTPROP. */
314     case OP_TYPEEXACT:
315     case OP_TYPEUPTO:
316     case OP_TYPEMINUPTO:
317 nigel 93 case OP_TYPEPOSUPTO:
318 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
319     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
320     {
321 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
322     extra = 2;
323 nigel 85 }
324     fprintf(f, "{");
325     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
326     fprintf(f, "%d}", GET2(code,1));
327     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
328 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
329 nigel 85 break;
330    
/* The negated-character items below read a single byte and print it as
[^x]; print_char is not used, so no multi-byte decoding is done here. */
331     case OP_NOT:
332 nigel 93 c = code[1];
333     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
334 nigel 85 else fprintf(f, " [^\\x%02x]", c);
335     break;
336    
337     case OP_NOTSTAR:
338     case OP_NOTMINSTAR:
339 nigel 93 case OP_NOTPOSSTAR:
340 nigel 85 case OP_NOTPLUS:
341     case OP_NOTMINPLUS:
342 nigel 93 case OP_NOTPOSPLUS:
343 nigel 85 case OP_NOTQUERY:
344     case OP_NOTMINQUERY:
345 nigel 93 case OP_NOTPOSQUERY:
346     c = code[1];
347     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
348 nigel 85 else fprintf(f, " [^\\x%02x]", c);
349     fprintf(f, "%s", OP_names[*code]);
350     break;
351    
352     case OP_NOTEXACT:
353     case OP_NOTUPTO:
354     case OP_NOTMINUPTO:
355 nigel 93 case OP_NOTPOSUPTO:
356     c = code[3];
357     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
358 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
359     if (*code != OP_NOTEXACT) fprintf(f, "0,");
360     fprintf(f, "%d}", GET2(code,1));
361     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
362 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
363 nigel 85 break;
364    
365     case OP_RECURSE:
366 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
367 ph10 123 else fprintf(f, " ");
368 ph10 116 fprintf(f, "%s", OP_names[*code]);
369 nigel 85 break;
370    
/* A back reference may be followed by a repeat item. ccode is pointed at the
item after the reference and control jumps into the class-handling block so
that the same repeat-printing code is used for both. */
371     case OP_REF:
372     fprintf(f, " \\%d", GET2(code,1));
373     ccode = code + _pcre_OP_lengths[*code];
374     goto CLASS_REF_REPEAT;
375    
376     case OP_CALLOUT:
377     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
378     GET(code, 2 + LINK_SIZE));
379     break;
380    
381     case OP_PROP:
382     case OP_NOTPROP:
383 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
384 nigel 85 break;
385    
386     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
387     having this code always here, and it makes it less messy without all those
388     #ifdefs. */
389    
390     case OP_CLASS:
391     case OP_NCLASS:
392     case OP_XCLASS:
393     {
394     int i, min, max;
395     BOOL printmap;
396    
397     fprintf(f, " [");
398    
/* For OP_XCLASS the value read from code+1 becomes part of the advance made
at the bottom of the loop. The byte after the link field holds flags: XCL_MAP
says whether a bit map follows, and XCL_NOT causes "^" to be printed. */
399     if (*code == OP_XCLASS)
400     {
401     extra = GET(code, 1);
402     ccode = code + LINK_SIZE + 1;
403     printmap = (*ccode & XCL_MAP) != 0;
404     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
405     }
406     else
407     {
408     printmap = TRUE;
409     ccode = code + 1;
410     }
411    
412     /* Print a bit map */
413    
/* The map has one bit for each of 256 values. For each set bit i, j is moved
to the last value of the run of set bits that starts at i. A run of one is
printed as that character, a run of two as both characters, and a longer run
as first-last. '-' and ']' are preceded by a backslash. */
414     if (printmap)
415     {
416     for (i = 0; i < 256; i++)
417     {
418     if ((ccode[i/8] & (1 << (i&7))) != 0)
419     {
420     int j;
421     for (j = i+1; j < 256; j++)
422     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
423     if (i == '-' || i == ']') fprintf(f, "\\");
424 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
425     else fprintf(f, "\\x%02x", i);
426 nigel 85 if (--j > i)
427     {
428     if (j != i + 1) fprintf(f, "-");
429     if (j == '-' || j == ']') fprintf(f, "\\");
430 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
431     else fprintf(f, "\\x%02x", j);
432 nigel 85 }
/* Resume the scan after the run that has just been printed. */
433     i = j;
434     }
435     }
/* Step over the 32 bytes of the map. */
436     ccode += 32;
437     }
438    
439     /* For an XCLASS there is always some additional data */
440    
/* The additional data is a list of items ended by XCL_END. A property item
has two following bytes (type and value); any other item is one character,
or two characters when the item is XCL_RANGE. These characters are always
printed with the UTF-8 flag set. */
441     if (*code == OP_XCLASS)
442     {
443     int ch;
444     while ((ch = *ccode++) != XCL_END)
445     {
446     if (ch == XCL_PROP)
447     {
448 nigel 87 int ptype = *ccode++;
449     int pvalue = *ccode++;
450     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
451 nigel 85 }
452     else if (ch == XCL_NOTPROP)
453     {
454 nigel 87 int ptype = *ccode++;
455     int pvalue = *ccode++;
456     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
457 nigel 85 }
458     else
459     {
460     ccode += 1 + print_char(f, ccode, TRUE);
461     if (ch == XCL_RANGE)
462     {
463     fprintf(f, "-");
464     ccode += 1 + print_char(f, ccode, TRUE);
465     }
466     }
467     }
468     }
469    
470     /* Indicate a non-UTF8 class which was created by negation */
471    
472     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
473    
474     /* Handle repeats after a class or a back reference */
475    
/* This label is reached by falling through after a class and by the goto in
the OP_REF case. In both cases ccode points at the item that follows. If that
item is a repeat, it is printed on the same line and its length is added to
extra so that the generic advance skips it as well. */
476     CLASS_REF_REPEAT:
477     switch(*ccode)
478     {
479     case OP_CRSTAR:
480     case OP_CRMINSTAR:
481     case OP_CRPLUS:
482     case OP_CRMINPLUS:
483     case OP_CRQUERY:
484     case OP_CRMINQUERY:
485     fprintf(f, "%s", OP_names[*ccode]);
486     extra += _pcre_OP_lengths[*ccode];
487     break;
488    
/* A maximum of zero is printed as an open-ended range, {min,}. */
489     case OP_CRRANGE:
490     case OP_CRMINRANGE:
491     min = GET2(ccode,1);
492     max = GET2(ccode,3);
493     if (max == 0) fprintf(f, "{%d,}", min);
494     else fprintf(f, "{%d,%d}", min, max);
495     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
496     extra += _pcre_OP_lengths[*ccode];
497     break;
498 nigel 87
499     /* Do nothing if it's not a repeat; this code stops picky compilers
500     warning about the lack of a default code path. */
501    
502     default:
503     break;
504 nigel 85 }
505     }
506     break;
507    
508     /* Anything else is just an item with no data */
509    
510     default:
511     fprintf(f, " %s", OP_names[*code]);
512     break;
513     }
514    
/* Generic advance: the length recorded for this opcode in _pcre_OP_lengths
plus whatever variable part was accumulated in extra. */
515     code += _pcre_OP_lengths[*code] + extra;
516     fprintf(f, "\n");
517     }
518     }
519    
520     /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12