/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 510 - (hide annotations) (download) (as text)
Sat Mar 27 17:45:29 2010 UTC (4 years ago) by ph10
File MIME type: application/x-wais-source
File size: 15926 byte(s)
Add support for *MARK and names for *PRUNE, *SKIP, *THEN.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 475 Copyright (c) 1997-2010 University of Cambridge
10 nigel 85
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46 ph10 475 (PCRE_DEBUG defined in pcre_internal.h). It is not included in production
47     compiles.
48 nigel 85
49     (2) It is always #included by pcretest.c, which can be asked to print out a
50     compiled regex for debugging purposes. */
51    
52    
53 nigel 93 /* Macro that decides whether a character should be output as a literal or in
54     hexadecimal. We don't use isprint() because that can vary from system to system
55     (even without the use of locales) and we want the output always to be the same,
56     for testing purposes. This macro is used in pcretest as well as in this file. */
57    
/* PRINTABLE(c) is non-zero when c lies in the printable range: 64..254 for
EBCDIC, 32..126 otherwise. The argument is evaluated exactly once, so an
expression with side effects (for example PRINTABLE(*p++)) is safe. The
unsigned subtraction maps every value below the lower bound to a very large
number, which lets a single comparison check both ends of the range. The
argument is expected to be of type int or narrower. */

#ifdef EBCDIC
#define PRINTABLE(c) ((unsigned int)(c) - 64u < 191u)
#else
#define PRINTABLE(c) ((unsigned int)(c) - 32u < 95u)
#endif
63 nigel 93
64     /* The table of operator names. */
65    
66 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
67    
68    
69 nigel 93
70 nigel 85 /*************************************************
71     * Print single- or multi-byte character *
72     *************************************************/
73    
74     static int
75     print_char(FILE *f, uschar *ptr, BOOL utf8)
76     {
77     int c = *ptr;
78    
79 ph10 107 #ifndef SUPPORT_UTF8
80     utf8 = utf8; /* Avoid compiler warning */
81     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
82     return 0;
83    
84     #else
85 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
86     {
87 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
88 nigel 85 return 0;
89     }
90     else
91     {
92     int i;
93     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
94     int s = 6*a;
95     c = (c & _pcre_utf8_table3[a]) << s;
96     for (i = 1; i <= a; i++)
97     {
98     /* This is a check for malformed UTF-8; it should only occur if the sanity
99     check has been turned off. Rather than swallow random bytes, just stop if
100     we hit a bad one. Print it with \X instead of \x as an indication. */
101    
102     if ((ptr[i] & 0xc0) != 0x80)
103     {
104     fprintf(f, "\\X{%x}", c);
105     return i - 1;
106     }
107    
108     /* The byte is OK */
109    
110     s -= 6;
111     c |= (ptr[i] & 0x3f) << s;
112     }
113     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
114     return a;
115     }
116 ph10 111 #endif
117 nigel 85 }
118    
119    
120    
121     /*************************************************
122     * Find Unicode property name *
123     *************************************************/
124    
/* Return the name of the Unicode property identified by a type/value pair.

Arguments:
  ptype    the property type
  pvalue   the property value

Returns: when SUPPORT_UCP is defined, a pointer to the name of the matching
         entry in _pcre_utt, or "??" if no entry matches; without
         SUPPORT_UCP the result is always "??"
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
  int i;
  for (i = _pcre_utt_size - 1; i >= 0; i--)
    {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
    }
  return (i >= 0)? _pcre_utt_names + _pcre_utt[i].name_offset : "??";
#else
  /* The arguments are unused here. The casts keep compilers quiet without
  doing any arithmetic on the values, so no input can cause an overflow. */
  (void)ptype;
  (void)pvalue;
  return "??";
#endif
}
141    
142    
143    
144     /*************************************************
145     * Print compiled regex *
146     *************************************************/
147    
148     /* Make this function work for a regex with integers in either byte order.
149 ph10 116 However, we assume that what we are passed is a compiled regex. The
150 ph10 123 print_lengths flag controls whether offsets and lengths of items are printed.
151 ph10 116 They can be turned off from pcretest so that automatic tests on bytecode can be
152     written that do not depend on the value of LINK_SIZE. */
153 nigel 85
154     static void
155 ph10 116 pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
156 nigel 85 {
157     real_pcre *re = (real_pcre *)external_re;
158     uschar *codestart, *code;
159     BOOL utf8;
160    
161     unsigned int options = re->options;
162     int offset = re->name_table_offset;
163     int count = re->name_count;
164     int size = re->name_entry_size;
165    
166     if (re->magic_number != MAGIC_NUMBER)
167     {
168     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
169     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
170     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
171     options = ((options << 24) & 0xff000000) |
172     ((options << 8) & 0x00ff0000) |
173     ((options >> 8) & 0x0000ff00) |
174     ((options >> 24) & 0x000000ff);
175     }
176    
177     code = codestart = (uschar *)re + offset + count * size;
178     utf8 = (options & PCRE_UTF8) != 0;
179    
180     for(;;)
181     {
182     uschar *ccode;
183     int c;
184     int extra = 0;
185    
186 ph10 116 if (print_lengths)
187     fprintf(f, "%3d ", (int)(code - codestart));
188     else
189 ph10 123 fprintf(f, " ");
190 nigel 85
191     switch(*code)
192     {
193 ph10 498 /* ========================================================================== */
194     /* These cases are never obeyed. This is a fudge that causes a compile-
195     time error if the vectors OP_names or _pcre_OP_lengths, which are indexed
196     by opcode, are not the correct length. It seems to be the only way to do
197     such a check at compile time, as the sizeof() operator does not work in
198     the C preprocessor. We do this while compiling pcretest, because that
199     #includes pcre_tables.c, which holds _pcre_OP_lengths. We can't do this
200     when building pcre_compile.c with PCRE_DEBUG set, because it doesn't then
201     know the size of _pcre_OP_lengths. */
202    
203     #ifdef COMPILING_PCRETEST
204     case OP_TABLE_LENGTH:
205     case OP_TABLE_LENGTH +
206     ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
207     (sizeof(_pcre_OP_lengths) == OP_TABLE_LENGTH)):
208     break;
209     #endif
210     /* ========================================================================== */
211    
212 nigel 85 case OP_END:
213     fprintf(f, " %s\n", OP_names[*code]);
214     fprintf(f, "------------------------------------------------------------------\n");
215     return;
216    
217     case OP_OPT:
218     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
219     break;
220    
221     case OP_CHAR:
222 nigel 91 fprintf(f, " ");
223     do
224 nigel 85 {
225 nigel 91 code++;
226     code += 1 + print_char(f, code, utf8);
227 nigel 85 }
228 nigel 91 while (*code == OP_CHAR);
229     fprintf(f, "\n");
230     continue;
231 nigel 85
232     case OP_CHARNC:
233 nigel 91 fprintf(f, " NC ");
234     do
235 nigel 85 {
236 nigel 91 code++;
237     code += 1 + print_char(f, code, utf8);
238 nigel 85 }
239 nigel 91 while (*code == OP_CHARNC);
240     fprintf(f, "\n");
241     continue;
242 nigel 85
243 nigel 93 case OP_CBRA:
244     case OP_SCBRA:
245 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
246 ph10 123 else fprintf(f, " ");
247 ph10 116 fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));
248 nigel 93 break;
249    
250     case OP_BRA:
251     case OP_SBRA:
252 nigel 85 case OP_KETRMAX:
253     case OP_KETRMIN:
254     case OP_ALT:
255     case OP_KET:
256     case OP_ASSERT:
257     case OP_ASSERT_NOT:
258     case OP_ASSERTBACK:
259     case OP_ASSERTBACK_NOT:
260     case OP_ONCE:
261     case OP_COND:
262 nigel 93 case OP_SCOND:
263 nigel 85 case OP_REVERSE:
264 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
265 ph10 123 else fprintf(f, " ");
266 ph10 116 fprintf(f, "%s", OP_names[*code]);
267 nigel 85 break;
268 ph10 461
269 ph10 447 case OP_CLOSE:
270     fprintf(f, " %s %d", OP_names[*code], GET2(code, 1));
271 ph10 461 break;
272 nigel 85
273 nigel 93 case OP_CREF:
274 ph10 461 case OP_NCREF:
275 nigel 93 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
276 nigel 85 break;
277    
278 nigel 93 case OP_RREF:
279     c = GET2(code, 1);
280     if (c == RREF_ANY)
281     fprintf(f, " Cond recurse any");
282 nigel 85 else
283 nigel 93 fprintf(f, " Cond recurse %d", c);
284 nigel 85 break;
285    
286 ph10 459 case OP_NRREF:
287     c = GET2(code, 1);
288     if (c == RREF_ANY)
289     fprintf(f, " Cond nrecurse any");
290     else
291     fprintf(f, " Cond nrecurse %d", c);
292     break;
293    
294 nigel 93 case OP_DEF:
295     fprintf(f, " Cond def");
296     break;
297    
298 nigel 85 case OP_STAR:
299     case OP_MINSTAR:
300 nigel 93 case OP_POSSTAR:
301 nigel 85 case OP_PLUS:
302     case OP_MINPLUS:
303 nigel 93 case OP_POSPLUS:
304 nigel 85 case OP_QUERY:
305     case OP_MINQUERY:
306 nigel 93 case OP_POSQUERY:
307 nigel 85 case OP_TYPESTAR:
308     case OP_TYPEMINSTAR:
309 nigel 93 case OP_TYPEPOSSTAR:
310 nigel 85 case OP_TYPEPLUS:
311     case OP_TYPEMINPLUS:
312 nigel 93 case OP_TYPEPOSPLUS:
313 nigel 85 case OP_TYPEQUERY:
314     case OP_TYPEMINQUERY:
315 nigel 93 case OP_TYPEPOSQUERY:
316 nigel 85 fprintf(f, " ");
317     if (*code >= OP_TYPESTAR)
318     {
319     fprintf(f, "%s", OP_names[code[1]]);
320     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
321     {
322 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
323     extra = 2;
324 nigel 85 }
325     }
326     else extra = print_char(f, code+1, utf8);
327     fprintf(f, "%s", OP_names[*code]);
328     break;
329    
330     case OP_EXACT:
331     case OP_UPTO:
332     case OP_MINUPTO:
333 nigel 93 case OP_POSUPTO:
334 nigel 85 fprintf(f, " ");
335     extra = print_char(f, code+3, utf8);
336     fprintf(f, "{");
337 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
338 nigel 85 fprintf(f, "%d}", GET2(code,1));
339     if (*code == OP_MINUPTO) fprintf(f, "?");
340 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
341 nigel 85 break;
342    
343     case OP_TYPEEXACT:
344     case OP_TYPEUPTO:
345     case OP_TYPEMINUPTO:
346 nigel 93 case OP_TYPEPOSUPTO:
347 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
348     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
349     {
350 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
351     extra = 2;
352 nigel 85 }
353     fprintf(f, "{");
354     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
355     fprintf(f, "%d}", GET2(code,1));
356     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
357 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
358 nigel 85 break;
359    
360     case OP_NOT:
361 nigel 93 c = code[1];
362     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
363 nigel 85 else fprintf(f, " [^\\x%02x]", c);
364     break;
365    
366     case OP_NOTSTAR:
367     case OP_NOTMINSTAR:
368 nigel 93 case OP_NOTPOSSTAR:
369 nigel 85 case OP_NOTPLUS:
370     case OP_NOTMINPLUS:
371 nigel 93 case OP_NOTPOSPLUS:
372 nigel 85 case OP_NOTQUERY:
373     case OP_NOTMINQUERY:
374 nigel 93 case OP_NOTPOSQUERY:
375     c = code[1];
376     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
377 nigel 85 else fprintf(f, " [^\\x%02x]", c);
378     fprintf(f, "%s", OP_names[*code]);
379     break;
380    
381     case OP_NOTEXACT:
382     case OP_NOTUPTO:
383     case OP_NOTMINUPTO:
384 nigel 93 case OP_NOTPOSUPTO:
385     c = code[3];
386     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
387 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
388     if (*code != OP_NOTEXACT) fprintf(f, "0,");
389     fprintf(f, "%d}", GET2(code,1));
390     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
391 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
392 nigel 85 break;
393    
394     case OP_RECURSE:
395 ph10 116 if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
396 ph10 123 else fprintf(f, " ");
397 ph10 116 fprintf(f, "%s", OP_names[*code]);
398 nigel 85 break;
399    
400     case OP_REF:
401     fprintf(f, " \\%d", GET2(code,1));
402     ccode = code + _pcre_OP_lengths[*code];
403     goto CLASS_REF_REPEAT;
404    
405     case OP_CALLOUT:
406     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
407     GET(code, 2 + LINK_SIZE));
408     break;
409    
410     case OP_PROP:
411     case OP_NOTPROP:
412 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
413 nigel 85 break;
414    
415     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
416     having this code always here, and it makes it less messy without all those
417     #ifdefs. */
418    
419     case OP_CLASS:
420     case OP_NCLASS:
421     case OP_XCLASS:
422     {
423     int i, min, max;
424     BOOL printmap;
425    
426     fprintf(f, " [");
427    
428     if (*code == OP_XCLASS)
429     {
430     extra = GET(code, 1);
431     ccode = code + LINK_SIZE + 1;
432     printmap = (*ccode & XCL_MAP) != 0;
433     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
434     }
435     else
436     {
437     printmap = TRUE;
438     ccode = code + 1;
439     }
440    
441     /* Print a bit map */
442    
443     if (printmap)
444     {
445     for (i = 0; i < 256; i++)
446     {
447     if ((ccode[i/8] & (1 << (i&7))) != 0)
448     {
449     int j;
450     for (j = i+1; j < 256; j++)
451     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
452     if (i == '-' || i == ']') fprintf(f, "\\");
453 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
454     else fprintf(f, "\\x%02x", i);
455 nigel 85 if (--j > i)
456     {
457     if (j != i + 1) fprintf(f, "-");
458     if (j == '-' || j == ']') fprintf(f, "\\");
459 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
460     else fprintf(f, "\\x%02x", j);
461 nigel 85 }
462     i = j;
463     }
464     }
465     ccode += 32;
466     }
467    
468     /* For an XCLASS there is always some additional data */
469    
470     if (*code == OP_XCLASS)
471     {
472     int ch;
473     while ((ch = *ccode++) != XCL_END)
474     {
475     if (ch == XCL_PROP)
476     {
477 nigel 87 int ptype = *ccode++;
478     int pvalue = *ccode++;
479     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
480 nigel 85 }
481     else if (ch == XCL_NOTPROP)
482     {
483 nigel 87 int ptype = *ccode++;
484     int pvalue = *ccode++;
485     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
486 nigel 85 }
487     else
488     {
489     ccode += 1 + print_char(f, ccode, TRUE);
490     if (ch == XCL_RANGE)
491     {
492     fprintf(f, "-");
493     ccode += 1 + print_char(f, ccode, TRUE);
494     }
495     }
496     }
497     }
498    
499     /* Indicate a non-UTF8 class which was created by negation */
500    
501     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
502    
503     /* Handle repeats after a class or a back reference */
504    
505     CLASS_REF_REPEAT:
506     switch(*ccode)
507     {
508     case OP_CRSTAR:
509     case OP_CRMINSTAR:
510     case OP_CRPLUS:
511     case OP_CRMINPLUS:
512     case OP_CRQUERY:
513     case OP_CRMINQUERY:
514     fprintf(f, "%s", OP_names[*ccode]);
515     extra += _pcre_OP_lengths[*ccode];
516     break;
517    
518     case OP_CRRANGE:
519     case OP_CRMINRANGE:
520     min = GET2(ccode,1);
521     max = GET2(ccode,3);
522     if (max == 0) fprintf(f, "{%d,}", min);
523     else fprintf(f, "{%d,%d}", min, max);
524     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
525     extra += _pcre_OP_lengths[*ccode];
526     break;
527 nigel 87
528     /* Do nothing if it's not a repeat; this code stops picky compilers
529     warning about the lack of a default code path. */
530    
531     default:
532     break;
533 nigel 85 }
534     }
535     break;
536 ph10 510
537     case OP_MARK:
538     case OP_PRUNE_ARG:
539     case OP_SKIP_ARG:
540     case OP_THEN_ARG:
541     fprintf(f, " %s %s", OP_names[*code], code + 2);
542     extra += code[1];
543     break;
544 nigel 85
545     /* Anything else is just an item with no data*/
546    
547     default:
548     fprintf(f, " %s", OP_names[*code]);
549     break;
550     }
551    
552     code += _pcre_OP_lengths[*code] + extra;
553     fprintf(f, "\n");
554     }
555     }
556    
557     /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12