/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 96 - (hide annotations) (download) (as text)
Fri Mar 2 13:10:43 2007 UTC (7 years, 9 months ago) by nigel
File MIME type: application/x-wais-source
File size: 13710 byte(s)
 r6896@hex:  nm | 2007-03-02 13:09:14 +0000
 Added EOL and keyword properties throughout

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9     Copyright (c) 1997-2005 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
52 nigel 93 /* Macro that decides whether a character should be output as a literal or in
53     hexadecimal. We don't use isprint() because that can vary from system to system
54     (even without the use of locales) and we want the output always to be the same,
55     for testing purposes. This macro is used in pcretest as well as in this file. */
56    
/* True when c lies in the range 32..126 inclusive. The argument is expanded
twice, so it must not have side effects. */
57     #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
58    
59     /* The table of operator names. */
60    
/* Indexed by opcode value (used below as OP_names[*code]); the initializer
OP_NAME_LIST is defined outside this file. */
61 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
62    
63    
64 nigel 93
65 nigel 85 /*************************************************
66     * Print single- or multi-byte character *
67     *************************************************/
68    
69     static int
70     print_char(FILE *f, uschar *ptr, BOOL utf8)
71     {
72     int c = *ptr;
73    
74     if (!utf8 || (c & 0xc0) != 0xc0)
75     {
76 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
77 nigel 85 return 0;
78     }
79     else
80     {
81     int i;
82     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
83     int s = 6*a;
84     c = (c & _pcre_utf8_table3[a]) << s;
85     for (i = 1; i <= a; i++)
86     {
87     /* This is a check for malformed UTF-8; it should only occur if the sanity
88     check has been turned off. Rather than swallow random bytes, just stop if
89     we hit a bad one. Print it with \X instead of \x as an indication. */
90    
91     if ((ptr[i] & 0xc0) != 0x80)
92     {
93     fprintf(f, "\\X{%x}", c);
94     return i - 1;
95     }
96    
97     /* The byte is OK */
98    
99     s -= 6;
100     c |= (ptr[i] & 0x3f) << s;
101     }
102     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
103     return a;
104     }
105     }
106    
107    
108    
109     /*************************************************
110     * Find Unicode property name *
111     *************************************************/
112    
/* Find the name of a Unicode property from its type and value.

Arguments:
  ptype    the property type, compared with the .type field of _pcre_utt
  pvalue   the property value, compared with the .value field of _pcre_utt

Returns: the .name field of the matching table entry, or "??" if no entry
         matches or if the code is compiled without SUPPORT_UCP
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* Scan from the last entry down to the first. _pcre_utt_size is taken to be
the number of entries in _pcre_utt (its definition is outside this file), so
the last valid index is _pcre_utt_size - 1; starting at _pcre_utt_size would
read one element past the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. Both
arguments are used here purely so that they do not count as unused; the result
is always "??". */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
}
129    
130    
131    
132     /*************************************************
133     * Print compiled regex *
134     *************************************************/
135    
136     /* Make this function work for a regex with integers in either byte order.
137     However, we assume that what we are passed is a compiled regex. */
138    
/* Write a listing of the compiled pattern external_re to the stream f.

Arguments:
  external_re   the compiled pattern; it is cast to real_pcre * to read the
                header fields
  f             the stream to write to

Returns: nothing

Each item is preceded by its byte offset from the start of the compiled code.
The function returns after printing the OP_END item and a line of dashes. */
139     static void
140     pcre_printint(pcre *external_re, FILE *f)
141     {
142     real_pcre *re = (real_pcre *)external_re;
143     uschar *codestart, *code;
144     BOOL utf8;
145    
146     unsigned int options = re->options;
147     int offset = re->name_table_offset;
148     int count = re->name_count;
149     int size = re->name_entry_size;
150    
        /* If the magic number does not match, the local copies of the header
        fields are byte-reversed before use: the three name-table fields as
        16-bit values and the options as a 32-bit value. */
151     if (re->magic_number != MAGIC_NUMBER)
152     {
153     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
154     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
155     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
156     options = ((options << 24) & 0xff000000) |
157     ((options << 8) & 0x00ff0000) |
158     ((options >> 8) & 0x0000ff00) |
159     ((options >> 24) & 0x000000ff);
160     }
161    
        /* The compiled code starts after the name table, which is count
        entries of size bytes each, beginning offset bytes from the start of
        the real_pcre block. */
162     code = codestart = (uschar *)re + offset + count * size;
163     utf8 = (options & PCRE_UTF8) != 0;
164    
165     for(;;)
166     {
167     uschar *ccode;
168     int c;
        /* Bytes to skip in addition to the opcode's entry in _pcre_OP_lengths
        when advancing to the next item at the bottom of the loop. */
169     int extra = 0;
170    
171     fprintf(f, "%3d ", (int)(code - codestart));
172    
173     switch(*code)
174     {
175     case OP_END:
176     fprintf(f, " %s\n", OP_names[*code]);
177     fprintf(f, "------------------------------------------------------------------\n");
178     return;
179    
180     case OP_OPT:
181     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
182     break;
183    
        /* A run of consecutive OP_CHAR items is printed on one line. The loop
        advances code itself (past the opcode, then past the character), so
        this case ends with "continue" to bypass the common advance at the
        bottom of the main loop. */
184     case OP_CHAR:
185 nigel 91 fprintf(f, " ");
186     do
187 nigel 85 {
188 nigel 91 code++;
189     code += 1 + print_char(f, code, utf8);
190 nigel 85 }
191 nigel 91 while (*code == OP_CHAR);
192     fprintf(f, "\n");
193     continue;
194 nigel 85
        /* Same as OP_CHAR, but for runs of OP_CHARNC, prefixed with "NC". */
195     case OP_CHARNC:
196 nigel 91 fprintf(f, " NC ");
197     do
198 nigel 85 {
199 nigel 91 code++;
200     code += 1 + print_char(f, code, utf8);
201 nigel 85 }
202 nigel 91 while (*code == OP_CHARNC);
203     fprintf(f, "\n");
204     continue;
205 nigel 85
        /* These print the GET value at offset 1, the opcode name, and the
        GET2 value that follows the link field. */
206 nigel 93 case OP_CBRA:
207     case OP_SCBRA:
208     fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
209     GET2(code, 1+LINK_SIZE));
210     break;
211    
        /* These print the GET value at offset 1 followed by the opcode
        name. */
212     case OP_BRA:
213     case OP_SBRA:
214 nigel 85 case OP_KETRMAX:
215     case OP_KETRMIN:
216     case OP_ALT:
217     case OP_KET:
218     case OP_ASSERT:
219     case OP_ASSERT_NOT:
220     case OP_ASSERTBACK:
221     case OP_ASSERTBACK_NOT:
222     case OP_ONCE:
223     case OP_COND:
224 nigel 93 case OP_SCOND:
225 nigel 85 case OP_REVERSE:
226     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
227     break;
228    
229 nigel 93 case OP_CREF:
230     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
231 nigel 85 break;
232    
233 nigel 93 case OP_RREF:
234     c = GET2(code, 1);
235     if (c == RREF_ANY)
236     fprintf(f, " Cond recurse any");
237 nigel 85 else
238 nigel 93 fprintf(f, " Cond recurse %d", c);
239 nigel 85 break;
240    
241 nigel 93 case OP_DEF:
242     fprintf(f, " Cond def");
243     break;
244    
        /* Single-item repeats. For opcodes at or above OP_TYPESTAR the
        repeated item is an opcode in code[1], printed by name (with the
        property name and two extra bytes for OP_PROP/OP_NOTPROP); otherwise
        it is a character, and print_char() reports any extra bytes. */
245 nigel 85 case OP_STAR:
246     case OP_MINSTAR:
247 nigel 93 case OP_POSSTAR:
248 nigel 85 case OP_PLUS:
249     case OP_MINPLUS:
250 nigel 93 case OP_POSPLUS:
251 nigel 85 case OP_QUERY:
252     case OP_MINQUERY:
253 nigel 93 case OP_POSQUERY:
254 nigel 85 case OP_TYPESTAR:
255     case OP_TYPEMINSTAR:
256 nigel 93 case OP_TYPEPOSSTAR:
257 nigel 85 case OP_TYPEPLUS:
258     case OP_TYPEMINPLUS:
259 nigel 93 case OP_TYPEPOSPLUS:
260 nigel 85 case OP_TYPEQUERY:
261     case OP_TYPEMINQUERY:
262 nigel 93 case OP_TYPEPOSQUERY:
263 nigel 85 fprintf(f, " ");
264     if (*code >= OP_TYPESTAR)
265     {
266     fprintf(f, "%s", OP_names[code[1]]);
267     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
268     {
269 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
270     extra = 2;
271 nigel 85 }
272     }
273     else extra = print_char(f, code+1, utf8);
274     fprintf(f, "%s", OP_names[*code]);
275     break;
276    
        /* Counted repeats of a character: the count is the GET2 value at
        offset 1 and the character starts at offset 3. Everything except
        OP_EXACT is shown with a "0," lower bound. */
277     case OP_EXACT:
278     case OP_UPTO:
279     case OP_MINUPTO:
280 nigel 93 case OP_POSUPTO:
281 nigel 85 fprintf(f, " ");
282     extra = print_char(f, code+3, utf8);
283     fprintf(f, "{");
284 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
285 nigel 85 fprintf(f, "%d}", GET2(code,1));
286     if (*code == OP_MINUPTO) fprintf(f, "?");
287 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
288 nigel 85 break;
289    
        /* Counted repeats of an opcode item held in code[3]; OP_PROP and
        OP_NOTPROP carry two further data bytes, accounted for via extra. */
290     case OP_TYPEEXACT:
291     case OP_TYPEUPTO:
292     case OP_TYPEMINUPTO:
293 nigel 93 case OP_TYPEPOSUPTO:
294 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
295     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
296     {
297 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
298     extra = 2;
299 nigel 85 }
300     fprintf(f, "{");
301     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
302     fprintf(f, "%d}", GET2(code,1));
303     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
304 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
305 nigel 85 break;
306    
        /* Negated single byte, shown as [^c] or [^\xhh]. */
307     case OP_NOT:
308 nigel 93 c = code[1];
309     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
310 nigel 85 else fprintf(f, " [^\\x%02x]", c);
311     break;
312    
313     case OP_NOTSTAR:
314     case OP_NOTMINSTAR:
315 nigel 93 case OP_NOTPOSSTAR:
316 nigel 85 case OP_NOTPLUS:
317     case OP_NOTMINPLUS:
318 nigel 93 case OP_NOTPOSPLUS:
319 nigel 85 case OP_NOTQUERY:
320     case OP_NOTMINQUERY:
321 nigel 93 case OP_NOTPOSQUERY:
322     c = code[1];
323     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
324 nigel 85 else fprintf(f, " [^\\x%02x]", c);
325     fprintf(f, "%s", OP_names[*code]);
326     break;
327    
        /* Counted repeats of a negated byte: count at offset 1, byte at
        offset 3. */
328     case OP_NOTEXACT:
329     case OP_NOTUPTO:
330     case OP_NOTMINUPTO:
331 nigel 93 case OP_NOTPOSUPTO:
332     c = code[3];
333     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
334 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
335     if (*code != OP_NOTEXACT) fprintf(f, "0,");
336     fprintf(f, "%d}", GET2(code,1));
337     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
338 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
339 nigel 85 break;
340    
341     case OP_RECURSE:
342     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
343     break;
344    
        /* A back reference may be followed by a repeat item. ccode is set to
        the item after the reference and control jumps into the class block
        below to share its repeat-printing code. */
345     case OP_REF:
346     fprintf(f, " \\%d", GET2(code,1));
347     ccode = code + _pcre_OP_lengths[*code];
348     goto CLASS_REF_REPEAT;
349    
350     case OP_CALLOUT:
351     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
352     GET(code, 2 + LINK_SIZE));
353     break;
354    
355     case OP_PROP:
356     case OP_NOTPROP:
357 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
358 nigel 85 break;
359    
360     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
361     having this code always here, and it makes it less messy without all those
362     #ifdefs. */
363    
364     case OP_CLASS:
365     case OP_NCLASS:
366     case OP_XCLASS:
367     {
368     int i, min, max;
369     BOOL printmap;
370    
371     fprintf(f, " [");
372    
        /* For OP_XCLASS the GET value at offset 1 is added to the advance
        through extra, and a flag byte after the link field says whether a bit
        map is present (XCL_MAP) and whether the class is negated (XCL_NOT).
        The other class opcodes always have a bit map right after the
        opcode. */
373     if (*code == OP_XCLASS)
374     {
375     extra = GET(code, 1);
376     ccode = code + LINK_SIZE + 1;
377     printmap = (*ccode & XCL_MAP) != 0;
378     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
379     }
380     else
381     {
382     printmap = TRUE;
383     ccode = code + 1;
384     }
385    
386     /* Print a bit map */
387    
        /* The map is 32 bytes (256 bits). For each set bit i, j is advanced
        to the first clear bit after it (or 256) and then stepped back to the
        last set bit of the run. A run of one prints just i, a run of two
        prints both ends with no dash, and a longer run prints "i-j". The
        characters '-' and ']' are preceded by a backslash. */
388     if (printmap)
389     {
390     for (i = 0; i < 256; i++)
391     {
392     if ((ccode[i/8] & (1 << (i&7))) != 0)
393     {
394     int j;
395     for (j = i+1; j < 256; j++)
396     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
397     if (i == '-' || i == ']') fprintf(f, "\\");
398 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
399     else fprintf(f, "\\x%02x", i);
400 nigel 85 if (--j > i)
401     {
402     if (j != i + 1) fprintf(f, "-");
403     if (j == '-' || j == ']') fprintf(f, "\\");
404 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
405     else fprintf(f, "\\x%02x", j);
406 nigel 85 }
407     i = j;
408     }
409     }
410     ccode += 32;
411     }
412    
413     /* For an XCLASS there is always some additional data */
414    
        /* The additional data is a list of items ended by XCL_END: a property
        test (XCL_PROP or XCL_NOTPROP followed by type and value bytes), or a
        single character, or for XCL_RANGE a pair of characters. Characters
        here are always decoded as UTF-8. */
415     if (*code == OP_XCLASS)
416     {
417     int ch;
418     while ((ch = *ccode++) != XCL_END)
419     {
420     if (ch == XCL_PROP)
421     {
422 nigel 87 int ptype = *ccode++;
423     int pvalue = *ccode++;
424     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
425 nigel 85 }
426     else if (ch == XCL_NOTPROP)
427     {
428 nigel 87 int ptype = *ccode++;
429     int pvalue = *ccode++;
430     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
431 nigel 85 }
432     else
433     {
434     ccode += 1 + print_char(f, ccode, TRUE);
435     if (ch == XCL_RANGE)
436     {
437     fprintf(f, "-");
438     ccode += 1 + print_char(f, ccode, TRUE);
439     }
440     }
441     }
442     }
443    
444     /* Indicate a non-UTF8 class which was created by negation */
445    
446     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
447    
448     /* Handle repeats after a class or a back reference */
449    
        /* ccode now points at the item following the class or reference. If
        it is a repeat opcode it is printed on the same line and its length is
        added to extra so that the main loop steps over it as well. */
450     CLASS_REF_REPEAT:
451     switch(*ccode)
452     {
453     case OP_CRSTAR:
454     case OP_CRMINSTAR:
455     case OP_CRPLUS:
456     case OP_CRMINPLUS:
457     case OP_CRQUERY:
458     case OP_CRMINQUERY:
459     fprintf(f, "%s", OP_names[*ccode]);
460     extra += _pcre_OP_lengths[*ccode];
461     break;
462    
        /* A maximum of zero is printed as an open-ended range "{min,}". */
463     case OP_CRRANGE:
464     case OP_CRMINRANGE:
465     min = GET2(ccode,1);
466     max = GET2(ccode,3);
467     if (max == 0) fprintf(f, "{%d,}", min);
468     else fprintf(f, "{%d,%d}", min, max);
469     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
470     extra += _pcre_OP_lengths[*ccode];
471     break;
472 nigel 87
473     /* Do nothing if it's not a repeat; this code stops picky compilers
474     warning about the lack of a default code path. */
475    
476     default:
477     break;
478 nigel 85 }
479     }
480     break;
481    
482     /* Anything else is just an item with no data */
483    
484     default:
485     fprintf(f, " %s", OP_names[*code]);
486     break;
487     }
488    
        /* Common advance for every case that left the switch with "break":
        the opcode's table length plus whatever extra the case recorded. */
489     code += _pcre_OP_lengths[*code] + extra;
490     fprintf(f, "\n");
491     }
492     }
493    
494     /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12