/[pcre]/code/trunk/pcre_printint.src
ViewVC logotype

Contents of /code/trunk/pcre_printint.src

Parent Directory Parent Directory | Revision Log Revision Log


Revision 111 - (hide annotations) (download) (as text)
Thu Mar 8 16:53:09 2007 UTC (7 years, 8 months ago) by ph10
File MIME type: application/x-wais-source
File size: 13868 byte(s)
Create the PrepareRelease script to process the documentation and create the 
.generic files for distribution, also to remove trailing spaces. Update a lot 
more of the build-time documentation. Arrange for PrepareRelease and its 
sub-scripts to be distributed.

1 nigel 85 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9     Copyright (c) 1997-2005 University of Cambridge
10    
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains a PCRE private debugging function for printing out the
42     internal form of a compiled regular expression, along with some supporting
43     local functions. This source file is used in two places:
44    
45     (1) It is #included by pcre_compile.c when it is compiled in debugging mode
46     (DEBUG defined in pcre_internal.h). It is not included in production compiles.
47    
48     (2) It is always #included by pcretest.c, which can be asked to print out a
49     compiled regex for debugging purposes. */
50    
51    
/* Decide whether a character is output as a literal or as a hexadecimal
escape. isprint() is deliberately not used, because it can vary from system to
system (even without the use of locales) and the output must always be the
same, for testing purposes. Character codes from 0x20 to 0x7e inclusive count
as printable. Note that the argument may be evaluated twice. This macro is
used in pcretest as well as in this file. */

#define PRINTABLE(c) ((c) >= 0x20 && (c) <= 0x7e)
58    
59     /* The table of operator names. It is indexed by opcode value: the printing
code below uses OP_names[*code] and OP_names[code[1]]. The initializer list
comes from the OP_NAME_LIST macro, which is defined outside this file. */
60    
61 nigel 85 static const char *OP_names[] = { OP_NAME_LIST };
62    
63    
64 nigel 93
65 nigel 85 /*************************************************
66     * Print single- or multi-byte character *
67     *************************************************/
68    
69     static int
70     print_char(FILE *f, uschar *ptr, BOOL utf8)
71     {
72     int c = *ptr;
73    
74 ph10 107 #ifndef SUPPORT_UTF8
75     utf8 = utf8; /* Avoid compiler warning */
76     if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
77     return 0;
78    
79     #else
80 nigel 85 if (!utf8 || (c & 0xc0) != 0xc0)
81     {
82 nigel 93 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
83 nigel 85 return 0;
84     }
85     else
86     {
87     int i;
88     int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
89     int s = 6*a;
90     c = (c & _pcre_utf8_table3[a]) << s;
91     for (i = 1; i <= a; i++)
92     {
93     /* This is a check for malformed UTF-8; it should only occur if the sanity
94     check has been turned off. Rather than swallow random bytes, just stop if
95     we hit a bad one. Print it with \X instead of \x as an indication. */
96    
97     if ((ptr[i] & 0xc0) != 0x80)
98     {
99     fprintf(f, "\\X{%x}", c);
100     return i - 1;
101     }
102    
103     /* The byte is OK */
104    
105     s -= 6;
106     c |= (ptr[i] & 0x3f) << s;
107     }
108     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
109     return a;
110     }
111 ph10 111 #endif
112 nigel 85 }
113    
114    
115    
116     /*************************************************
117     * Find Unicode property name *
118     *************************************************/
119    
/* Look up the name of a Unicode property in the _pcre_utt table.

Arguments:
  ptype     compared with the "type" field of each table entry
  pvalue    compared with the "value" field of each table entry

Returns:    the "name" field of the matching entry, or "??" if no entry
            matches; always "??" when SUPPORT_UCP is not defined
*/

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* _pcre_utt_size is taken to be the number of entries in _pcre_utt (both are
defined outside this file), so the last valid index is _pcre_utt_size - 1.
Starting the scan at _pcre_utt_size itself would examine one element beyond
the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* It gets harder and harder to shut off unwanted compiler warnings. The
arguments are used in a dummy computation; the result is "??" either way. */
ptype = ptype * pvalue;
return (ptype == pvalue)? "??" : "??";
#endif
}
136    
137    
138    
139     /*************************************************
140     * Print compiled regex *
141     *************************************************/
142    
143     /* Make this function work for a regex with integers in either byte order.
144     However, we assume that what we are passed is a compiled regex. */
145    
146     static void
147     pcre_printint(pcre *external_re, FILE *f)
148     {
149     real_pcre *re = (real_pcre *)external_re;
150     uschar *codestart, *code;
151     BOOL utf8;
152    
153     unsigned int options = re->options;
154     int offset = re->name_table_offset;
155     int count = re->name_count;
156     int size = re->name_entry_size;
157    
158     if (re->magic_number != MAGIC_NUMBER)
159     {
160     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
161     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
162     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
163     options = ((options << 24) & 0xff000000) |
164     ((options << 8) & 0x00ff0000) |
165     ((options >> 8) & 0x0000ff00) |
166     ((options >> 24) & 0x000000ff);
167     }
168    
169     code = codestart = (uschar *)re + offset + count * size;
170     utf8 = (options & PCRE_UTF8) != 0;
171    
172     for(;;)
173     {
174     uschar *ccode;
175     int c;
176     int extra = 0;
177    
178     fprintf(f, "%3d ", (int)(code - codestart));
179    
180     switch(*code)
181     {
182     case OP_END:
183     fprintf(f, " %s\n", OP_names[*code]);
184     fprintf(f, "------------------------------------------------------------------\n");
185     return;
186    
187     case OP_OPT:
188     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
189     break;
190    
191     case OP_CHAR:
192 nigel 91 fprintf(f, " ");
193     do
194 nigel 85 {
195 nigel 91 code++;
196     code += 1 + print_char(f, code, utf8);
197 nigel 85 }
198 nigel 91 while (*code == OP_CHAR);
199     fprintf(f, "\n");
200     continue;
201 nigel 85
202     case OP_CHARNC:
203 nigel 91 fprintf(f, " NC ");
204     do
205 nigel 85 {
206 nigel 91 code++;
207     code += 1 + print_char(f, code, utf8);
208 nigel 85 }
209 nigel 91 while (*code == OP_CHARNC);
210     fprintf(f, "\n");
211     continue;
212 nigel 85
213 nigel 93 case OP_CBRA:
214     case OP_SCBRA:
215     fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
216     GET2(code, 1+LINK_SIZE));
217     break;
218    
219     case OP_BRA:
220     case OP_SBRA:
221 nigel 85 case OP_KETRMAX:
222     case OP_KETRMIN:
223     case OP_ALT:
224     case OP_KET:
225     case OP_ASSERT:
226     case OP_ASSERT_NOT:
227     case OP_ASSERTBACK:
228     case OP_ASSERTBACK_NOT:
229     case OP_ONCE:
230     case OP_COND:
231 nigel 93 case OP_SCOND:
232 nigel 85 case OP_REVERSE:
233     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
234     break;
235    
236 nigel 93 case OP_CREF:
237     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
238 nigel 85 break;
239    
240 nigel 93 case OP_RREF:
241     c = GET2(code, 1);
242     if (c == RREF_ANY)
243     fprintf(f, " Cond recurse any");
244 nigel 85 else
245 nigel 93 fprintf(f, " Cond recurse %d", c);
246 nigel 85 break;
247    
248 nigel 93 case OP_DEF:
249     fprintf(f, " Cond def");
250     break;
251    
252 nigel 85 case OP_STAR:
253     case OP_MINSTAR:
254 nigel 93 case OP_POSSTAR:
255 nigel 85 case OP_PLUS:
256     case OP_MINPLUS:
257 nigel 93 case OP_POSPLUS:
258 nigel 85 case OP_QUERY:
259     case OP_MINQUERY:
260 nigel 93 case OP_POSQUERY:
261 nigel 85 case OP_TYPESTAR:
262     case OP_TYPEMINSTAR:
263 nigel 93 case OP_TYPEPOSSTAR:
264 nigel 85 case OP_TYPEPLUS:
265     case OP_TYPEMINPLUS:
266 nigel 93 case OP_TYPEPOSPLUS:
267 nigel 85 case OP_TYPEQUERY:
268     case OP_TYPEMINQUERY:
269 nigel 93 case OP_TYPEPOSQUERY:
270 nigel 85 fprintf(f, " ");
271     if (*code >= OP_TYPESTAR)
272     {
273     fprintf(f, "%s", OP_names[code[1]]);
274     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
275     {
276 nigel 87 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
277     extra = 2;
278 nigel 85 }
279     }
280     else extra = print_char(f, code+1, utf8);
281     fprintf(f, "%s", OP_names[*code]);
282     break;
283    
284     case OP_EXACT:
285     case OP_UPTO:
286     case OP_MINUPTO:
287 nigel 93 case OP_POSUPTO:
288 nigel 85 fprintf(f, " ");
289     extra = print_char(f, code+3, utf8);
290     fprintf(f, "{");
291 nigel 93 if (*code != OP_EXACT) fprintf(f, "0,");
292 nigel 85 fprintf(f, "%d}", GET2(code,1));
293     if (*code == OP_MINUPTO) fprintf(f, "?");
294 nigel 93 else if (*code == OP_POSUPTO) fprintf(f, "+");
295 nigel 85 break;
296    
297     case OP_TYPEEXACT:
298     case OP_TYPEUPTO:
299     case OP_TYPEMINUPTO:
300 nigel 93 case OP_TYPEPOSUPTO:
301 nigel 85 fprintf(f, " %s", OP_names[code[3]]);
302     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
303     {
304 nigel 87 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
305     extra = 2;
306 nigel 85 }
307     fprintf(f, "{");
308     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
309     fprintf(f, "%d}", GET2(code,1));
310     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
311 nigel 93 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
312 nigel 85 break;
313    
314     case OP_NOT:
315 nigel 93 c = code[1];
316     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
317 nigel 85 else fprintf(f, " [^\\x%02x]", c);
318     break;
319    
320     case OP_NOTSTAR:
321     case OP_NOTMINSTAR:
322 nigel 93 case OP_NOTPOSSTAR:
323 nigel 85 case OP_NOTPLUS:
324     case OP_NOTMINPLUS:
325 nigel 93 case OP_NOTPOSPLUS:
326 nigel 85 case OP_NOTQUERY:
327     case OP_NOTMINQUERY:
328 nigel 93 case OP_NOTPOSQUERY:
329     c = code[1];
330     if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
331 nigel 85 else fprintf(f, " [^\\x%02x]", c);
332     fprintf(f, "%s", OP_names[*code]);
333     break;
334    
335     case OP_NOTEXACT:
336     case OP_NOTUPTO:
337     case OP_NOTMINUPTO:
338 nigel 93 case OP_NOTPOSUPTO:
339     c = code[3];
340     if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
341 nigel 85 else fprintf(f, " [^\\x%02x]{", c);
342     if (*code != OP_NOTEXACT) fprintf(f, "0,");
343     fprintf(f, "%d}", GET2(code,1));
344     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
345 nigel 93 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
346 nigel 85 break;
347    
348     case OP_RECURSE:
349     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
350     break;
351    
352     case OP_REF:
353     fprintf(f, " \\%d", GET2(code,1));
354     ccode = code + _pcre_OP_lengths[*code];
355     goto CLASS_REF_REPEAT;
356    
357     case OP_CALLOUT:
358     fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
359     GET(code, 2 + LINK_SIZE));
360     break;
361    
362     case OP_PROP:
363     case OP_NOTPROP:
364 nigel 87 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
365 nigel 85 break;
366    
367     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
368     having this code always here, and it makes it less messy without all those
369     #ifdefs. */
370    
371     case OP_CLASS:
372     case OP_NCLASS:
373     case OP_XCLASS:
374     {
375     int i, min, max;
376     BOOL printmap;
377    
378     fprintf(f, " [");
379    
380     if (*code == OP_XCLASS)
381     {
382     extra = GET(code, 1);
383     ccode = code + LINK_SIZE + 1;
384     printmap = (*ccode & XCL_MAP) != 0;
385     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
386     }
387     else
388     {
389     printmap = TRUE;
390     ccode = code + 1;
391     }
392    
393     /* Print a bit map */
394    
395     if (printmap)
396     {
397     for (i = 0; i < 256; i++)
398     {
399     if ((ccode[i/8] & (1 << (i&7))) != 0)
400     {
401     int j;
402     for (j = i+1; j < 256; j++)
403     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
404     if (i == '-' || i == ']') fprintf(f, "\\");
405 nigel 93 if (PRINTABLE(i)) fprintf(f, "%c", i);
406     else fprintf(f, "\\x%02x", i);
407 nigel 85 if (--j > i)
408     {
409     if (j != i + 1) fprintf(f, "-");
410     if (j == '-' || j == ']') fprintf(f, "\\");
411 nigel 93 if (PRINTABLE(j)) fprintf(f, "%c", j);
412     else fprintf(f, "\\x%02x", j);
413 nigel 85 }
414     i = j;
415     }
416     }
417     ccode += 32;
418     }
419    
420     /* For an XCLASS there is always some additional data */
421    
422     if (*code == OP_XCLASS)
423     {
424     int ch;
425     while ((ch = *ccode++) != XCL_END)
426     {
427     if (ch == XCL_PROP)
428     {
429 nigel 87 int ptype = *ccode++;
430     int pvalue = *ccode++;
431     fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
432 nigel 85 }
433     else if (ch == XCL_NOTPROP)
434     {
435 nigel 87 int ptype = *ccode++;
436     int pvalue = *ccode++;
437     fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
438 nigel 85 }
439     else
440     {
441     ccode += 1 + print_char(f, ccode, TRUE);
442     if (ch == XCL_RANGE)
443     {
444     fprintf(f, "-");
445     ccode += 1 + print_char(f, ccode, TRUE);
446     }
447     }
448     }
449     }
450    
451     /* Indicate a non-UTF8 class which was created by negation */
452    
453     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
454    
455     /* Handle repeats after a class or a back reference */
456    
457     CLASS_REF_REPEAT:
458     switch(*ccode)
459     {
460     case OP_CRSTAR:
461     case OP_CRMINSTAR:
462     case OP_CRPLUS:
463     case OP_CRMINPLUS:
464     case OP_CRQUERY:
465     case OP_CRMINQUERY:
466     fprintf(f, "%s", OP_names[*ccode]);
467     extra += _pcre_OP_lengths[*ccode];
468     break;
469    
470     case OP_CRRANGE:
471     case OP_CRMINRANGE:
472     min = GET2(ccode,1);
473     max = GET2(ccode,3);
474     if (max == 0) fprintf(f, "{%d,}", min);
475     else fprintf(f, "{%d,%d}", min, max);
476     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
477     extra += _pcre_OP_lengths[*ccode];
478     break;
479 nigel 87
480     /* Do nothing if it's not a repeat; this code stops picky compilers
481     warning about the lack of a default code path. */
482    
483     default:
484     break;
485 nigel 85 }
486     }
487     break;
488    
489     /* Anything else is just an item with no data*/
490    
491     default:
492     fprintf(f, " %s", OP_names[*code]);
493     break;
494     }
495    
496     code += _pcre_OP_lengths[*code] + extra;
497     fprintf(f, "\n");
498     }
499     }
500    
501     /* End of pcre_printint.src */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12