/[pcre]/code/trunk/printint.c
ViewVC logotype

Contents of /code/trunk/printint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 75 - (hide annotations) (download)
Sat Feb 24 21:40:37 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 12606 byte(s)
Load pcre-5.0 into code/trunk.

1 nigel 63 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12 nigel 75 Copyright (c) 1997-2004 University of Cambridge
13 nigel 63
14     -----------------------------------------------------------------------------
15 nigel 75 Redistribution and use in source and binary forms, with or without
16     modification, are permitted provided that the following conditions are met:
17 nigel 63
18 nigel 75 * Redistributions of source code must retain the above copyright notice,
19     this list of conditions and the following disclaimer.
20 nigel 63
21 nigel 75 * Redistributions in binary form must reproduce the above copyright
22     notice, this list of conditions and the following disclaimer in the
23     documentation and/or other materials provided with the distribution.
24 nigel 63
25 nigel 75 * Neither the name of the University of Cambridge nor the names of its
26     contributors may be used to endorse or promote products derived from
27     this software without specific prior written permission.
28 nigel 63
29 nigel 75 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39     POSSIBILITY OF SUCH DAMAGE.
40 nigel 63 -----------------------------------------------------------------------------
41     */
42    
43    
44     /* This module contains a debugging function for printing out the internal form
45     of a compiled regular expression. It is kept in a separate file so that it can
46     be #included both in the pcretest program, and in the library itself when
47     compiled with the debugging switch. */
48    
49    
50     static const char *OP_names[] = { OP_NAME_LIST };
51    
52    
53     /*************************************************
54     * Print single- or multi-byte character *
55     *************************************************/
56    
57     /* These tables are actually copies of ones in pcre.c. If we compile the
58     library with debugging, they are included twice, but that isn't really a
59     problem - compiling with debugging is pretty rare and these are very small. */
60    
61 nigel 73 static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
62 nigel 63
63 nigel 73 static const uschar utf8_t4[] = {
64 nigel 63 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
65     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
66     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
67     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
68    
69     static int
70     print_char(FILE *f, uschar *ptr, BOOL utf8)
71     {
72     int c = *ptr;
73    
74     if (!utf8 || (c & 0xc0) != 0xc0)
75     {
76     if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
77     return 0;
78     }
79     else
80     {
81     int i;
82     int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
83     int s = 6*a;
84     c = (c & utf8_t3[a]) << s;
85     for (i = 1; i <= a; i++)
86     {
87 nigel 75 /* This is a check for malformed UTF-8; it should only occur if the sanity
88     check has been turned off. Rather than swallow random bytes, just stop if
89     we hit a bad one. Print it with \X instead of \x as an indication. */
90    
91     if ((ptr[i] & 0xc0) != 0x80)
92     {
93     fprintf(f, "\\X{%x}", c);
94     return i - 1;
95     }
96    
97     /* The byte is OK */
98    
99 nigel 63 s -= 6;
100     c |= (ptr[i] & 0x3f) << s;
101     }
102     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
103     return a;
104     }
105     }
106    
107    
108    
109    
110     /*************************************************
111 nigel 75 * Find Unicode property name *
112     *************************************************/
113    
114     static const char *
115     get_ucpname(int property)
116     {
117     int i;
118     for (i = sizeof(utt)/sizeof(ucp_type_table); i >= 0; i--)
119     {
120     if (property == utt[i].value) break;
121     }
122     return (i >= 0)? utt[i].name : "??";
123     }
124    
125    
126    
127     /*************************************************
128 nigel 63 * Print compiled regex *
129     *************************************************/
130    
131 nigel 75 /* Make this function work for a regex with integers either byte order.
132     However, we assume that what we are passed is a compiled regex. */
133    
134 nigel 63 static void
135     print_internals(pcre *external_re, FILE *f)
136     {
137     real_pcre *re = (real_pcre *)external_re;
138 nigel 75 uschar *codestart, *code;
139     BOOL utf8;
140 nigel 63
141 nigel 75 unsigned int options = re->options;
142     int offset = re->name_table_offset;
143     int count = re->name_count;
144     int size = re->name_entry_size;
145    
146     if (re->magic_number != MAGIC_NUMBER)
147     {
148     offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
149     count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
150     size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
151     options = ((options << 24) & 0xff000000) |
152     ((options << 8) & 0x00ff0000) |
153     ((options >> 8) & 0x0000ff00) |
154     ((options >> 24) & 0x000000ff);
155     }
156    
157     code = codestart = (uschar *)re + offset + count * size;
158     utf8 = (options & PCRE_UTF8) != 0;
159    
160 nigel 63 for(;;)
161     {
162     uschar *ccode;
163     int c;
164     int extra = 0;
165    
166     fprintf(f, "%3d ", code - codestart);
167    
168     if (*code >= OP_BRA)
169     {
170     if (*code - OP_BRA > EXTRACT_BASIC_MAX)
171     fprintf(f, "%3d Bra extra\n", GET(code, 1));
172     else
173     fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
174     code += OP_lengths[OP_BRA];
175     continue;
176     }
177    
178     switch(*code)
179     {
180     case OP_END:
181     fprintf(f, " %s\n", OP_names[*code]);
182     fprintf(f, "------------------------------------------------------------------\n");
183     return;
184    
185     case OP_OPT:
186     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
187     break;
188    
189 nigel 75 case OP_CHAR:
190 nigel 63 {
191 nigel 75 fprintf(f, " ");
192     do
193 nigel 63 {
194 nigel 75 code++;
195     code += 1 + print_char(f, code, utf8);
196 nigel 63 }
197 nigel 75 while (*code == OP_CHAR);
198     fprintf(f, "\n");
199     continue;
200 nigel 63 }
201     break;
202    
203 nigel 75 case OP_CHARNC:
204     {
205     fprintf(f, " NC ");
206     do
207     {
208     code++;
209     code += 1 + print_char(f, code, utf8);
210     }
211     while (*code == OP_CHARNC);
212     fprintf(f, "\n");
213     continue;
214     }
215     break;
216    
217 nigel 63 case OP_KETRMAX:
218     case OP_KETRMIN:
219     case OP_ALT:
220     case OP_KET:
221     case OP_ASSERT:
222     case OP_ASSERT_NOT:
223     case OP_ASSERTBACK:
224     case OP_ASSERTBACK_NOT:
225     case OP_ONCE:
226     case OP_COND:
227     case OP_REVERSE:
228     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
229     break;
230    
231     case OP_BRANUMBER:
232     printf("%3d %s", GET2(code, 1), OP_names[*code]);
233     break;
234    
235     case OP_CREF:
236     if (GET2(code, 1) == CREF_RECURSE)
237     fprintf(f, " Cond recurse");
238     else
239     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
240     break;
241    
242     case OP_STAR:
243     case OP_MINSTAR:
244     case OP_PLUS:
245     case OP_MINPLUS:
246     case OP_QUERY:
247     case OP_MINQUERY:
248     case OP_TYPESTAR:
249     case OP_TYPEMINSTAR:
250     case OP_TYPEPLUS:
251     case OP_TYPEMINPLUS:
252     case OP_TYPEQUERY:
253     case OP_TYPEMINQUERY:
254     fprintf(f, " ");
255 nigel 75 if (*code >= OP_TYPESTAR)
256     {
257     fprintf(f, "%s", OP_names[code[1]]);
258     if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
259     {
260     fprintf(f, " %s ", get_ucpname(code[2]));
261     extra = 1;
262     }
263     }
264     else extra = print_char(f, code+1, utf8);
265 nigel 63 fprintf(f, "%s", OP_names[*code]);
266     break;
267    
268     case OP_EXACT:
269     case OP_UPTO:
270     case OP_MINUPTO:
271     fprintf(f, " ");
272     extra = print_char(f, code+3, utf8);
273     fprintf(f, "{");
274     if (*code != OP_EXACT) fprintf(f, ",");
275     fprintf(f, "%d}", GET2(code,1));
276     if (*code == OP_MINUPTO) fprintf(f, "?");
277     break;
278    
279     case OP_TYPEEXACT:
280     case OP_TYPEUPTO:
281     case OP_TYPEMINUPTO:
282 nigel 75 fprintf(f, " %s", OP_names[code[3]]);
283     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
284     {
285     fprintf(f, " %s ", get_ucpname(code[4]));
286     extra = 1;
287     }
288     fprintf(f, "{");
289 nigel 63 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
290     fprintf(f, "%d}", GET2(code,1));
291     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
292     break;
293    
294     case OP_NOT:
295     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
296     else fprintf(f, " [^\\x%02x]", c);
297     break;
298    
299     case OP_NOTSTAR:
300     case OP_NOTMINSTAR:
301     case OP_NOTPLUS:
302     case OP_NOTMINPLUS:
303     case OP_NOTQUERY:
304     case OP_NOTMINQUERY:
305     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
306     else fprintf(f, " [^\\x%02x]", c);
307     fprintf(f, "%s", OP_names[*code]);
308     break;
309    
310     case OP_NOTEXACT:
311     case OP_NOTUPTO:
312     case OP_NOTMINUPTO:
313     if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
314     else fprintf(f, " [^\\x%02x]{", c);
315     if (*code != OP_NOTEXACT) fprintf(f, ",");
316     fprintf(f, "%d}", GET2(code,1));
317     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
318     break;
319    
320     case OP_RECURSE:
321     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
322     break;
323    
324     case OP_REF:
325     fprintf(f, " \\%d", GET2(code,1));
326     ccode = code + OP_lengths[*code];
327     goto CLASS_REF_REPEAT;
328    
329     case OP_CALLOUT:
330 nigel 75 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
331     GET(code, 2 + LINK_SIZE));
332 nigel 63 break;
333    
334 nigel 75 case OP_PROP:
335     case OP_NOTPROP:
336     fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
337     break;
338    
339 nigel 63 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
340     having this code always here, and it makes it less messy without all those
341     #ifdefs. */
342    
343     case OP_CLASS:
344     case OP_NCLASS:
345     case OP_XCLASS:
346     {
347     int i, min, max;
348     BOOL printmap;
349    
350     fprintf(f, " [");
351    
352     if (*code == OP_XCLASS)
353     {
354     extra = GET(code, 1);
355     ccode = code + LINK_SIZE + 1;
356     printmap = (*ccode & XCL_MAP) != 0;
357     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
358     }
359     else
360     {
361     printmap = TRUE;
362     ccode = code + 1;
363     }
364    
365     /* Print a bit map */
366    
367     if (printmap)
368     {
369     for (i = 0; i < 256; i++)
370     {
371     if ((ccode[i/8] & (1 << (i&7))) != 0)
372     {
373     int j;
374     for (j = i+1; j < 256; j++)
375     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
376     if (i == '-' || i == ']') fprintf(f, "\\");
377     if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
378     if (--j > i)
379     {
380 nigel 75 if (j != i + 1) fprintf(f, "-");
381 nigel 63 if (j == '-' || j == ']') fprintf(f, "\\");
382     if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
383     }
384     i = j;
385     }
386     }
387     ccode += 32;
388     }
389    
390     /* For an XCLASS there is always some additional data */
391    
392     if (*code == OP_XCLASS)
393     {
394 nigel 67 int ch;
395     while ((ch = *ccode++) != XCL_END)
396 nigel 63 {
397 nigel 75 if (ch == XCL_PROP)
398 nigel 63 {
399 nigel 75 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
400     }
401     else if (ch == XCL_NOTPROP)
402     {
403     fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
404     }
405     else
406     {
407 nigel 63 ccode += 1 + print_char(f, ccode, TRUE);
408 nigel 75 if (ch == XCL_RANGE)
409     {
410     fprintf(f, "-");
411     ccode += 1 + print_char(f, ccode, TRUE);
412     }
413 nigel 63 }
414     }
415     }
416    
417     /* Indicate a non-UTF8 class which was created by negation */
418    
419     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
420    
421     /* Handle repeats after a class or a back reference */
422    
423     CLASS_REF_REPEAT:
424     switch(*ccode)
425     {
426     case OP_CRSTAR:
427     case OP_CRMINSTAR:
428     case OP_CRPLUS:
429     case OP_CRMINPLUS:
430     case OP_CRQUERY:
431     case OP_CRMINQUERY:
432     fprintf(f, "%s", OP_names[*ccode]);
433 nigel 75 extra += OP_lengths[*ccode];
434 nigel 63 break;
435    
436     case OP_CRRANGE:
437     case OP_CRMINRANGE:
438     min = GET2(ccode,1);
439     max = GET2(ccode,3);
440     if (max == 0) fprintf(f, "{%d,}", min);
441     else fprintf(f, "{%d,%d}", min, max);
442     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
443 nigel 75 extra += OP_lengths[*ccode];
444 nigel 63 break;
445     }
446     }
447     break;
448    
449     /* Anything else is just an item with no data*/
450    
451     default:
452     fprintf(f, " %s", OP_names[*code]);
453     break;
454     }
455    
456     code += OP_lengths[*code] + extra;
457     fprintf(f, "\n");
458     }
459     }
460    
461     /* End of printint.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12