/[pcre]/code/tags/pcre-4.2/printint.c
ViewVC logotype

Contents of /code/tags/pcre-4.2/printint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 67 - (hide annotations) (download)
Sat Feb 24 21:40:13 2007 UTC (7 years, 8 months ago) by nigel
Original Path: code/trunk/printint.c
File MIME type: text/plain
File size: 9610 byte(s)
Load pcre-4.2 into code/trunk.

1 nigel 63 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /*
6     This is a library of functions to support regular expressions whose syntax
7     and semantics are as close as possible to those of the Perl 5 language. See
8     the file Tech.Notes for some information on the internals.
9    
10     Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12     Copyright (c) 1997-2003 University of Cambridge
13    
14     -----------------------------------------------------------------------------
15     Permission is granted to anyone to use this software for any purpose on any
16     computer system, and to redistribute it freely, subject to the following
17     restrictions:
18    
19     1. This software is distributed in the hope that it will be useful,
20     but WITHOUT ANY WARRANTY; without even the implied warranty of
21     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22    
23     2. The origin of this software must not be misrepresented, either by
24     explicit claim or by omission.
25    
26     3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.
28    
29     4. If PCRE is embedded in any software that is released under the GNU
30     General Purpose Licence (GPL), then the terms of that licence shall
31     supersede any condition above with which it is incompatible.
32     -----------------------------------------------------------------------------
33     */
34    
35    
36     /* This module contains a debugging function for printing out the internal form
37     of a compiled regular expression. It is kept in a separate file so that it can
38     be #included both in the pcretest program, and in the library itself when
39     compiled with the debugging switch. */
40    
41    
42     static const char *OP_names[] = { OP_NAME_LIST };
43    
44    
45     /*************************************************
46     * Print single- or multi-byte character *
47     *************************************************/
48    
49     /* These tables are actually copies of ones in pcre.c. If we compile the
50     library with debugging, they are included twice, but that isn't really a
51     problem - compiling with debugging is pretty rare and these are very small. */
52    
53     static int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
54    
55     static uschar utf8_t4[] = {
56     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57     1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58     2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
59     3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
60    
61     static int
62     print_char(FILE *f, uschar *ptr, BOOL utf8)
63     {
64     int c = *ptr;
65    
66     if (!utf8 || (c & 0xc0) != 0xc0)
67     {
68     if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
69     return 0;
70     }
71     else
72     {
73     int i;
74     int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
75     int s = 6*a;
76     c = (c & utf8_t3[a]) << s;
77     for (i = 1; i <= a; i++)
78     {
79     s -= 6;
80     c |= (ptr[i] & 0x3f) << s;
81     }
82     if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
83     return a;
84     }
85     }
86    
87    
88    
89    
90     /*************************************************
91     * Print compiled regex *
92     *************************************************/
93    
94     static void
95     print_internals(pcre *external_re, FILE *f)
96     {
97     real_pcre *re = (real_pcre *)external_re;
98     uschar *codestart =
99     (uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size;
100     uschar *code = codestart;
101     BOOL utf8 = (re->options & PCRE_UTF8) != 0;
102    
103     for(;;)
104     {
105     uschar *ccode;
106     int c;
107     int extra = 0;
108    
109     fprintf(f, "%3d ", code - codestart);
110    
111     if (*code >= OP_BRA)
112     {
113     if (*code - OP_BRA > EXTRACT_BASIC_MAX)
114     fprintf(f, "%3d Bra extra\n", GET(code, 1));
115     else
116     fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
117     code += OP_lengths[OP_BRA];
118     continue;
119     }
120    
121     switch(*code)
122     {
123     case OP_END:
124     fprintf(f, " %s\n", OP_names[*code]);
125     fprintf(f, "------------------------------------------------------------------\n");
126     return;
127    
128     case OP_OPT:
129     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
130     break;
131    
132     case OP_CHARS:
133     {
134     int charlength = code[1];
135     ccode = code + 2;
136     extra = charlength;
137     fprintf(f, "%3d ", charlength);
138     while (charlength > 0)
139     {
140 nigel 67 int extrabytes = print_char(f, ccode, utf8);
141     ccode += 1 + extrabytes;
142     charlength -= 1 + extrabytes;
143 nigel 63 }
144     }
145     break;
146    
147     case OP_KETRMAX:
148     case OP_KETRMIN:
149     case OP_ALT:
150     case OP_KET:
151     case OP_ASSERT:
152     case OP_ASSERT_NOT:
153     case OP_ASSERTBACK:
154     case OP_ASSERTBACK_NOT:
155     case OP_ONCE:
156     case OP_COND:
157     case OP_REVERSE:
158     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
159     break;
160    
161     case OP_BRANUMBER:
162     printf("%3d %s", GET2(code, 1), OP_names[*code]);
163     break;
164    
165     case OP_CREF:
166     if (GET2(code, 1) == CREF_RECURSE)
167     fprintf(f, " Cond recurse");
168     else
169     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
170     break;
171    
172     case OP_STAR:
173     case OP_MINSTAR:
174     case OP_PLUS:
175     case OP_MINPLUS:
176     case OP_QUERY:
177     case OP_MINQUERY:
178     case OP_TYPESTAR:
179     case OP_TYPEMINSTAR:
180     case OP_TYPEPLUS:
181     case OP_TYPEMINPLUS:
182     case OP_TYPEQUERY:
183     case OP_TYPEMINQUERY:
184     fprintf(f, " ");
185     if (*code >= OP_TYPESTAR) fprintf(f, "%s", OP_names[code[1]]);
186     else extra = print_char(f, code+1, utf8);
187     fprintf(f, "%s", OP_names[*code]);
188     break;
189    
190     case OP_EXACT:
191     case OP_UPTO:
192     case OP_MINUPTO:
193     fprintf(f, " ");
194     extra = print_char(f, code+3, utf8);
195     fprintf(f, "{");
196     if (*code != OP_EXACT) fprintf(f, ",");
197     fprintf(f, "%d}", GET2(code,1));
198     if (*code == OP_MINUPTO) fprintf(f, "?");
199     break;
200    
201     case OP_TYPEEXACT:
202     case OP_TYPEUPTO:
203     case OP_TYPEMINUPTO:
204     fprintf(f, " %s{", OP_names[code[3]]);
205     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
206     fprintf(f, "%d}", GET2(code,1));
207     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
208     break;
209    
210     case OP_NOT:
211     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
212     else fprintf(f, " [^\\x%02x]", c);
213     break;
214    
215     case OP_NOTSTAR:
216     case OP_NOTMINSTAR:
217     case OP_NOTPLUS:
218     case OP_NOTMINPLUS:
219     case OP_NOTQUERY:
220     case OP_NOTMINQUERY:
221     if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
222     else fprintf(f, " [^\\x%02x]", c);
223     fprintf(f, "%s", OP_names[*code]);
224     break;
225    
226     case OP_NOTEXACT:
227     case OP_NOTUPTO:
228     case OP_NOTMINUPTO:
229     if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
230     else fprintf(f, " [^\\x%02x]{", c);
231     if (*code != OP_NOTEXACT) fprintf(f, ",");
232     fprintf(f, "%d}", GET2(code,1));
233     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
234     break;
235    
236     case OP_RECURSE:
237     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
238     break;
239    
240     case OP_REF:
241     fprintf(f, " \\%d", GET2(code,1));
242     ccode = code + OP_lengths[*code];
243     goto CLASS_REF_REPEAT;
244    
245     case OP_CALLOUT:
246     fprintf(f, " %s %d", OP_names[*code], code[1]);
247     break;
248    
249     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
250     having this code always here, and it makes it less messy without all those
251     #ifdefs. */
252    
253     case OP_CLASS:
254     case OP_NCLASS:
255     case OP_XCLASS:
256     {
257     int i, min, max;
258     BOOL printmap;
259    
260     fprintf(f, " [");
261    
262     if (*code == OP_XCLASS)
263     {
264     extra = GET(code, 1);
265     ccode = code + LINK_SIZE + 1;
266     printmap = (*ccode & XCL_MAP) != 0;
267     if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
268     }
269     else
270     {
271     printmap = TRUE;
272     ccode = code + 1;
273     }
274    
275     /* Print a bit map */
276    
277     if (printmap)
278     {
279     for (i = 0; i < 256; i++)
280     {
281     if ((ccode[i/8] & (1 << (i&7))) != 0)
282     {
283     int j;
284     for (j = i+1; j < 256; j++)
285     if ((ccode[j/8] & (1 << (j&7))) == 0) break;
286     if (i == '-' || i == ']') fprintf(f, "\\");
287     if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
288     if (--j > i)
289     {
290     fprintf(f, "-");
291     if (j == '-' || j == ']') fprintf(f, "\\");
292     if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
293     }
294     i = j;
295     }
296     }
297     ccode += 32;
298     }
299    
300     /* For an XCLASS there is always some additional data */
301    
302     if (*code == OP_XCLASS)
303     {
304 nigel 67 int ch;
305     while ((ch = *ccode++) != XCL_END)
306 nigel 63 {
307     ccode += 1 + print_char(f, ccode, TRUE);
308 nigel 67 if (ch == XCL_RANGE)
309 nigel 63 {
310     fprintf(f, "-");
311     ccode += 1 + print_char(f, ccode, TRUE);
312     }
313     }
314     }
315    
316     /* Indicate a non-UTF8 class which was created by negation */
317    
318     fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
319    
320     /* Handle repeats after a class or a back reference */
321    
322     CLASS_REF_REPEAT:
323     switch(*ccode)
324     {
325     case OP_CRSTAR:
326     case OP_CRMINSTAR:
327     case OP_CRPLUS:
328     case OP_CRMINPLUS:
329     case OP_CRQUERY:
330     case OP_CRMINQUERY:
331     fprintf(f, "%s", OP_names[*ccode]);
332     extra = OP_lengths[*ccode];
333     break;
334    
335     case OP_CRRANGE:
336     case OP_CRMINRANGE:
337     min = GET2(ccode,1);
338     max = GET2(ccode,3);
339     if (max == 0) fprintf(f, "{%d,}", min);
340     else fprintf(f, "{%d,%d}", min, max);
341     if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
342     extra = OP_lengths[*ccode];
343     break;
344     }
345     }
346     break;
347    
348     /* Anything else is just an item with no data*/
349    
350     default:
351     fprintf(f, " %s", OP_names[*code]);
352     break;
353     }
354    
355     code += OP_lengths[*code] + extra;
356     fprintf(f, "\n");
357     }
358     }
359    
360     /* End of printint.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12