/[pcre]/code/trunk/printint.c
ViewVC logotype

Contents of /code/trunk/printint.c

Parent Directory | Revision Log


Revision 63 - (show annotations) (download)
Sat Feb 24 21:40:03 2007 UTC (7 years, 6 months ago) by nigel
File MIME type: text/plain
File size: 9592 byte(s)
Load pcre-4.0 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 Written by: Philip Hazel <ph10@cam.ac.uk>
11
12 Copyright (c) 1997-2003 University of Cambridge
13
14 -----------------------------------------------------------------------------
15 Permission is granted to anyone to use this software for any purpose on any
16 computer system, and to redistribute it freely, subject to the following
17 restrictions:
18
19 1. This software is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22
23 2. The origin of this software must not be misrepresented, either by
24 explicit claim or by omission.
25
26 3. Altered versions must be plainly marked as such, and must not be
27 misrepresented as being the original software.
28
29 4. If PCRE is embedded in any software that is released under the GNU
30 General Purpose Licence (GPL), then the terms of that licence shall
31 supersede any condition above with which it is incompatible.
32 -----------------------------------------------------------------------------
33 */
34
35
36 /* This module contains a debugging function for printing out the internal form
37 of a compiled regular expression. It is kept in a separate file so that it can
38 be #included both in the pcretest program, and in the library itself when
39 compiled with the debugging switch. */
40
41
42 static const char *OP_names[] = { OP_NAME_LIST };
43
44
45 /*************************************************
46 * Print single- or multi-byte character *
47 *************************************************/
48
49 /* These tables are actually copies of ones in pcre.c. If we compile the
50 library with debugging, they are included twice, but that isn't really a
51 problem - compiling with debugging is pretty rare and these are very small. */
52
/* Mask applied to the first byte of a multi-byte character to extract its
data bits, indexed by the number of additional bytes. Read-only, hence const. */

static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
54
55 static uschar utf8_t4[] = {
56 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
57 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
58 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
59 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
60
61 static int
62 print_char(FILE *f, uschar *ptr, BOOL utf8)
63 {
64 int c = *ptr;
65
66 if (!utf8 || (c & 0xc0) != 0xc0)
67 {
68 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
69 return 0;
70 }
71 else
72 {
73 int i;
74 int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
75 int s = 6*a;
76 c = (c & utf8_t3[a]) << s;
77 for (i = 1; i <= a; i++)
78 {
79 s -= 6;
80 c |= (ptr[i] & 0x3f) << s;
81 }
82 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
83 return a;
84 }
85 }
86
87
88
89
90 /*************************************************
91 * Print compiled regex *
92 *************************************************/
93
94 static void
95 print_internals(pcre *external_re, FILE *f)
96 {
97 real_pcre *re = (real_pcre *)external_re;
98 uschar *codestart =
99 (uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size;
100 uschar *code = codestart;
101 BOOL utf8 = (re->options & PCRE_UTF8) != 0;
102
103 for(;;)
104 {
105 uschar *ccode;
106 int c;
107 int extra = 0;
108
109 fprintf(f, "%3d ", code - codestart);
110
111 if (*code >= OP_BRA)
112 {
113 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
114 fprintf(f, "%3d Bra extra\n", GET(code, 1));
115 else
116 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
117 code += OP_lengths[OP_BRA];
118 continue;
119 }
120
121 switch(*code)
122 {
123 case OP_END:
124 fprintf(f, " %s\n", OP_names[*code]);
125 fprintf(f, "------------------------------------------------------------------\n");
126 return;
127
128 case OP_OPT:
129 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
130 break;
131
132 case OP_CHARS:
133 {
134 int charlength = code[1];
135 ccode = code + 2;
136 extra = charlength;
137 fprintf(f, "%3d ", charlength);
138 while (charlength > 0)
139 {
140 int extra = print_char(f, ccode, utf8);
141 ccode += 1 + extra;
142 charlength -= 1 + extra;
143 }
144 }
145 break;
146
147 case OP_KETRMAX:
148 case OP_KETRMIN:
149 case OP_ALT:
150 case OP_KET:
151 case OP_ASSERT:
152 case OP_ASSERT_NOT:
153 case OP_ASSERTBACK:
154 case OP_ASSERTBACK_NOT:
155 case OP_ONCE:
156 case OP_COND:
157 case OP_REVERSE:
158 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
159 break;
160
161 case OP_BRANUMBER:
162 printf("%3d %s", GET2(code, 1), OP_names[*code]);
163 break;
164
165 case OP_CREF:
166 if (GET2(code, 1) == CREF_RECURSE)
167 fprintf(f, " Cond recurse");
168 else
169 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
170 break;
171
172 case OP_STAR:
173 case OP_MINSTAR:
174 case OP_PLUS:
175 case OP_MINPLUS:
176 case OP_QUERY:
177 case OP_MINQUERY:
178 case OP_TYPESTAR:
179 case OP_TYPEMINSTAR:
180 case OP_TYPEPLUS:
181 case OP_TYPEMINPLUS:
182 case OP_TYPEQUERY:
183 case OP_TYPEMINQUERY:
184 fprintf(f, " ");
185 if (*code >= OP_TYPESTAR) fprintf(f, "%s", OP_names[code[1]]);
186 else extra = print_char(f, code+1, utf8);
187 fprintf(f, "%s", OP_names[*code]);
188 break;
189
190 case OP_EXACT:
191 case OP_UPTO:
192 case OP_MINUPTO:
193 fprintf(f, " ");
194 extra = print_char(f, code+3, utf8);
195 fprintf(f, "{");
196 if (*code != OP_EXACT) fprintf(f, ",");
197 fprintf(f, "%d}", GET2(code,1));
198 if (*code == OP_MINUPTO) fprintf(f, "?");
199 break;
200
201 case OP_TYPEEXACT:
202 case OP_TYPEUPTO:
203 case OP_TYPEMINUPTO:
204 fprintf(f, " %s{", OP_names[code[3]]);
205 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
206 fprintf(f, "%d}", GET2(code,1));
207 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
208 break;
209
210 case OP_NOT:
211 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
212 else fprintf(f, " [^\\x%02x]", c);
213 break;
214
215 case OP_NOTSTAR:
216 case OP_NOTMINSTAR:
217 case OP_NOTPLUS:
218 case OP_NOTMINPLUS:
219 case OP_NOTQUERY:
220 case OP_NOTMINQUERY:
221 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
222 else fprintf(f, " [^\\x%02x]", c);
223 fprintf(f, "%s", OP_names[*code]);
224 break;
225
226 case OP_NOTEXACT:
227 case OP_NOTUPTO:
228 case OP_NOTMINUPTO:
229 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
230 else fprintf(f, " [^\\x%02x]{", c);
231 if (*code != OP_NOTEXACT) fprintf(f, ",");
232 fprintf(f, "%d}", GET2(code,1));
233 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
234 break;
235
236 case OP_RECURSE:
237 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
238 break;
239
240 case OP_REF:
241 fprintf(f, " \\%d", GET2(code,1));
242 ccode = code + OP_lengths[*code];
243 goto CLASS_REF_REPEAT;
244
245 case OP_CALLOUT:
246 fprintf(f, " %s %d", OP_names[*code], code[1]);
247 break;
248
249 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
250 having this code always here, and it makes it less messy without all those
251 #ifdefs. */
252
253 case OP_CLASS:
254 case OP_NCLASS:
255 case OP_XCLASS:
256 {
257 int i, min, max;
258 BOOL printmap;
259
260 fprintf(f, " [");
261
262 if (*code == OP_XCLASS)
263 {
264 extra = GET(code, 1);
265 ccode = code + LINK_SIZE + 1;
266 printmap = (*ccode & XCL_MAP) != 0;
267 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
268 }
269 else
270 {
271 printmap = TRUE;
272 ccode = code + 1;
273 }
274
275 /* Print a bit map */
276
277 if (printmap)
278 {
279 for (i = 0; i < 256; i++)
280 {
281 if ((ccode[i/8] & (1 << (i&7))) != 0)
282 {
283 int j;
284 for (j = i+1; j < 256; j++)
285 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
286 if (i == '-' || i == ']') fprintf(f, "\\");
287 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
288 if (--j > i)
289 {
290 fprintf(f, "-");
291 if (j == '-' || j == ']') fprintf(f, "\\");
292 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
293 }
294 i = j;
295 }
296 }
297 ccode += 32;
298 }
299
300 /* For an XCLASS there is always some additional data */
301
302 if (*code == OP_XCLASS)
303 {
304 int c;
305 while ((c = *ccode++) != XCL_END)
306 {
307 ccode += 1 + print_char(f, ccode, TRUE);
308 if (c == XCL_RANGE)
309 {
310 fprintf(f, "-");
311 ccode += 1 + print_char(f, ccode, TRUE);
312 }
313 }
314 }
315
316 /* Indicate a non-UTF8 class which was created by negation */
317
318 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
319
320 /* Handle repeats after a class or a back reference */
321
322 CLASS_REF_REPEAT:
323 switch(*ccode)
324 {
325 case OP_CRSTAR:
326 case OP_CRMINSTAR:
327 case OP_CRPLUS:
328 case OP_CRMINPLUS:
329 case OP_CRQUERY:
330 case OP_CRMINQUERY:
331 fprintf(f, "%s", OP_names[*ccode]);
332 extra = OP_lengths[*ccode];
333 break;
334
335 case OP_CRRANGE:
336 case OP_CRMINRANGE:
337 min = GET2(ccode,1);
338 max = GET2(ccode,3);
339 if (max == 0) fprintf(f, "{%d,}", min);
340 else fprintf(f, "{%d,%d}", min, max);
341 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
342 extra = OP_lengths[*ccode];
343 break;
344 }
345 }
346 break;
347
348 /* Anything else is just an item with no data*/
349
350 default:
351 fprintf(f, " %s", OP_names[*code]);
352 break;
353 }
354
355 code += OP_lengths[*code] + extra;
356 fprintf(f, "\n");
357 }
358 }
359
360 /* End of printint.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12