/[pcre]/code/tags/pcre-5.0/printint.c
ViewVC logotype

Contents of /code/tags/pcre-5.0/printint.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 75 - (show annotations) (download)
Sat Feb 24 21:40:37 2007 UTC (7 years, 7 months ago) by nigel
Original Path: code/trunk/printint.c
File MIME type: text/plain
File size: 12606 byte(s)
Load pcre-5.0 into code/trunk.

1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4
5 /*
6 This is a library of functions to support regular expressions whose syntax
7 and semantics are as close as possible to those of the Perl 5 language. See
8 the file Tech.Notes for some information on the internals.
9
10 Written by: Philip Hazel <ph10@cam.ac.uk>
11
12 Copyright (c) 1997-2004 University of Cambridge
13
14 -----------------------------------------------------------------------------
15 Redistribution and use in source and binary forms, with or without
16 modification, are permitted provided that the following conditions are met:
17
18 * Redistributions of source code must retain the above copyright notice,
19 this list of conditions and the following disclaimer.
20
21 * Redistributions in binary form must reproduce the above copyright
22 notice, this list of conditions and the following disclaimer in the
23 documentation and/or other materials provided with the distribution.
24
25 * Neither the name of the University of Cambridge nor the names of its
26 contributors may be used to endorse or promote products derived from
27 this software without specific prior written permission.
28
29 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39 POSSIBILITY OF SUCH DAMAGE.
40 -----------------------------------------------------------------------------
41 */
42
43
44 /* This module contains a debugging function for printing out the internal form
45 of a compiled regular expression. It is kept in a separate file so that it can
46 be #included both in the pcretest program, and in the library itself when
47 compiled with the debugging switch. */
48
49
/* Table of printable opcode names, indexed by opcode value (it is looked up
below as OP_names[*code]). The entries are supplied by the OP_NAME_LIST macro,
which is not defined in this file. */
static const char *OP_names[] = { OP_NAME_LIST };
51
52
53 /*************************************************
54 * Print single- or multi-byte character *
55 *************************************************/
56
57 /* These tables are actually copies of ones in pcre.c. If we compile the
58 library with debugging, they are included twice, but that isn't really a
59 problem - compiling with debugging is pretty rare and these are very small. */
60
/* Masks for the data bits held in the first byte of a UTF-8 sequence, indexed
by the number of additional bytes that follow it. */
static const int utf8_t3[] = {
  0xff,   /* 0 additional bytes */
  0x1f,   /* 1 additional byte  */
  0x0f,   /* 2 additional bytes */
  0x07,   /* 3 additional bytes */
  0x03,   /* 4 additional bytes */
  0x01    /* 5 additional bytes */
};
62
63 static const uschar utf8_t4[] = {
64 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
65 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
66 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
67 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
68
69 static int
70 print_char(FILE *f, uschar *ptr, BOOL utf8)
71 {
72 int c = *ptr;
73
74 if (!utf8 || (c & 0xc0) != 0xc0)
75 {
76 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
77 return 0;
78 }
79 else
80 {
81 int i;
82 int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
83 int s = 6*a;
84 c = (c & utf8_t3[a]) << s;
85 for (i = 1; i <= a; i++)
86 {
87 /* This is a check for malformed UTF-8; it should only occur if the sanity
88 check has been turned off. Rather than swallow random bytes, just stop if
89 we hit a bad one. Print it with \X instead of \x as an indication. */
90
91 if ((ptr[i] & 0xc0) != 0x80)
92 {
93 fprintf(f, "\\X{%x}", c);
94 return i - 1;
95 }
96
97 /* The byte is OK */
98
99 s -= 6;
100 c |= (ptr[i] & 0x3f) << s;
101 }
102 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
103 return a;
104 }
105 }
106
107
108
109
110 /*************************************************
111 * Find Unicode property name *
112 *************************************************/
113
114 static const char *
115 get_ucpname(int property)
116 {
117 int i;
118 for (i = sizeof(utt)/sizeof(ucp_type_table); i >= 0; i--)
119 {
120 if (property == utt[i].value) break;
121 }
122 return (i >= 0)? utt[i].name : "??";
123 }
124
125
126
127 /*************************************************
128 * Print compiled regex *
129 *************************************************/
130
131 /* Make this function work for a regex with integers either byte order.
132 However, we assume that what we are passed is a compiled regex. */
133
134 static void
135 print_internals(pcre *external_re, FILE *f)
136 {
137 real_pcre *re = (real_pcre *)external_re;
138 uschar *codestart, *code;
139 BOOL utf8;
140
141 unsigned int options = re->options;
142 int offset = re->name_table_offset;
143 int count = re->name_count;
144 int size = re->name_entry_size;
145
146 if (re->magic_number != MAGIC_NUMBER)
147 {
148 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
149 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
150 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
151 options = ((options << 24) & 0xff000000) |
152 ((options << 8) & 0x00ff0000) |
153 ((options >> 8) & 0x0000ff00) |
154 ((options >> 24) & 0x000000ff);
155 }
156
157 code = codestart = (uschar *)re + offset + count * size;
158 utf8 = (options & PCRE_UTF8) != 0;
159
160 for(;;)
161 {
162 uschar *ccode;
163 int c;
164 int extra = 0;
165
166 fprintf(f, "%3d ", code - codestart);
167
168 if (*code >= OP_BRA)
169 {
170 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
171 fprintf(f, "%3d Bra extra\n", GET(code, 1));
172 else
173 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
174 code += OP_lengths[OP_BRA];
175 continue;
176 }
177
178 switch(*code)
179 {
180 case OP_END:
181 fprintf(f, " %s\n", OP_names[*code]);
182 fprintf(f, "------------------------------------------------------------------\n");
183 return;
184
185 case OP_OPT:
186 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
187 break;
188
189 case OP_CHAR:
190 {
191 fprintf(f, " ");
192 do
193 {
194 code++;
195 code += 1 + print_char(f, code, utf8);
196 }
197 while (*code == OP_CHAR);
198 fprintf(f, "\n");
199 continue;
200 }
201 break;
202
203 case OP_CHARNC:
204 {
205 fprintf(f, " NC ");
206 do
207 {
208 code++;
209 code += 1 + print_char(f, code, utf8);
210 }
211 while (*code == OP_CHARNC);
212 fprintf(f, "\n");
213 continue;
214 }
215 break;
216
217 case OP_KETRMAX:
218 case OP_KETRMIN:
219 case OP_ALT:
220 case OP_KET:
221 case OP_ASSERT:
222 case OP_ASSERT_NOT:
223 case OP_ASSERTBACK:
224 case OP_ASSERTBACK_NOT:
225 case OP_ONCE:
226 case OP_COND:
227 case OP_REVERSE:
228 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
229 break;
230
231 case OP_BRANUMBER:
232 printf("%3d %s", GET2(code, 1), OP_names[*code]);
233 break;
234
235 case OP_CREF:
236 if (GET2(code, 1) == CREF_RECURSE)
237 fprintf(f, " Cond recurse");
238 else
239 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
240 break;
241
242 case OP_STAR:
243 case OP_MINSTAR:
244 case OP_PLUS:
245 case OP_MINPLUS:
246 case OP_QUERY:
247 case OP_MINQUERY:
248 case OP_TYPESTAR:
249 case OP_TYPEMINSTAR:
250 case OP_TYPEPLUS:
251 case OP_TYPEMINPLUS:
252 case OP_TYPEQUERY:
253 case OP_TYPEMINQUERY:
254 fprintf(f, " ");
255 if (*code >= OP_TYPESTAR)
256 {
257 fprintf(f, "%s", OP_names[code[1]]);
258 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
259 {
260 fprintf(f, " %s ", get_ucpname(code[2]));
261 extra = 1;
262 }
263 }
264 else extra = print_char(f, code+1, utf8);
265 fprintf(f, "%s", OP_names[*code]);
266 break;
267
268 case OP_EXACT:
269 case OP_UPTO:
270 case OP_MINUPTO:
271 fprintf(f, " ");
272 extra = print_char(f, code+3, utf8);
273 fprintf(f, "{");
274 if (*code != OP_EXACT) fprintf(f, ",");
275 fprintf(f, "%d}", GET2(code,1));
276 if (*code == OP_MINUPTO) fprintf(f, "?");
277 break;
278
279 case OP_TYPEEXACT:
280 case OP_TYPEUPTO:
281 case OP_TYPEMINUPTO:
282 fprintf(f, " %s", OP_names[code[3]]);
283 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
284 {
285 fprintf(f, " %s ", get_ucpname(code[4]));
286 extra = 1;
287 }
288 fprintf(f, "{");
289 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
290 fprintf(f, "%d}", GET2(code,1));
291 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
292 break;
293
294 case OP_NOT:
295 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
296 else fprintf(f, " [^\\x%02x]", c);
297 break;
298
299 case OP_NOTSTAR:
300 case OP_NOTMINSTAR:
301 case OP_NOTPLUS:
302 case OP_NOTMINPLUS:
303 case OP_NOTQUERY:
304 case OP_NOTMINQUERY:
305 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
306 else fprintf(f, " [^\\x%02x]", c);
307 fprintf(f, "%s", OP_names[*code]);
308 break;
309
310 case OP_NOTEXACT:
311 case OP_NOTUPTO:
312 case OP_NOTMINUPTO:
313 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
314 else fprintf(f, " [^\\x%02x]{", c);
315 if (*code != OP_NOTEXACT) fprintf(f, ",");
316 fprintf(f, "%d}", GET2(code,1));
317 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
318 break;
319
320 case OP_RECURSE:
321 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
322 break;
323
324 case OP_REF:
325 fprintf(f, " \\%d", GET2(code,1));
326 ccode = code + OP_lengths[*code];
327 goto CLASS_REF_REPEAT;
328
329 case OP_CALLOUT:
330 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
331 GET(code, 2 + LINK_SIZE));
332 break;
333
334 case OP_PROP:
335 case OP_NOTPROP:
336 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
337 break;
338
339 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
340 having this code always here, and it makes it less messy without all those
341 #ifdefs. */
342
343 case OP_CLASS:
344 case OP_NCLASS:
345 case OP_XCLASS:
346 {
347 int i, min, max;
348 BOOL printmap;
349
350 fprintf(f, " [");
351
352 if (*code == OP_XCLASS)
353 {
354 extra = GET(code, 1);
355 ccode = code + LINK_SIZE + 1;
356 printmap = (*ccode & XCL_MAP) != 0;
357 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
358 }
359 else
360 {
361 printmap = TRUE;
362 ccode = code + 1;
363 }
364
365 /* Print a bit map */
366
367 if (printmap)
368 {
369 for (i = 0; i < 256; i++)
370 {
371 if ((ccode[i/8] & (1 << (i&7))) != 0)
372 {
373 int j;
374 for (j = i+1; j < 256; j++)
375 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
376 if (i == '-' || i == ']') fprintf(f, "\\");
377 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
378 if (--j > i)
379 {
380 if (j != i + 1) fprintf(f, "-");
381 if (j == '-' || j == ']') fprintf(f, "\\");
382 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
383 }
384 i = j;
385 }
386 }
387 ccode += 32;
388 }
389
390 /* For an XCLASS there is always some additional data */
391
392 if (*code == OP_XCLASS)
393 {
394 int ch;
395 while ((ch = *ccode++) != XCL_END)
396 {
397 if (ch == XCL_PROP)
398 {
399 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
400 }
401 else if (ch == XCL_NOTPROP)
402 {
403 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
404 }
405 else
406 {
407 ccode += 1 + print_char(f, ccode, TRUE);
408 if (ch == XCL_RANGE)
409 {
410 fprintf(f, "-");
411 ccode += 1 + print_char(f, ccode, TRUE);
412 }
413 }
414 }
415 }
416
417 /* Indicate a non-UTF8 class which was created by negation */
418
419 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
420
421 /* Handle repeats after a class or a back reference */
422
423 CLASS_REF_REPEAT:
424 switch(*ccode)
425 {
426 case OP_CRSTAR:
427 case OP_CRMINSTAR:
428 case OP_CRPLUS:
429 case OP_CRMINPLUS:
430 case OP_CRQUERY:
431 case OP_CRMINQUERY:
432 fprintf(f, "%s", OP_names[*ccode]);
433 extra += OP_lengths[*ccode];
434 break;
435
436 case OP_CRRANGE:
437 case OP_CRMINRANGE:
438 min = GET2(ccode,1);
439 max = GET2(ccode,3);
440 if (max == 0) fprintf(f, "{%d,}", min);
441 else fprintf(f, "{%d,%d}", min, max);
442 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
443 extra += OP_lengths[*ccode];
444 break;
445 }
446 }
447 break;
448
449 /* Anything else is just an item with no data*/
450
451 default:
452 fprintf(f, " %s", OP_names[*code]);
453 break;
454 }
455
456 code += OP_lengths[*code] + extra;
457 fprintf(f, "\n");
458 }
459 }
460
461 /* End of printint.c */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12