| 21 |
#endif |
#endif |
| 22 |
#endif |
#endif |
| 23 |
|
|
| 24 |
|
#define LOOPREPEAT 10000 |
| 25 |
|
|
| 26 |
|
|
| 27 |
static FILE *outfile; |
static FILE *outfile; |
| 28 |
static int log_store = 0; |
static int log_store = 0; |
| 34 |
|
|
| 35 |
static const char *OP_names[] = { |
static const char *OP_names[] = { |
| 36 |
"End", "\\A", "\\B", "\\b", "\\D", "\\d", |
"End", "\\A", "\\B", "\\b", "\\D", "\\d", |
| 37 |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
"\\S", "\\s", "\\W", "\\w", "\\Z", "\\z", |
| 38 |
"not", |
"Opt", "^", "$", "Any", "chars", "not", |
| 39 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 40 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 41 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 42 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
| 43 |
"class", "negclass", "Ref", |
"class", "Ref", |
| 44 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
| 45 |
|
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
| 46 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
| 47 |
}; |
}; |
| 48 |
|
|
| 49 |
|
|
| 50 |
static void print_internals(pcre *re) |
static void print_internals(pcre *re, FILE *outfile) |
| 51 |
{ |
{ |
| 52 |
unsigned char *code = ((real_pcre *)re)->code; |
unsigned char *code = ((real_pcre *)re)->code; |
| 53 |
|
|
| 54 |
printf("------------------------------------------------------------------\n"); |
fprintf(outfile, "------------------------------------------------------------------\n"); |
| 55 |
|
|
| 56 |
for(;;) |
for(;;) |
| 57 |
{ |
{ |
| 58 |
int c; |
int c; |
| 59 |
int charlength; |
int charlength; |
| 60 |
|
|
| 61 |
printf("%3d ", code - ((real_pcre *)re)->code); |
fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code)); |
| 62 |
|
|
| 63 |
if (*code >= OP_BRA) |
if (*code >= OP_BRA) |
| 64 |
{ |
{ |
| 65 |
printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); |
fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); |
| 66 |
code += 2; |
code += 2; |
| 67 |
} |
} |
| 68 |
|
|
| 69 |
else switch(*code) |
else switch(*code) |
| 70 |
{ |
{ |
| 71 |
case OP_END: |
case OP_END: |
| 72 |
printf(" %s\n", OP_names[*code]); |
fprintf(outfile, " %s\n", OP_names[*code]); |
| 73 |
printf("------------------------------------------------------------------\n"); |
fprintf(outfile, "------------------------------------------------------------------\n"); |
| 74 |
return; |
return; |
| 75 |
|
|
| 76 |
|
case OP_OPT: |
| 77 |
|
fprintf(outfile, " %.2x %s", code[1], OP_names[*code]); |
| 78 |
|
code++; |
| 79 |
|
break; |
| 80 |
|
|
| 81 |
|
case OP_COND: |
| 82 |
|
fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]); |
| 83 |
|
code += 2; |
| 84 |
|
break; |
| 85 |
|
|
| 86 |
|
case OP_CREF: |
| 87 |
|
fprintf(outfile, " %.2d %s", code[1], OP_names[*code]); |
| 88 |
|
code++; |
| 89 |
|
break; |
| 90 |
|
|
| 91 |
case OP_CHARS: |
case OP_CHARS: |
| 92 |
charlength = *(++code); |
charlength = *(++code); |
| 93 |
printf("%3d ", charlength); |
fprintf(outfile, "%3d ", charlength); |
| 94 |
while (charlength-- > 0) |
while (charlength-- > 0) |
| 95 |
if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c); |
if (isprint(c = *(++code))) fprintf(outfile, "%c", c); |
| 96 |
|
else fprintf(outfile, "\\x%02x", c); |
| 97 |
break; |
break; |
| 98 |
|
|
| 99 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 102 |
case OP_KET: |
case OP_KET: |
| 103 |
case OP_ASSERT: |
case OP_ASSERT: |
| 104 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
| 105 |
|
case OP_ASSERTBACK: |
| 106 |
|
case OP_ASSERTBACK_NOT: |
| 107 |
case OP_ONCE: |
case OP_ONCE: |
| 108 |
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
| 109 |
|
code += 2; |
| 110 |
|
break; |
| 111 |
|
|
| 112 |
|
case OP_REVERSE: |
| 113 |
|
fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
| 114 |
code += 2; |
code += 2; |
| 115 |
break; |
break; |
| 116 |
|
|
| 127 |
case OP_TYPEQUERY: |
case OP_TYPEQUERY: |
| 128 |
case OP_TYPEMINQUERY: |
case OP_TYPEMINQUERY: |
| 129 |
if (*code >= OP_TYPESTAR) |
if (*code >= OP_TYPESTAR) |
| 130 |
printf(" %s", OP_names[code[1]]); |
fprintf(outfile, " %s", OP_names[code[1]]); |
| 131 |
else if (isprint(c = code[1])) printf(" %c", c); |
else if (isprint(c = code[1])) fprintf(outfile, " %c", c); |
| 132 |
else printf(" \\x%02x", c); |
else fprintf(outfile, " \\x%02x", c); |
| 133 |
printf("%s", OP_names[*code++]); |
fprintf(outfile, "%s", OP_names[*code++]); |
| 134 |
break; |
break; |
| 135 |
|
|
| 136 |
case OP_EXACT: |
case OP_EXACT: |
| 137 |
case OP_UPTO: |
case OP_UPTO: |
| 138 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 139 |
if (isprint(c = code[3])) printf(" %c{", c); |
if (isprint(c = code[3])) fprintf(outfile, " %c{", c); |
| 140 |
else printf(" \\x%02x{", c); |
else fprintf(outfile, " \\x%02x{", c); |
| 141 |
if (*code != OP_EXACT) printf(","); |
if (*code != OP_EXACT) fprintf(outfile, ","); |
| 142 |
printf("%d}", (code[1] << 8) + code[2]); |
fprintf(outfile, "%d}", (code[1] << 8) + code[2]); |
| 143 |
if (*code == OP_MINUPTO) printf("?"); |
if (*code == OP_MINUPTO) fprintf(outfile, "?"); |
| 144 |
code += 3; |
code += 3; |
| 145 |
break; |
break; |
| 146 |
|
|
| 147 |
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
| 148 |
case OP_TYPEUPTO: |
case OP_TYPEUPTO: |
| 149 |
case OP_TYPEMINUPTO: |
case OP_TYPEMINUPTO: |
| 150 |
printf(" %s{", OP_names[code[3]]); |
fprintf(outfile, " %s{", OP_names[code[3]]); |
| 151 |
if (*code != OP_TYPEEXACT) printf("0,"); |
if (*code != OP_TYPEEXACT) fprintf(outfile, "0,"); |
| 152 |
printf("%d}", (code[1] << 8) + code[2]); |
fprintf(outfile, "%d}", (code[1] << 8) + code[2]); |
| 153 |
if (*code == OP_TYPEMINUPTO) printf("?"); |
if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?"); |
| 154 |
code += 3; |
code += 3; |
| 155 |
break; |
break; |
| 156 |
|
|
| 157 |
case OP_NOT: |
case OP_NOT: |
| 158 |
if (isprint(c = *(++code))) printf(" [^%c]", c); |
if (isprint(c = *(++code))) fprintf(outfile, " [^%c]", c); |
| 159 |
else printf(" [^\\x%02x]", c); |
else fprintf(outfile, " [^\\x%02x]", c); |
| 160 |
break; |
break; |
| 161 |
|
|
| 162 |
case OP_NOTSTAR: |
case OP_NOTSTAR: |
| 165 |
case OP_NOTMINPLUS: |
case OP_NOTMINPLUS: |
| 166 |
case OP_NOTQUERY: |
case OP_NOTQUERY: |
| 167 |
case OP_NOTMINQUERY: |
case OP_NOTMINQUERY: |
| 168 |
if (isprint(c = code[1])) printf(" [^%c]", c); |
if (isprint(c = code[1])) fprintf(outfile, " [^%c]", c); |
| 169 |
else printf(" [^\\x%02x]", c); |
else fprintf(outfile, " [^\\x%02x]", c); |
| 170 |
printf("%s", OP_names[*code++]); |
fprintf(outfile, "%s", OP_names[*code++]); |
| 171 |
break; |
break; |
| 172 |
|
|
| 173 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
| 174 |
case OP_NOTUPTO: |
case OP_NOTUPTO: |
| 175 |
case OP_NOTMINUPTO: |
case OP_NOTMINUPTO: |
| 176 |
if (isprint(c = code[3])) printf(" [^%c]{", c); |
if (isprint(c = code[3])) fprintf(outfile, " [^%c]{", c); |
| 177 |
else printf(" [^\\x%02x]{", c); |
else fprintf(outfile, " [^\\x%02x]{", c); |
| 178 |
if (*code != OP_NOTEXACT) printf(","); |
if (*code != OP_NOTEXACT) fprintf(outfile, ","); |
| 179 |
printf("%d}", (code[1] << 8) + code[2]); |
fprintf(outfile, "%d}", (code[1] << 8) + code[2]); |
| 180 |
if (*code == OP_NOTMINUPTO) printf("?"); |
if (*code == OP_NOTMINUPTO) fprintf(outfile, "?"); |
| 181 |
code += 3; |
code += 3; |
| 182 |
break; |
break; |
| 183 |
|
|
| 184 |
case OP_REF: |
case OP_REF: |
| 185 |
printf(" \\%d", *(++code)); |
fprintf(outfile, " \\%d", *(++code)); |
| 186 |
code++; |
code++; |
| 187 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
| 188 |
|
|
| 189 |
case OP_CLASS: |
case OP_CLASS: |
|
case OP_NEGCLASS: |
|
| 190 |
{ |
{ |
| 191 |
int i, min, max; |
int i, min, max; |
| 192 |
if (*code++ == OP_CLASS) printf(" ["); |
code++; |
| 193 |
else printf(" ^["); |
fprintf(outfile, " ["); |
| 194 |
|
|
| 195 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
| 196 |
{ |
{ |
| 199 |
int j; |
int j; |
| 200 |
for (j = i+1; j < 256; j++) |
for (j = i+1; j < 256; j++) |
| 201 |
if ((code[j/8] & (1 << (j&7))) == 0) break; |
if ((code[j/8] & (1 << (j&7))) == 0) break; |
| 202 |
if (i == '-' || i == ']') printf("\\"); |
if (i == '-' || i == ']') fprintf(outfile, "\\"); |
| 203 |
if (isprint(i)) printf("%c", i); else printf("\\x%02x", i); |
if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i); |
| 204 |
if (--j > i) |
if (--j > i) |
| 205 |
{ |
{ |
| 206 |
printf("-"); |
fprintf(outfile, "-"); |
| 207 |
if (j == '-' || j == ']') printf("\\"); |
if (j == '-' || j == ']') fprintf(outfile, "\\"); |
| 208 |
if (isprint(j)) printf("%c", j); else printf("\\x%02x", j); |
if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j); |
| 209 |
} |
} |
| 210 |
i = j; |
i = j; |
| 211 |
} |
} |
| 212 |
} |
} |
| 213 |
printf("]"); |
fprintf(outfile, "]"); |
| 214 |
code += 32; |
code += 32; |
| 215 |
|
|
| 216 |
CLASS_REF_REPEAT: |
CLASS_REF_REPEAT: |
| 223 |
case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
| 224 |
case OP_CRQUERY: |
case OP_CRQUERY: |
| 225 |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
| 226 |
printf("%s", OP_names[*code]); |
fprintf(outfile, "%s", OP_names[*code]); |
| 227 |
break; |
break; |
| 228 |
|
|
| 229 |
case OP_CRRANGE: |
case OP_CRRANGE: |
| 230 |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
| 231 |
min = (code[1] << 8) + code[2]; |
min = (code[1] << 8) + code[2]; |
| 232 |
max = (code[3] << 8) + code[4]; |
max = (code[3] << 8) + code[4]; |
| 233 |
if (max == 0) printf("{%d,}", min); |
if (max == 0) fprintf(outfile, "{%d,}", min); |
| 234 |
else printf("{%d,%d}", min, max); |
else fprintf(outfile, "{%d,%d}", min, max); |
| 235 |
if (*code == OP_CRMINRANGE) printf("?"); |
if (*code == OP_CRMINRANGE) fprintf(outfile, "?"); |
| 236 |
code += 4; |
code += 4; |
| 237 |
break; |
break; |
| 238 |
|
|
| 245 |
/* Anything else is just a one-node item */ |
/* Anything else is just a one-node item */ |
| 246 |
|
|
| 247 |
default: |
default: |
| 248 |
printf(" %s", OP_names[*code]); |
fprintf(outfile, " %s", OP_names[*code]); |
| 249 |
break; |
break; |
| 250 |
} |
} |
| 251 |
|
|
| 252 |
code++; |
code++; |
| 253 |
printf("\n"); |
fprintf(outfile, "\n"); |
| 254 |
} |
} |
| 255 |
} |
} |
| 256 |
|
|
| 345 |
|
|
| 346 |
pcre_malloc = new_malloc; |
pcre_malloc = new_malloc; |
| 347 |
|
|
| 348 |
/* Heading line, then prompt for first re if stdin */ |
/* Heading line, then prompt for first regex if stdin */ |
| 349 |
|
|
|
fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n"); |
|
| 350 |
fprintf(outfile, "PCRE version %s\n\n", pcre_version()); |
fprintf(outfile, "PCRE version %s\n\n", pcre_version()); |
| 351 |
|
|
| 352 |
/* Main loop */ |
/* Main loop */ |
| 365 |
|
|
| 366 |
if (infile == stdin) printf(" re> "); |
if (infile == stdin) printf(" re> "); |
| 367 |
if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break; |
if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break; |
| 368 |
if (infile != stdin) fprintf(outfile, (char *)buffer); |
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); |
| 369 |
|
|
| 370 |
p = buffer; |
p = buffer; |
| 371 |
while (isspace(*p)) p++; |
while (isspace(*p)) p++; |
| 403 |
done = 1; |
done = 1; |
| 404 |
goto CONTINUE; |
goto CONTINUE; |
| 405 |
} |
} |
| 406 |
if (infile != stdin) fprintf(outfile, (char *)pp); |
if (infile != stdin) fprintf(outfile, "%s", (char *)pp); |
| 407 |
} |
} |
| 408 |
|
|
| 409 |
/* Terminate the pattern at the delimiter */ |
/* Terminate the pattern at the delimiter */ |
| 427 |
case 'E': options |= PCRE_DOLLAR_ENDONLY; break; |
case 'E': options |= PCRE_DOLLAR_ENDONLY; break; |
| 428 |
case 'P': do_posix = 1; break; |
case 'P': do_posix = 1; break; |
| 429 |
case 'S': do_study = 1; break; |
case 'S': do_study = 1; break; |
|
case 'I': study_options |= PCRE_CASELESS; break; |
|
| 430 |
case 'U': options |= PCRE_UNGREEDY; break; |
case 'U': options |= PCRE_UNGREEDY; break; |
| 431 |
case 'X': options |= PCRE_EXTRA; break; |
case 'X': options |= PCRE_EXTRA; break; |
| 432 |
case '\n': case ' ': break; |
case '\n': case ' ': break; |
| 467 |
register int i; |
register int i; |
| 468 |
clock_t time_taken; |
clock_t time_taken; |
| 469 |
clock_t start_time = clock(); |
clock_t start_time = clock(); |
| 470 |
for (i = 0; i < 4000; i++) |
for (i = 0; i < LOOPREPEAT; i++) |
| 471 |
{ |
{ |
| 472 |
re = pcre_compile((char *)p, options, &error, &erroroffset); |
re = pcre_compile((char *)p, options, &error, &erroroffset); |
| 473 |
if (re != NULL) free(re); |
if (re != NULL) free(re); |
| 510 |
{ |
{ |
| 511 |
int first_char, count; |
int first_char, count; |
| 512 |
|
|
| 513 |
if (debug || do_debug) print_internals(re); |
if (debug || do_debug) print_internals(re, outfile); |
| 514 |
|
|
| 515 |
count = pcre_info(re, &options, &first_char); |
count = pcre_info(re, &options, &first_char); |
| 516 |
if (count < 0) fprintf(outfile, |
if (count < 0) fprintf(outfile, |
| 556 |
register int i; |
register int i; |
| 557 |
clock_t time_taken; |
clock_t time_taken; |
| 558 |
clock_t start_time = clock(); |
clock_t start_time = clock(); |
| 559 |
for (i = 0; i < 4000; i++) |
for (i = 0; i < LOOPREPEAT; i++) |
| 560 |
extra = pcre_study(re, study_options, &error); |
extra = pcre_study(re, study_options, &error); |
| 561 |
time_taken = clock() - start_time; |
time_taken = clock() - start_time; |
| 562 |
if (extra != NULL) free(extra); |
if (extra != NULL) free(extra); |
| 616 |
{ |
{ |
| 617 |
unsigned char *q; |
unsigned char *q; |
| 618 |
int count, c; |
int count, c; |
| 619 |
int offsets[30]; |
int offsets[45]; |
| 620 |
int size_offsets = sizeof(offsets)/sizeof(int); |
int size_offsets = sizeof(offsets)/sizeof(int); |
| 621 |
|
|
| 622 |
options = 0; |
options = 0; |
| 627 |
done = 1; |
done = 1; |
| 628 |
goto CONTINUE; |
goto CONTINUE; |
| 629 |
} |
} |
| 630 |
if (infile != stdin) fprintf(outfile, (char *)buffer); |
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer); |
| 631 |
|
|
| 632 |
len = (int)strlen((char *)buffer); |
len = (int)strlen((char *)buffer); |
| 633 |
while (len > 0 && isspace(buffer[len-1])) len--; |
while (len > 0 && isspace(buffer[len-1])) len--; |
| 681 |
options |= PCRE_NOTBOL; |
options |= PCRE_NOTBOL; |
| 682 |
continue; |
continue; |
| 683 |
|
|
|
case 'E': |
|
|
options |= PCRE_DOLLAR_ENDONLY; |
|
|
continue; |
|
|
|
|
|
case 'I': |
|
|
options |= PCRE_CASELESS; |
|
|
continue; |
|
|
|
|
|
case 'M': |
|
|
options |= PCRE_MULTILINE; |
|
|
continue; |
|
|
|
|
|
case 'S': |
|
|
options |= PCRE_DOTALL; |
|
|
continue; |
|
|
|
|
| 684 |
case 'O': |
case 'O': |
| 685 |
while(isdigit(*p)) n = n * 10 + *p++ - '0'; |
while(isdigit(*p)) n = n * 10 + *p++ - '0'; |
| 686 |
if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n; |
if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n; |
| 721 |
{ |
{ |
| 722 |
if (pmatch[i].rm_so >= 0) |
if (pmatch[i].rm_so >= 0) |
| 723 |
{ |
{ |
| 724 |
fprintf(outfile, "%2d: ", i); |
fprintf(outfile, "%2d: ", (int)i); |
| 725 |
pchars(dbuffer + pmatch[i].rm_so, |
pchars(dbuffer + pmatch[i].rm_so, |
| 726 |
pmatch[i].rm_eo - pmatch[i].rm_so); |
pmatch[i].rm_eo - pmatch[i].rm_so); |
| 727 |
fprintf(outfile, "\n"); |
fprintf(outfile, "\n"); |
| 753 |
if (count == 0) |
if (count == 0) |
| 754 |
{ |
{ |
| 755 |
fprintf(outfile, "Matched, but too many substrings\n"); |
fprintf(outfile, "Matched, but too many substrings\n"); |
| 756 |
count = size_offsets/2; |
count = size_offsets/3; |
| 757 |
} |
} |
| 758 |
|
|
| 759 |
if (count >= 0) |
if (count >= 0) |