| 71 |
#define INPUT_MODE "r" |
#define INPUT_MODE "r" |
| 72 |
#define OUTPUT_MODE "wb" |
#define OUTPUT_MODE "wb" |
| 73 |
|
|
| 74 |
#define isatty _isatty /* This is what Windows calls them, I'm told */ |
#ifndef isatty |
| 75 |
|
#define isatty _isatty /* This is what Windows calls them, I'm told, */ |
| 76 |
|
#endif /* though in some environments they seem to */ |
| 77 |
|
/* be already defined, hence the #ifndefs. */ |
| 78 |
|
#ifndef fileno |
| 79 |
#define fileno _fileno |
#define fileno _fileno |
| 80 |
|
#endif |
| 81 |
|
|
| 82 |
#else |
#else |
| 83 |
#include <sys/time.h> /* These two includes are needed */ |
#include <sys/time.h> /* These two includes are needed */ |
| 118 |
|
|
| 119 |
/* We also need the pcre_printint() function for printing out compiled |
/* We also need the pcre_printint() function for printing out compiled |
| 120 |
patterns. This function is in a separate file so that it can be included in |
patterns. This function is in a separate file so that it can be included in |
| 121 |
pcre_compile.c when that module is compiled with debugging enabled. |
pcre_compile.c when that module is compiled with debugging enabled. It needs to |
| 122 |
|
know which case is being compiled. */ |
|
The definition of the macro PRINTABLE, which determines whether to print an |
|
|
output character as-is or as a hex value when showing compiled patterns, is |
|
|
contained in this file. We use it here also, in cases when the locale has not |
|
|
been explicitly changed, so as to get consistent output from systems that |
|
|
differ in their output from isprint() even in the "C" locale. */ |
|
| 123 |
|
|
| 124 |
|
#define COMPILING_PCRETEST |
| 125 |
#include "pcre_printint.src" |
#include "pcre_printint.src" |
| 126 |
|
|
| 127 |
#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c)) |
/* The definition of the macro PRINTABLE, which determines whether to print an |
| 128 |
|
output character as-is or as a hex value when showing compiled patterns, is |
| 129 |
|
contained in the printint.src file. We use it here also, in cases when the |
| 130 |
|
locale has not been explicitly changed, so as to get consistent output from |
| 131 |
|
systems that differ in their output from isprint() even in the "C" locale. */ |
| 132 |
|
|
| 133 |
|
#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c)) |
| 134 |
|
|
| 135 |
/* It is possible to compile this test program without including support for |
/* It is possible to compile this test program without including support for |
| 136 |
testing the POSIX interface, though this is not available via the standard |
testing the POSIX interface, though this is not available via the standard |
| 881 |
else if (strcmp(argv[op], "-b") == 0) debug = 1; |
else if (strcmp(argv[op], "-b") == 0) debug = 1; |
| 882 |
else if (strcmp(argv[op], "-i") == 0) showinfo = 1; |
else if (strcmp(argv[op], "-i") == 0) showinfo = 1; |
| 883 |
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; |
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1; |
| 884 |
else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE; |
else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE; |
| 885 |
#if !defined NODFA |
#if !defined NODFA |
| 886 |
else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1; |
else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1; |
| 887 |
#endif |
#endif |
| 934 |
else if (strcmp(argv[op], "-C") == 0) |
else if (strcmp(argv[op], "-C") == 0) |
| 935 |
{ |
{ |
| 936 |
int rc; |
int rc; |
| 937 |
unsigned long int lrc; |
unsigned long int lrc; |
| 938 |
printf("PCRE version %s\n", pcre_version()); |
printf("PCRE version %s\n", pcre_version()); |
| 939 |
printf("Compiled with\n"); |
printf("Compiled with\n"); |
| 940 |
(void)pcre_config(PCRE_CONFIG_UTF8, &rc); |
(void)pcre_config(PCRE_CONFIG_UTF8, &rc); |
| 943 |
printf(" %sUnicode properties support\n", rc? "" : "No "); |
printf(" %sUnicode properties support\n", rc? "" : "No "); |
| 944 |
(void)pcre_config(PCRE_CONFIG_NEWLINE, &rc); |
(void)pcre_config(PCRE_CONFIG_NEWLINE, &rc); |
| 945 |
/* Note that these values are always the ASCII values, even |
/* Note that these values are always the ASCII values, even |
| 946 |
in EBCDIC environments. CR is 13 and NL is 10. */ |
in EBCDIC environments. CR is 13 and NL is 10. */ |
| 947 |
printf(" Newline sequence is %s\n", (rc == 13)? "CR" : |
printf(" Newline sequence is %s\n", (rc == 13)? "CR" : |
| 948 |
(rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" : |
(rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" : |
| 949 |
(rc == -2)? "ANYCRLF" : |
(rc == -2)? "ANYCRLF" : |
| 1040 |
#endif |
#endif |
| 1041 |
|
|
| 1042 |
const char *error; |
const char *error; |
| 1043 |
|
unsigned char *markptr; |
| 1044 |
unsigned char *p, *pp, *ppp; |
unsigned char *p, *pp, *ppp; |
| 1045 |
unsigned char *to_file = NULL; |
unsigned char *to_file = NULL; |
| 1046 |
const unsigned char *tables = NULL; |
const unsigned char *tables = NULL; |
| 1047 |
unsigned long int true_size, true_study_size = 0; |
unsigned long int true_size, true_study_size = 0; |
| 1048 |
size_t size, regex_gotten_store; |
size_t size, regex_gotten_store; |
| 1049 |
|
int do_mark = 0; |
| 1050 |
int do_study = 0; |
int do_study = 0; |
| 1051 |
int do_debug = debug; |
int do_debug = debug; |
| 1052 |
int do_G = 0; |
int do_G = 0; |
| 1228 |
case 'G': do_G = 1; break; |
case 'G': do_G = 1; break; |
| 1229 |
case 'I': do_showinfo = 1; break; |
case 'I': do_showinfo = 1; break; |
| 1230 |
case 'J': options |= PCRE_DUPNAMES; break; |
case 'J': options |= PCRE_DUPNAMES; break; |
| 1231 |
|
case 'K': do_mark = 1; break; |
| 1232 |
case 'M': log_store = 1; break; |
case 'M': log_store = 1; break; |
| 1233 |
case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; |
case 'N': options |= PCRE_NO_AUTO_CAPTURE; break; |
| 1234 |
|
|
| 1309 |
if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; |
if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL; |
| 1310 |
if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; |
if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB; |
| 1311 |
if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; |
if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8; |
| 1312 |
|
if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY; |
| 1313 |
|
|
| 1314 |
rc = regcomp(&preg, (char *)p, cflags); |
rc = regcomp(&preg, (char *)p, cflags); |
| 1315 |
|
|
| 1330 |
#endif /* !defined NOPOSIX */ |
#endif /* !defined NOPOSIX */ |
| 1331 |
|
|
| 1332 |
{ |
{ |
| 1333 |
|
unsigned long int get_options; |
| 1334 |
|
|
| 1335 |
if (timeit > 0) |
if (timeit > 0) |
| 1336 |
{ |
{ |
| 1337 |
register int i; |
register int i; |
| 1375 |
goto CONTINUE; |
goto CONTINUE; |
| 1376 |
} |
} |
| 1377 |
|
|
| 1378 |
/* Compilation succeeded; print data if required. There are now two |
/* Compilation succeeded. It is now possible to set the UTF-8 option from |
| 1379 |
info-returning functions. The old one has a limited interface and |
within the regex; check for this so that we know how to process the data |
| 1380 |
returns only limited data. Check that it agrees with the newer one. */ |
lines. */ |
| 1381 |
|
|
| 1382 |
|
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); |
| 1383 |
|
if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1; |
| 1384 |
|
|
| 1385 |
|
/* Print information if required. There are now two info-returning |
| 1386 |
|
functions. The old one has a limited interface and returns only limited |
| 1387 |
|
data. Check that it agrees with the newer one. */ |
| 1388 |
|
|
| 1389 |
if (log_store) |
if (log_store) |
| 1390 |
fprintf(outfile, "Memory allocation (code space): %d\n", |
fprintf(outfile, "Memory allocation (code space): %d\n", |
| 1423 |
true_study_size = ((pcre_study_data *)(extra->study_data))->size; |
true_study_size = ((pcre_study_data *)(extra->study_data))->size; |
| 1424 |
} |
} |
| 1425 |
|
|
| 1426 |
|
/* If /K was present, we set up for handling MARK data. */ |
| 1427 |
|
|
| 1428 |
|
if (do_mark) |
| 1429 |
|
{ |
| 1430 |
|
if (extra == NULL) |
| 1431 |
|
{ |
| 1432 |
|
extra = (pcre_extra *)malloc(sizeof(pcre_extra)); |
| 1433 |
|
extra->flags = 0; |
| 1434 |
|
} |
| 1435 |
|
extra->mark = &markptr; |
| 1436 |
|
extra->flags |= PCRE_EXTRA_MARK; |
| 1437 |
|
} |
| 1438 |
|
|
| 1439 |
/* If the 'F' option was present, we flip the bytes of all the integer |
/* If the 'F' option was present, we flip the bytes of all the integer |
| 1440 |
fields in the regex data block and the study block. This is to make it |
fields in the regex data block and the study block. This is to make it |
| 1441 |
possible to test PCRE's handling of byte-flipped patterns, e.g. those |
possible to test PCRE's handling of byte-flipped patterns, e.g. those |
| 1468 |
{ |
{ |
| 1469 |
pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); |
pcre_study_data *rsd = (pcre_study_data *)(extra->study_data); |
| 1470 |
rsd->size = byteflip(rsd->size, sizeof(rsd->size)); |
rsd->size = byteflip(rsd->size, sizeof(rsd->size)); |
| 1471 |
rsd->options = byteflip(rsd->options, sizeof(rsd->options)); |
rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags)); |
| 1472 |
|
rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength)); |
| 1473 |
} |
} |
| 1474 |
} |
} |
| 1475 |
|
|
| 1483 |
pcre_printint(re, outfile, debug_lengths); |
pcre_printint(re, outfile, debug_lengths); |
| 1484 |
} |
} |
| 1485 |
|
|
| 1486 |
|
/* We already have the options in get_options (see above) */ |
| 1487 |
|
|
| 1488 |
if (do_showinfo) |
if (do_showinfo) |
| 1489 |
{ |
{ |
| 1490 |
unsigned long int get_options, all_options; |
unsigned long int all_options; |
| 1491 |
#if !defined NOINFOCHECK |
#if !defined NOINFOCHECK |
| 1492 |
int old_first_char, old_options, old_count; |
int old_first_char, old_options, old_count; |
| 1493 |
#endif |
#endif |
| 1496 |
int nameentrysize, namecount; |
int nameentrysize, namecount; |
| 1497 |
const uschar *nametable; |
const uschar *nametable; |
| 1498 |
|
|
|
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options); |
|
| 1499 |
new_info(re, NULL, PCRE_INFO_SIZE, &size); |
new_info(re, NULL, PCRE_INFO_SIZE, &size); |
| 1500 |
new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); |
new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count); |
| 1501 |
new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); |
new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax); |
| 1646 |
else |
else |
| 1647 |
{ |
{ |
| 1648 |
uschar *start_bits = NULL; |
uschar *start_bits = NULL; |
| 1649 |
new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits); |
int minlength; |
| 1650 |
|
|
| 1651 |
|
new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength); |
| 1652 |
|
fprintf(outfile, "Subject length lower bound = %d\n", minlength); |
| 1653 |
|
|
| 1654 |
|
new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits); |
| 1655 |
if (start_bits == NULL) |
if (start_bits == NULL) |
| 1656 |
fprintf(outfile, "No starting byte set\n"); |
fprintf(outfile, "No set of starting bytes\n"); |
| 1657 |
else |
else |
| 1658 |
{ |
{ |
| 1659 |
int i; |
int i; |
| 1992 |
continue; |
continue; |
| 1993 |
|
|
| 1994 |
case 'N': |
case 'N': |
| 1995 |
options |= PCRE_NOTEMPTY; |
if ((options & PCRE_NOTEMPTY) != 0) |
| 1996 |
|
options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART; |
| 1997 |
|
else |
| 1998 |
|
options |= PCRE_NOTEMPTY; |
| 1999 |
continue; |
continue; |
| 2000 |
|
|
| 2001 |
case 'O': |
case 'O': |
| 2018 |
continue; |
continue; |
| 2019 |
|
|
| 2020 |
case 'P': |
case 'P': |
| 2021 |
options |= PCRE_PARTIAL; |
options |= ((options & PCRE_PARTIAL_SOFT) == 0)? |
| 2022 |
|
PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD; |
| 2023 |
continue; |
continue; |
| 2024 |
|
|
| 2025 |
case 'Q': |
case 'Q': |
| 2053 |
case 'S': |
case 'S': |
| 2054 |
show_malloc = 1; |
show_malloc = 1; |
| 2055 |
continue; |
continue; |
| 2056 |
|
|
| 2057 |
case 'Y': |
case 'Y': |
| 2058 |
options |= PCRE_NO_START_OPTIMIZE; |
options |= PCRE_NO_START_OPTIMIZE; |
| 2059 |
continue; |
continue; |
| 2060 |
|
|
| 2061 |
case 'Z': |
case 'Z': |
| 2062 |
options |= PCRE_NOTEOL; |
options |= PCRE_NOTEOL; |
| 2116 |
pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets); |
pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets); |
| 2117 |
if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; |
if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL; |
| 2118 |
if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; |
if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL; |
| 2119 |
if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; |
if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY; |
| 2120 |
|
|
| 2121 |
rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); |
rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags); |
| 2122 |
|
|
| 2161 |
|
|
| 2162 |
for (;; gmatched++) /* Loop for /g or /G */ |
for (;; gmatched++) /* Loop for /g or /G */ |
| 2163 |
{ |
{ |
| 2164 |
|
markptr = NULL; |
| 2165 |
|
|
| 2166 |
if (timeitm > 0) |
if (timeitm > 0) |
| 2167 |
{ |
{ |
| 2168 |
register int i; |
register int i; |
| 2174 |
{ |
{ |
| 2175 |
int workspace[1000]; |
int workspace[1000]; |
| 2176 |
for (i = 0; i < timeitm; i++) |
for (i = 0; i < timeitm; i++) |
| 2177 |
count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset, |
count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, |
| 2178 |
options | g_notempty, use_offsets, use_size_offsets, workspace, |
options | g_notempty, use_offsets, use_size_offsets, workspace, |
| 2179 |
sizeof(workspace)/sizeof(int)); |
sizeof(workspace)/sizeof(int)); |
| 2180 |
} |
} |
| 2237 |
else if (all_use_dfa || use_dfa) |
else if (all_use_dfa || use_dfa) |
| 2238 |
{ |
{ |
| 2239 |
int workspace[1000]; |
int workspace[1000]; |
| 2240 |
count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset, |
count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, |
| 2241 |
options | g_notempty, use_offsets, use_size_offsets, workspace, |
options | g_notempty, use_offsets, use_size_offsets, workspace, |
| 2242 |
sizeof(workspace)/sizeof(int)); |
sizeof(workspace)/sizeof(int)); |
| 2243 |
if (count == 0) |
if (count == 0) |
| 2308 |
} |
} |
| 2309 |
} |
} |
| 2310 |
|
|
| 2311 |
|
if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr); |
| 2312 |
|
|
| 2313 |
for (i = 0; i < 32; i++) |
for (i = 0; i < 32; i++) |
| 2314 |
{ |
{ |
| 2315 |
if ((copystrings & (1 << i)) != 0) |
if ((copystrings & (1 << i)) != 0) |
| 2393 |
|
|
| 2394 |
else if (count == PCRE_ERROR_PARTIAL) |
else if (count == PCRE_ERROR_PARTIAL) |
| 2395 |
{ |
{ |
| 2396 |
fprintf(outfile, "Partial match"); |
if (markptr == NULL) fprintf(outfile, "Partial match"); |
| 2397 |
#if !defined NODFA |
else fprintf(outfile, "Partial match, mark=%s", markptr); |
| 2398 |
if ((all_use_dfa || use_dfa) && use_size_offsets > 2) |
if (use_size_offsets > 1) |
| 2399 |
fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0], |
{ |
| 2400 |
bptr + use_offsets[0]); |
fprintf(outfile, ": "); |
| 2401 |
#endif |
pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0], |
| 2402 |
|
outfile); |
| 2403 |
|
} |
| 2404 |
fprintf(outfile, "\n"); |
fprintf(outfile, "\n"); |
| 2405 |
break; /* Out of the /g loop */ |
break; /* Out of the /g loop */ |
| 2406 |
} |
} |
| 2461 |
{ |
{ |
| 2462 |
if (count == PCRE_ERROR_NOMATCH) |
if (count == PCRE_ERROR_NOMATCH) |
| 2463 |
{ |
{ |
| 2464 |
if (gmatched == 0) fprintf(outfile, "No match\n"); |
if (gmatched == 0) |
| 2465 |
|
{ |
| 2466 |
|
if (markptr == NULL) fprintf(outfile, "No match\n"); |
| 2467 |
|
else fprintf(outfile, "No match, mark = %s\n", markptr); |
| 2468 |
|
} |
| 2469 |
} |
} |
| 2470 |
else fprintf(outfile, "Error %d\n", count); |
else fprintf(outfile, "Error %d\n", count); |
| 2471 |
break; /* Out of the /g loop */ |
break; /* Out of the /g loop */ |
| 2477 |
if (!do_g && !do_G) break; |
if (!do_g && !do_G) break; |
| 2478 |
|
|
| 2479 |
/* If we have matched an empty string, first check to see if we are at |
/* If we have matched an empty string, first check to see if we are at |
| 2480 |
the end of the subject. If so, the /g loop is over. Otherwise, mimic |
the end of the subject. If so, the /g loop is over. Otherwise, mimic what |
| 2481 |
what Perl's /g option does. This turns out to be rather cunning. First |
Perl's /g option does. This turns out to be rather cunning. First we set |
| 2482 |
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the |
PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the |
| 2483 |
same point. If this fails (picked up above) we advance to the next |
same point. If this fails (picked up above) we advance to the next |
| 2484 |
character. */ |
character. */ |
| 2485 |
|
|
| 2488 |
if (use_offsets[0] == use_offsets[1]) |
if (use_offsets[0] == use_offsets[1]) |
| 2489 |
{ |
{ |
| 2490 |
if (use_offsets[0] == len) break; |
if (use_offsets[0] == len) break; |
| 2491 |
g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; |
g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED; |
| 2492 |
} |
} |
| 2493 |
|
|
| 2494 |
/* For /g, update the start offset, leaving the rest alone */ |
/* For /g, update the start offset, leaving the rest alone */ |