| 57 |
// Special object that stands in for no argument |
// Special object that stands in for no argument |
| 58 |
Arg RE::no_arg((void*)NULL); |
Arg RE::no_arg((void*)NULL); |
| 59 |
|
|
| 60 |
|
// This is for ABI compatibility with old versions of pcre (pre-7.6), |
| 61 |
|
// which defined a global no_arg variable instead of putting it in the |
| 62 |
|
// RE class. This works on GCC >= 3, at least. It definitely works |
| 63 |
|
// for ELF, but may not for other object formats (Mach-O, for |
| 64 |
|
// instance, does not support aliases.) We could probably have a more |
| 65 |
|
// inclusive test if we ever needed it. (Note that not only the |
| 66 |
|
// __attribute__ syntax, but also __USER_LABEL_PREFIX__, are |
| 67 |
|
// gnu-specific.) |
| 68 |
|
#if defined(__GNUC__) && __GNUC__ >= 3 && defined(__ELF__) |
| 69 |
|
# define ULP_AS_STRING(x) ULP_AS_STRING_INTERNAL(x) |
| 70 |
|
# define ULP_AS_STRING_INTERNAL(x) #x |
| 71 |
|
# define USER_LABEL_PREFIX_STR ULP_AS_STRING(__USER_LABEL_PREFIX__) |
| 72 |
|
extern Arg no_arg |
| 73 |
|
__attribute__((alias(USER_LABEL_PREFIX_STR "_ZN7pcrecpp2RE6no_argE"))); |
| 74 |
|
#endif |
| 75 |
|
|
| 76 |
// If a regular expression has no error, its error_ field points here |
// If a regular expression has no error, its error_ field points here |
| 77 |
static const string empty_string; |
static const string empty_string; |
| 78 |
|
|
| 457 |
// Note that it's legal to escape a character even if it has no |
// Note that it's legal to escape a character even if it has no |
| 458 |
// special meaning in a regular expression -- so this function does |
// special meaning in a regular expression -- so this function does |
| 459 |
// that. (This also makes it identical to the perl function of the |
// that. (This also makes it identical to the perl function of the |
| 460 |
// same name; see `perldoc -f quotemeta`.) |
// same name; see `perldoc -f quotemeta`.) The one exception is |
| 461 |
|
// escaping NUL: rather than doing backslash + NUL, like perl does, |
| 462 |
|
// we do '\0', because pcre itself doesn't take embedded NUL chars. |
| 463 |
for (int ii = 0; ii < unquoted.size(); ++ii) { |
for (int ii = 0; ii < unquoted.size(); ++ii) { |
| 464 |
// Note that using 'isalnum' here raises the benchmark time from |
// Note that using 'isalnum' here raises the benchmark time from |
| 465 |
// 32ns to 58ns: |
// 32ns to 58ns: |
| 466 |
if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && |
if (unquoted[ii] == '\0') { |
| 467 |
(unquoted[ii] < 'A' || unquoted[ii] > 'Z') && |
result += "\\0"; |
| 468 |
(unquoted[ii] < '0' || unquoted[ii] > '9') && |
} else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') && |
| 469 |
unquoted[ii] != '_' && |
(unquoted[ii] < 'A' || unquoted[ii] > 'Z') && |
| 470 |
// If this is part of a UTF8 or Latin1 character, we need |
(unquoted[ii] < '0' || unquoted[ii] > '9') && |
| 471 |
// to copy this byte without escaping. Experimentally this is |
unquoted[ii] != '_' && |
| 472 |
// what works correctly with the regexp library. |
// If this is part of a UTF8 or Latin1 character, we need |
| 473 |
!(unquoted[ii] & 128)) { |
// to copy this byte without escaping. Experimentally this is |
| 474 |
|
// what works correctly with the regexp library. |
| 475 |
|
!(unquoted[ii] & 128)) { |
| 476 |
result += '\\'; |
result += '\\'; |
| 477 |
|
result += unquoted[ii]; |
| 478 |
|
} else { |
| 479 |
|
result += unquoted[ii]; |
| 480 |
} |
} |
|
result += unquoted[ii]; |
|
| 481 |
} |
} |
| 482 |
|
|
| 483 |
return result; |
return result; |