/[pcre]/code/trunk/pcrecpp.cc
ViewVC logotype

Diff of /code/trunk/pcrecpp.cc

Parent Directory | Revision Log | View Patch

revision 325 by ph10, Wed Mar 5 17:14:08 2008 UTC | revision 326 by ph10, Sat Mar 8 17:24:02 2008 UTC
# Line 449  bool RE::Extract(const StringPiece& rewr Line 449  bool RE::Extract(const StringPiece& rewr
449    // Note that it's legal to escape a character even if it has no    // Note that it's legal to escape a character even if it has no
450    // special meaning in a regular expression -- so this function does    // special meaning in a regular expression -- so this function does
451    // that.  (This also makes it identical to the perl function of the    // that.  (This also makes it identical to the perl function of the
452    // same name; see `perldoc -f quotemeta`.)    // same name; see `perldoc -f quotemeta`.)  The one exception is
453      // escaping NUL: rather than doing backslash + NUL, like perl does,
454      // we do '\0', because pcre itself doesn't take embedded NUL chars.
455    for (int ii = 0; ii < unquoted.size(); ++ii) {    for (int ii = 0; ii < unquoted.size(); ++ii) {
456      // Note that using 'isalnum' here raises the benchmark time from      // Note that using 'isalnum' here raises the benchmark time from
457      // 32ns to 58ns:      // 32ns to 58ns:
458      if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&      if (unquoted[ii] == '\0') {
459          (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&        result += "\\0";
460          (unquoted[ii] < '0' || unquoted[ii] > '9') &&      } else if ((unquoted[ii] < 'a' || unquoted[ii] > 'z') &&
461          unquoted[ii] != '_' &&                 (unquoted[ii] < 'A' || unquoted[ii] > 'Z') &&
462          // If this is the part of a UTF8 or Latin1 character, we need                 (unquoted[ii] < '0' || unquoted[ii] > '9') &&
463          // to copy this byte without escaping.  Experimentally this is                 unquoted[ii] != '_' &&
464          // what works correctly with the regexp library.                 // If this is the part of a UTF8 or Latin1 character, we need
465          !(unquoted[ii] & 128)) {                 // to copy this byte without escaping.  Experimentally this is
466                   // what works correctly with the regexp library.
467                   !(unquoted[ii] & 128)) {
468        result += '\\';        result += '\\';
469          result += unquoted[ii];
470        } else {
471          result += unquoted[ii];
472      }      }
     result += unquoted[ii];  
473    }    }
474    
475    return result;    return result;

Legend:
Removed from v.325  
changed lines
  Added in v.326

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12