| 7 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 8 |
|
|
| 9 |
Written by Philip Hazel |
Written by Philip Hazel |
| 10 |
Copyright (c) 1997-2006 University of Cambridge |
Copyright (c) 1997-2007 University of Cambridge |
| 11 |
|
|
| 12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 83 |
#include <stdlib.h> |
#include <stdlib.h> |
| 84 |
#include <string.h> |
#include <string.h> |
| 85 |
|
|
| 86 |
#ifndef PCRE_SPY |
/* When compiling a DLL for Windows, the exported symbols have to be declared |
| 87 |
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
using some MS magic. I found some useful information on this web page: |
| 88 |
|
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the |
| 89 |
|
information there, using __declspec(dllexport) without "extern" we have a |
| 90 |
|
definition; with "extern" we have a declaration. The settings here override the |
| 91 |
|
setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL, |
| 92 |
|
which is all that is needed for applications (they just import the symbols). We |
| 93 |
|
use: |
| 94 |
|
|
| 95 |
|
PCRE_EXP_DECL for declarations |
| 96 |
|
PCRE_EXP_DEFN for definitions of exported functions |
| 97 |
|
PCRE_EXP_DATA_DEFN for definitions of exported variables |
| 98 |
|
|
| 99 |
|
The reason for the two DEFN macros is that in non-Windows environments, one |
| 100 |
|
does not want to have "extern" before variable definitions because it leads to |
| 101 |
|
compiler warnings. So we distinguish between functions and variables. In |
| 102 |
|
Windows, the two should always be the same. |
| 103 |
|
|
| 104 |
|
The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, |
| 105 |
|
which is an application, but needs to import this file in order to "peek" at |
| 106 |
|
internals, can #include pcre.h first to get an application's-eye view. |
| 107 |
|
|
| 108 |
|
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, |
| 109 |
|
special-purpose) environments might want to stick other stuff in front of
| 110 |
|
exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and |
| 111 |
|
PCRE_EXP_DATA_DEFN only if they are not already set. */ |
| 112 |
|
|
| 113 |
|
#ifndef PCRE_EXP_DECL |
| 114 |
|
# ifdef _WIN32 |
| 115 |
|
# ifdef DLL_EXPORT |
| 116 |
|
# define PCRE_EXP_DECL extern __declspec(dllexport) |
| 117 |
|
# define PCRE_EXP_DEFN __declspec(dllexport) |
| 118 |
|
# define PCRE_EXP_DATA_DEFN __declspec(dllexport) |
| 119 |
|
# else |
| 120 |
|
# define PCRE_EXP_DECL extern |
| 121 |
|
# define PCRE_EXP_DEFN |
| 122 |
|
# define PCRE_EXP_DATA_DEFN |
| 123 |
|
# endif |
| 124 |
|
# |
| 125 |
|
# else |
| 126 |
|
# ifdef __cplusplus |
| 127 |
|
# define PCRE_EXP_DECL extern "C" |
| 128 |
|
# else |
| 129 |
|
# define PCRE_EXP_DECL extern |
| 130 |
|
# endif |
| 131 |
|
# ifndef PCRE_EXP_DEFN |
| 132 |
|
# define PCRE_EXP_DEFN PCRE_EXP_DECL |
| 133 |
|
# endif |
| 134 |
|
# ifndef PCRE_EXP_DATA_DEFN |
| 135 |
|
# define PCRE_EXP_DATA_DEFN |
| 136 |
|
# endif |
| 137 |
|
# endif |
| 138 |
#endif |
#endif |
| 139 |
|
|
| 140 |
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We |
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We |
| 175 |
#define NOTACHAR 0xffffffff |
#define NOTACHAR 0xffffffff |
| 176 |
|
|
| 177 |
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, |
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, |
| 178 |
and "all" at present). The following macros are used to package up testing for |
"any" and "anycrlf" at present). The following macros are used to package up |
| 179 |
newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to |
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various |
| 180 |
indicate in which datablock the parameters exist, and what the start/end of |
modules to indicate in which datablock the parameters exist, and what the |
| 181 |
string field names are. */ |
start/end of string field names are. */ |
| 182 |
|
|
| 183 |
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ |
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ |
| 184 |
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ |
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ |
| 185 |
|
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ |
| 186 |
|
|
| 187 |
/* This macro checks for a newline at the given position */ |
/* This macro checks for a newline at the given position */ |
| 188 |
|
|
| 189 |
#define IS_NEWLINE(p) \ |
#define IS_NEWLINE(p) \ |
| 190 |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
| 191 |
((p) < NLBLOCK->PSEND && \ |
((p) < NLBLOCK->PSEND && \ |
| 192 |
_pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \ |
_pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ |
| 193 |
) \ |
utf8)) \ |
| 194 |
: \ |
: \ |
| 195 |
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ |
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ |
| 196 |
(p)[0] == NLBLOCK->nl[0] && \ |
(p)[0] == NLBLOCK->nl[0] && \ |
| 203 |
#define WAS_NEWLINE(p) \ |
#define WAS_NEWLINE(p) \ |
| 204 |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
| 205 |
((p) > NLBLOCK->PSSTART && \ |
((p) > NLBLOCK->PSSTART && \ |
| 206 |
_pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \ |
_pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ |
| 207 |
) \ |
&(NLBLOCK->nllen), utf8)) \ |
| 208 |
: \ |
: \ |
| 209 |
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ |
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ |
| 210 |
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ |
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ |
| 229 |
#define USPTR const unsigned char * |
#define USPTR const unsigned char * |
| 230 |
#endif |
#endif |
| 231 |
|
|
| 232 |
|
|
| 233 |
|
|
| 234 |
/* Include the public PCRE header and the definitions of UCP character property |
/* Include the public PCRE header and the definitions of UCP character property |
| 235 |
values. */ |
values. */ |
| 236 |
|
|
| 237 |
#include "pcre.h" |
#include <pcre.h> |
| 238 |
#include "ucp.h" |
#include "ucp.h" |
| 239 |
|
|
| 240 |
/* When compiling for use with the Virtual Pascal compiler, these functions |
/* When compiling for use with the Virtual Pascal compiler, these functions |
| 242 |
option on the command line. */ |
option on the command line. */ |
| 243 |
|
|
| 244 |
#ifdef VPCOMPAT |
#ifdef VPCOMPAT |
| 245 |
|
#define strlen(s) _strlen(s) |
| 246 |
#define strncmp(s1,s2,m) _strncmp(s1,s2,m) |
#define strncmp(s1,s2,m) _strncmp(s1,s2,m) |
| 247 |
|
#define memcmp(s,c,n) _memcmp(s,c,n) |
| 248 |
#define memcpy(d,s,n) _memcpy(d,s,n) |
#define memcpy(d,s,n) _memcpy(d,s,n) |
| 249 |
#define memmove(d,s,n) _memmove(d,s,n) |
#define memmove(d,s,n) _memmove(d,s,n) |
| 250 |
#define memset(s,c,n) _memset(s,c,n) |
#define memset(s,c,n) _memset(s,c,n) |
| 253 |
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), |
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), |
| 254 |
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY |
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY |
| 255 |
is set. Otherwise, include an emulating function for those systems that have |
is set. Otherwise, include an emulating function for those systems that have |
| 256 |
neither (there are some non-Unix environments where this is the case). This assumes
neither (there are some non-Unix environments where this is the case). */
|
that all calls to memmove are moving strings upwards in store, which is the |
|
|
case in PCRE. */ |
|
| 257 |
|
|
| 258 |
#if ! HAVE_MEMMOVE |
#ifndef HAVE_MEMMOVE |
| 259 |
#undef memmove /* some systems may have a macro */ |
#undef memmove /* some systems may have a macro */ |
| 260 |
#if HAVE_BCOPY |
#ifdef HAVE_BCOPY |
| 261 |
#define memmove(a, b, c) bcopy(b, a, c) |
#define memmove(a, b, c) bcopy(b, a, c) |
| 262 |
#else /* HAVE_BCOPY */ |
#else /* HAVE_BCOPY */ |
| 263 |
static void * |
static void * |
| 264 |
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) |
pcre_memmove(void *d, const void *s, size_t n) |
| 265 |
{ |
{ |
| 266 |
size_t i; |
size_t i; |
| 267 |
dest += n; |
unsigned char *dest = (unsigned char *)d; |
| 268 |
src += n; |
const unsigned char *src = (const unsigned char *)s; |
| 269 |
for (i = 0; i < n; ++i) *(--dest) = *(--src); |
if (dest > src) |
| 270 |
return dest; |
{ |
| 271 |
|
dest += n; |
| 272 |
|
src += n; |
| 273 |
|
for (i = 0; i < n; ++i) *(--dest) = *(--src); |
| 274 |
|
return (void *)dest; |
| 275 |
|
} |
| 276 |
|
else |
| 277 |
|
{ |
| 278 |
|
for (i = 0; i < n; ++i) *dest++ = *src++; |
| 279 |
|
return (void *)(dest - n); |
| 280 |
|
} |
| 281 |
} |
} |
| 282 |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
| 283 |
#endif /* not HAVE_BCOPY */ |
#endif /* not HAVE_BCOPY */ |
| 502 |
/* Masks for identifying the public options that are permitted at compile |
/* Masks for identifying the public options that are permitted at compile |
| 503 |
time, run time, or study time, respectively. */ |
time, run time, or study time, respectively. */ |
| 504 |
|
|
| 505 |
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY) |
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ |
| 506 |
|
PCRE_NEWLINE_ANYCRLF) |
| 507 |
|
|
| 508 |
#define PUBLIC_OPTIONS \ |
#define PUBLIC_OPTIONS \ |
| 509 |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
| 1090 |
one of the exported public functions. They have to be "external" in the C |
one of the exported public functions. They have to be "external" in the C |
| 1091 |
sense, but are not part of the PCRE public API. */ |
sense, but are not part of the PCRE public API. */ |
| 1092 |
|
|
| 1093 |
extern BOOL _pcre_is_newline(const uschar *, const uschar *, int *, |
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *, |
| 1094 |
BOOL); |
int *, BOOL); |
| 1095 |
extern int _pcre_ord2utf8(int, uschar *); |
extern int _pcre_ord2utf8(int, uschar *); |
| 1096 |
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, |
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, |
| 1097 |
const pcre_study_data *, pcre_study_data *); |
const pcre_study_data *, pcre_study_data *); |
| 1098 |
extern int _pcre_ucp_findprop(const unsigned int, int *, int *); |
extern int _pcre_ucp_findprop(const unsigned int, int *, int *); |
| 1099 |
extern unsigned int _pcre_ucp_othercase(const unsigned int); |
extern unsigned int _pcre_ucp_othercase(const unsigned int); |
| 1100 |
extern int _pcre_valid_utf8(const uschar *, int); |
extern int _pcre_valid_utf8(const uschar *, int); |
| 1101 |
extern BOOL _pcre_was_newline(const uschar *, const uschar *, int *, |
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *, |
| 1102 |
BOOL); |
int *, BOOL); |
| 1103 |
extern BOOL _pcre_xclass(int, const uschar *); |
extern BOOL _pcre_xclass(int, const uschar *); |
| 1104 |
|
|
| 1105 |
#endif |
#endif |