| 42 |
one kind of newline is to be recognized. When a newline is found, its length is |
one kind of newline is to be recognized. When a newline is found, its length is |
| 43 |
returned. In principle, we could implement several newline "types", each |
returned. In principle, we could implement several newline "types", each |
| 44 |
referring to a different set of newline characters. At present, PCRE supports |
referring to a different set of newline characters. At present, PCRE supports |
| 45 |
only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL, |
only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF, |
| 46 |
so for now the type isn't passed into the functions. It can easily be added |
and NLTYPE_ANY. The full list of Unicode newline characters is taken from |
|
later if required. The full list of Unicode newline characters is taken from |
|
| 47 |
http://unicode.org/unicode/reports/tr18/. */ |
http://unicode.org/unicode/reports/tr18/. */ |
| 48 |
|
|
| 49 |
|
|
| 50 |
|
#ifdef HAVE_CONFIG_H |
| 51 |
|
#include <config.h> |
| 52 |
|
#endif |
| 53 |
|
|
| 54 |
#include "pcre_internal.h" |
#include "pcre_internal.h" |
| 55 |
|
|
| 56 |
|
|
| 64 |
|
|
| 65 |
Arguments: |
Arguments: |
| 66 |
ptr pointer to possible newline |
ptr pointer to possible newline |
| 67 |
|
type the newline type |
| 68 |
endptr pointer to the end of the string |
endptr pointer to the end of the string |
| 69 |
lenptr where to return the length |
lenptr where to return the length |
| 70 |
utf8 TRUE if in utf8 mode |
utf8 TRUE if in utf8 mode |
| 73 |
*/ |
*/ |
| 74 |
|
|
| 75 |
BOOL |
BOOL |
| 76 |
_pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr, |
_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr, |
| 77 |
BOOL utf8) |
int *lenptr, BOOL utf8) |
| 78 |
{ |
{ |
| 79 |
int c; |
int c; |
| 80 |
if (utf8) { GETCHAR(c, ptr); } else c = *ptr; |
if (utf8) { GETCHAR(c, ptr); } else c = *ptr; |
| 81 |
switch(c) |
|
| 82 |
|
if (type == NLTYPE_ANYCRLF) switch(c) |
| 83 |
|
{ |
| 84 |
|
case 0x000a: *lenptr = 1; return TRUE; /* LF */ |
| 85 |
|
case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1; |
| 86 |
|
return TRUE; /* CR */ |
| 87 |
|
default: return FALSE; |
| 88 |
|
} |
| 89 |
|
|
| 90 |
|
/* NLTYPE_ANY */ |
| 91 |
|
|
| 92 |
|
else switch(c) |
| 93 |
{ |
{ |
| 94 |
case 0x000a: /* LF */ |
case 0x000a: /* LF */ |
| 95 |
case 0x000b: /* VT */ |
case 0x000b: /* VT */ |
| 114 |
|
|
| 115 |
Arguments: |
Arguments: |
| 116 |
ptr pointer to just after the possible newline |
ptr pointer to just after the possible newline |
| 117 |
|
type the newline type |
| 118 |
startptr pointer to the start of the string |
startptr pointer to the start of the string |
| 119 |
lenptr where to return the length |
lenptr where to return the length |
| 120 |
utf8 TRUE if in utf8 mode |
utf8 TRUE if in utf8 mode |
| 123 |
*/ |
*/ |
| 124 |
|
|
| 125 |
BOOL |
BOOL |
| 126 |
_pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr, |
_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr, |
| 127 |
BOOL utf8) |
int *lenptr, BOOL utf8) |
| 128 |
{ |
{ |
| 129 |
int c; |
int c; |
| 130 |
ptr--; |
ptr--; |
| 134 |
GETCHAR(c, ptr); |
GETCHAR(c, ptr); |
| 135 |
} |
} |
| 136 |
else c = *ptr; |
else c = *ptr; |
| 137 |
switch(c) |
|
| 138 |
|
if (type == NLTYPE_ANYCRLF) switch(c) |
| 139 |
|
{ |
| 140 |
|
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; |
| 141 |
|
return TRUE; /* LF */ |
| 142 |
|
case 0x000d: *lenptr = 1; return TRUE; /* CR */ |
| 143 |
|
default: return FALSE; |
| 144 |
|
} |
| 145 |
|
|
| 146 |
|
else switch(c) |
| 147 |
{ |
{ |
| 148 |
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; |
case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1; |
| 149 |
return TRUE; /* LF */ |
return TRUE; /* LF */ |