/[pcre]/code/trunk/pcre_xclass.c
ViewVC logotype

Contents of /code/trunk/pcre_xclass.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1414 - (hide annotations) (download)
Sun Dec 22 16:27:35 2013 UTC (8 months, 1 week ago) by zherczeg
File MIME type: text/plain
File size: 8231 byte(s)
A new flag is set, when property checks are present in an XCLASS.
1 nigel 77 /*************************************************
2     * Perl-Compatible Regular Expressions *
3     *************************************************/
4    
5     /* PCRE is a library of functions to support regular expressions whose syntax
6     and semantics are as close as possible to those of the Perl 5 language.
7    
8     Written by Philip Hazel
9 ph10 1260 Copyright (c) 1997-2013 University of Cambridge
10 nigel 77
11     -----------------------------------------------------------------------------
12     Redistribution and use in source and binary forms, with or without
13     modification, are permitted provided that the following conditions are met:
14    
15     * Redistributions of source code must retain the above copyright notice,
16     this list of conditions and the following disclaimer.
17    
18     * Redistributions in binary form must reproduce the above copyright
19     notice, this list of conditions and the following disclaimer in the
20     documentation and/or other materials provided with the distribution.
21    
22     * Neither the name of the University of Cambridge nor the names of its
23     contributors may be used to endorse or promote products derived from
24     this software without specific prior written permission.
25    
26     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36     POSSIBILITY OF SUCH DAMAGE.
37     -----------------------------------------------------------------------------
38     */
39    
40    
41     /* This module contains an internal function that is used to match an extended
42 ph10 384 class. It is used by both pcre_exec() and pcre_dfa_exec(). */
43 nigel 77
44    
45 ph10 200 #ifdef HAVE_CONFIG_H
46 ph10 236 #include "config.h"
47 ph10 200 #endif
48 ph10 199
49 nigel 77 #include "pcre_internal.h"
50    
51    
52     /*************************************************
53     * Match character against an XCLASS *
54     *************************************************/
55    
56     /* This function is called to match a character against an extended class that
57 ph10 384 might contain values > 255 and/or Unicode properties.
58 nigel 77
59     Arguments:
60     c the character
61     data points to the flag byte of the XCLASS data
62    
63     Returns: TRUE if character matches, else FALSE
64     */
65    
66 nigel 87 BOOL
67 chpe 1057 PRIV(xclass)(pcre_uint32 c, const pcre_uchar *data, BOOL utf)
68 nigel 77 {
69 chpe 1057 pcre_uchar t;
70 nigel 77 BOOL negated = (*data & XCL_NOT) != 0;
71    
72 ph10 836 (void)utf;
73     #ifdef COMPILE_PCRE8
74     /* In 8 bit mode, this must always be TRUE. Help the compiler to know that. */
75     utf = TRUE;
76     #endif
77    
78 nigel 77 /* Character values < 256 are matched against a bitmap, if one is present. If
79     not, we still carry on, because there may be ranges that start below 256 in the
80     additional data. */
81    
82     if (c < 256)
83     {
84 zherczeg 1414 if ((*data & XCL_HASPROP) == 0)
85     {
86     if ((*data & XCL_MAP) == 0) return negated;
87     return (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0;
88     }
89 ph10 836 if ((*data & XCL_MAP) != 0 &&
90     (((pcre_uint8 *)(data + 1))[c/8] & (1 << (c&7))) != 0)
91     return !negated; /* char found */
92 nigel 77 }
93    
94     /* First skip the bit map if present. Then match against the list of Unicode
95     properties or large chars or ranges that end with a large char. We won't ever
96     encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
97    
98 ph10 836 if ((*data++ & XCL_MAP) != 0) data += 32 / sizeof(pcre_uchar);
99 nigel 77
100     while ((t = *data++) != XCL_END)
101     {
102 chpe 1057 pcre_uint32 x, y;
103 nigel 77 if (t == XCL_SINGLE)
104     {
105 ph10 836 #ifdef SUPPORT_UTF
106     if (utf)
107     {
108     GETCHARINC(x, data); /* macro generates multiple statements */
109     }
110     else
111     #endif
112     x = *data++;
113 nigel 77 if (c == x) return !negated;
114     }
115     else if (t == XCL_RANGE)
116     {
117 ph10 836 #ifdef SUPPORT_UTF
118     if (utf)
119     {
120     GETCHARINC(x, data); /* macro generates multiple statements */
121     GETCHARINC(y, data); /* macro generates multiple statements */
122     }
123     else
124     #endif
125     {
126     x = *data++;
127     y = *data++;
128     }
129 nigel 77 if (c >= x && c <= y) return !negated;
130     }
131    
132     #ifdef SUPPORT_UCP
133     else /* XCL_PROP & XCL_NOTPROP */
134     {
135 ph10 384 const ucd_record *prop = GET_UCD(c);
136 ph10 1404 BOOL isprop = t == XCL_PROP;
137 nigel 87
138     switch(*data)
139 nigel 77 {
140 nigel 87 case PT_ANY:
141 ph10 1387 if (isprop) return !negated;
142 nigel 87 break;
143    
144     case PT_LAMP:
145 ph10 535 if ((prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
146 ph10 1387 prop->chartype == ucp_Lt) == isprop) return !negated;
147 nigel 87 break;
148    
149     case PT_GC:
150 ph10 1387 if ((data[1] == PRIV(ucp_gentype)[prop->chartype]) == isprop)
151 ph10 517 return !negated;
152 nigel 87 break;
153    
154     case PT_PC:
155 ph10 1387 if ((data[1] == prop->chartype) == isprop) return !negated;
156 nigel 87 break;
157    
158     case PT_SC:
159 ph10 1387 if ((data[1] == prop->script) == isprop) return !negated;
160 nigel 87 break;
161 ph10 535
162 ph10 517 case PT_ALNUM:
163 ph10 836 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
164 ph10 1387 PRIV(ucp_gentype)[prop->chartype] == ucp_N) == isprop)
165 ph10 517 return !negated;
166 ph10 535 break;
167    
168 ph10 1364 /* Perl space used to exclude VT, but from Perl 5.18 it is included,
169     which means that Perl space and POSIX space are now identical. PCRE
170     was changed at release 8.34. */
171 ph10 1404
172 ph10 517 case PT_SPACE: /* Perl space */
173     case PT_PXSPACE: /* POSIX space */
174 ph10 1376 switch(c)
175     {
176     HSPACE_CASES:
177     VSPACE_CASES:
178 ph10 1404 if (isprop) return !negated;
179 ph10 1376 break;
180 ph10 1404
181 ph10 1376 default:
182 ph10 1387 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == isprop)
183 ph10 1376 return !negated;
184     break;
185     }
186 ph10 535 break;
187 ph10 517
188 ph10 535 case PT_WORD:
189 ph10 836 if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
190     PRIV(ucp_gentype)[prop->chartype] == ucp_N || c == CHAR_UNDERSCORE)
191 ph10 1387 == isprop)
192 ph10 517 return !negated;
193 ph10 535 break;
194 ph10 1335
195 ph10 1260 case PT_UCNC:
196 ph10 1335 if (c < 0xa0)
197     {
198     if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
199 ph10 1387 c == CHAR_GRAVE_ACCENT) == isprop)
200 ph10 1260 return !negated;
201 ph10 1335 }
202     else
203     {
204 ph10 1387 if ((c < 0xd800 || c > 0xdfff) == isprop)
205 ph10 1260 return !negated;
206 ph10 1335 }
207     break;
208 ph10 1404
209 ph10 1387 /* The following three properties can occur only in an XCLASS, as there
210     is no \p or \P coding for them. */
211 ph10 517
212 ph10 1404 /* Graphic character. Implement this as not Z (space or separator) and
213     not C (other), except for Cf (format) with a few exceptions. This seems
214 ph10 1387 to be what Perl does. The exceptional characters are:
215 ph10 1404
216 ph10 1387 U+061C Arabic Letter Mark
217 ph10 1404 U+180E Mongolian Vowel Separator
218 ph10 1387 U+2066 - U+2069 Various "isolate"s
219 ph10 1404 */
220    
221 ph10 1387 case PT_PXGRAPH:
222     if ((PRIV(ucp_gentype)[prop->chartype] != ucp_Z &&
223     (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
224 ph10 1404 (prop->chartype == ucp_Cf &&
225 ph10 1387 c != 0x061c && c != 0x180e && (c < 0x2066 || c > 0x2069))
226     )) == isprop)
227 ph10 1404 return !negated;
228 ph10 1387 break;
229 ph10 1404
230     /* Printable character: same as graphic, with the addition of Zs, i.e.
231 ph10 1387 not Zl and not Zp, and U+180E. */
232    
233     case PT_PXPRINT:
234     if ((prop->chartype != ucp_Zl &&
235 ph10 1404 prop->chartype != ucp_Zp &&
236 ph10 1387 (PRIV(ucp_gentype)[prop->chartype] != ucp_C ||
237 ph10 1404 (prop->chartype == ucp_Cf &&
238 ph10 1387 c != 0x061c && (c < 0x2066 || c > 0x2069))
239     )) == isprop)
240 ph10 1404 return !negated;
241 ph10 1387 break;
242 ph10 1404
243     /* Punctuation: all Unicode punctuation, plus ASCII characters that
244 ph10 1387 Unicode treats as symbols rather than punctuation, for Perl
245     compatibility (these are $+<=>^`|~). */
246    
247     case PT_PXPUNCT:
248     if ((PRIV(ucp_gentype)[prop->chartype] == ucp_P ||
249     (c < 256 && PRIV(ucp_gentype)[prop->chartype] == ucp_S)) == isprop)
250     return !negated;
251 ph10 1404 break;
252 ph10 1387
253 nigel 87 /* This should never occur, but compilers may mutter if there is no
254     default. */
255    
256     default:
257     return FALSE;
258 nigel 77 }
259 nigel 87
260     data += 2;
261 nigel 77 }
262     #endif /* SUPPORT_UCP */
263     }
264    
265     return negated; /* char did not match */
266     }
267    
268     /* End of pcre_xclass.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12