/[pcre]/code/branches/pcre16/pcre_ucp_searchfuncs.c
ViewVC logotype

Contents of /code/branches/pcre16/pcre_ucp_searchfuncs.c

Parent Directory | Revision Log


Revision 755 - (show annotations) (download)
Mon Nov 21 10:41:54 2011 UTC (2 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 6614 byte(s)
Created a new branch for the development of 16-bit support.

1 ############################################################
2 ############################################################
3 ## As of PCRE 8.0 this file is OBSOLETE. A different way ##
4 ## of handling Unicode property data is now used. See the ##
5 ## maint/README document. ##
6 ## PH 02 July 2008 ##
7 ############################################################
8 ############################################################
9
10 /*************************************************
11 * Perl-Compatible Regular Expressions *
12 *************************************************/
13
14 /* PCRE is a library of functions to support regular expressions whose syntax
15 and semantics are as close as possible to those of the Perl 5 language.
16
17 Written by Philip Hazel
18 Copyright (c) 1997-2008 University of Cambridge
19
20 -----------------------------------------------------------------------------
21 Redistribution and use in source and binary forms, with or without
22 modification, are permitted provided that the following conditions are met:
23
24 * Redistributions of source code must retain the above copyright notice,
25 this list of conditions and the following disclaimer.
26
27 * Redistributions in binary form must reproduce the above copyright
28 notice, this list of conditions and the following disclaimer in the
29 documentation and/or other materials provided with the distribution.
30
31 * Neither the name of the University of Cambridge nor the names of its
32 contributors may be used to endorse or promote products derived from
33 this software without specific prior written permission.
34
35 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
36 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
39 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
42 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
43 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
44 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
45 POSSIBILITY OF SUCH DAMAGE.
46 -----------------------------------------------------------------------------
47 */
48
49
50 /* This module contains code for searching the table of Unicode character
51 properties. */
52
53 #ifdef HAVE_CONFIG_H
54 #include "config.h"
55 #endif
56
57 #include "pcre_internal.h"
58
59 #include "ucp.h" /* Category definitions */
60 #include "ucpinternal.h" /* Internal table details */
61 #include "ucptable.h" /* The table itself */
62
63
64 /* Table to translate from particular type value to the general value. */
65
66 static const int ucp_gentype[] = {
67 ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
68 ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
69 ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
70 ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
71 ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
72 ucp_P, ucp_P, /* Ps, Po */
73 ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
74 ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
75 };
76
77
78
79 /*************************************************
80 * Search table and return type *
81 *************************************************/
82
83 /* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
84 character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
85
86 Arguments:
87 c the character value
88 type_ptr the detailed character type is returned here
89 script_ptr the script is returned here
90
91 Returns: the character type category
92 */
93
94 int
95 _pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
96 {
97 int bot = 0;
98 int top = sizeof(ucp_table)/sizeof(cnode);
99 int mid;
100
101 /* The table is searched using a binary chop. You might think that using
102 intermediate variables to hold some of the common expressions would speed
103 things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
104 makes things a lot slower. */
105
106 for (;;)
107 {
108 if (top <= bot)
109 {
110 *type_ptr = ucp_Cn;
111 *script_ptr = ucp_Common;
112 return ucp_C;
113 }
114 mid = (bot + top) >> 1;
115 if (c == (ucp_table[mid].f0 & f0_charmask)) break;
116 if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
117 else
118 {
119 if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
120 c <= (ucp_table[mid].f0 & f0_charmask) +
121 (ucp_table[mid].f1 & f1_rangemask)) break;
122 bot = mid + 1;
123 }
124 }
125
126 /* Found an entry in the table. Set the script and detailed type values, and
127 return the general type. */
128
129 *script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
130 *type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
131
132 return ucp_gentype[*type_ptr];
133 }
134
135
136
137 /*************************************************
138 * Search table and return other case *
139 *************************************************/
140
141 /* If the given character is a letter, and there is another case for the
142 letter, return the other case. Otherwise, return -1.
143
144 Arguments:
145 c the character value
146
147 Returns: the other case or NOTACHAR if none
148 */
149
150 unsigned int
151 _pcre_ucp_othercase(const unsigned int c)
152 {
153 int bot = 0;
154 int top = sizeof(ucp_table)/sizeof(cnode);
155 int mid, offset;
156
157 /* The table is searched using a binary chop. You might think that using
158 intermediate variables to hold some of the common expressions would speed
159 things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
160 makes things a lot slower. */
161
162 for (;;)
163 {
164 if (top <= bot) return (unsigned int)(-1);
165 mid = (bot + top) >> 1;
166 if (c == (ucp_table[mid].f0 & f0_charmask)) break;
167 if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
168 else
169 {
170 if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
171 c <= (ucp_table[mid].f0 & f0_charmask) +
172 (ucp_table[mid].f1 & f1_rangemask)) break;
173 bot = mid + 1;
174 }
175 }
176
177 /* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
178 return the other case if there is one, else NOTACHAR. */
179
180 if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
181
182 offset = ucp_table[mid].f1 & f1_casemask;
183 if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
184 return (offset == 0)? NOTACHAR : c + offset;
185 }
186
187
188 /* End of pcre_ucp_searchfuncs.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12