/[pcre]/code/trunk/pcre_ucp_searchfuncs.c
ViewVC logotype

Contents of /code/trunk/pcre_ucp_searchfuncs.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 369 - (hide annotations) (download)
Sun Aug 24 16:53:47 2008 UTC (5 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 6614 byte(s)
Patch to reduce warnings from certain compilers.

1 ph10 351 ############################################################
2     ############################################################
3     ## As of PCRE 8.0 this file is OBSOLETE. A different way ##
4     ## of handling Unicode property data is now used. See the ##
5     ## maint/README document. ##
6     ## PH 02 July 2008 ##
7     ############################################################
8     ############################################################
9    
10 nigel 87 /*************************************************
11     * Perl-Compatible Regular Expressions *
12     *************************************************/
13    
14     /* PCRE is a library of functions to support regular expressions whose syntax
15     and semantics are as close as possible to those of the Perl 5 language.
16    
17     Written by Philip Hazel
18 ph10 305 Copyright (c) 1997-2008 University of Cambridge
19 nigel 87
20     -----------------------------------------------------------------------------
21     Redistribution and use in source and binary forms, with or without
22     modification, are permitted provided that the following conditions are met:
23    
24     * Redistributions of source code must retain the above copyright notice,
25     this list of conditions and the following disclaimer.
26    
27     * Redistributions in binary form must reproduce the above copyright
28     notice, this list of conditions and the following disclaimer in the
29     documentation and/or other materials provided with the distribution.
30    
31     * Neither the name of the University of Cambridge nor the names of its
32     contributors may be used to endorse or promote products derived from
33     this software without specific prior written permission.
34    
35     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
36     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
37     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
38     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
39     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
40     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
41     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
42     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
43     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
44     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
45     POSSIBILITY OF SUCH DAMAGE.
46     -----------------------------------------------------------------------------
47     */
48    
49    
50     /* This module contains code for searching the table of Unicode character
51     properties. */
52    
53 ph10 200 #ifdef HAVE_CONFIG_H
54 ph10 236 #include "config.h"
55 ph10 200 #endif
56 ph10 199
57 nigel 87 #include "pcre_internal.h"
58    
59     #include "ucp.h" /* Category definitions */
60     #include "ucpinternal.h" /* Internal table details */
61 ph10 97 #include "ucptable.h" /* The table itself */
62 nigel 87
63    
64     /* Table to translate from particular type value to the general value. */
65    
66 ph10 105 static const int ucp_gentype[] = {
67 nigel 87 ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
68     ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
69     ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
70     ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
71     ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
72     ucp_P, ucp_P, /* Ps, Po */
73     ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
74     ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
75     };
76    
77    
78    
79     /*************************************************
80     * Search table and return type *
81     *************************************************/
82    
83     /* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
84     character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
85    
86     Arguments:
87     c the character value
88     type_ptr the detailed character type is returned here
89     script_ptr the script is returned here
90    
91     Returns: the character type category
92     */
93    
94     int
95 nigel 91 _pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
96 nigel 87 {
97     int bot = 0;
98     int top = sizeof(ucp_table)/sizeof(cnode);
99     int mid;
100    
101     /* The table is searched using a binary chop. You might think that using
102     intermediate variables to hold some of the common expressions would speed
103     things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
104     makes things a lot slower. */
105    
106     for (;;)
107     {
108     if (top <= bot)
109     {
110     *type_ptr = ucp_Cn;
111     *script_ptr = ucp_Common;
112     return ucp_C;
113     }
114     mid = (bot + top) >> 1;
115     if (c == (ucp_table[mid].f0 & f0_charmask)) break;
116     if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
117     else
118     {
119     if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
120     c <= (ucp_table[mid].f0 & f0_charmask) +
121     (ucp_table[mid].f1 & f1_rangemask)) break;
122     bot = mid + 1;
123     }
124     }
125    
126     /* Found an entry in the table. Set the script and detailed type values, and
127     return the general type. */
128    
129     *script_ptr = (ucp_table[mid].f0 & f0_scriptmask) >> f0_scriptshift;
130     *type_ptr = (ucp_table[mid].f1 & f1_typemask) >> f1_typeshift;
131    
132     return ucp_gentype[*type_ptr];
133     }
134    
135    
136    
137     /*************************************************
138     * Search table and return other case *
139     *************************************************/
140    
141     /* If the given character is a letter, and there is another case for the
142     letter, return the other case. Otherwise, return -1.
143    
144     Arguments:
145     c the character value
146    
147 nigel 93 Returns: the other case or NOTACHAR if none
148 nigel 87 */
149    
150 nigel 93 unsigned int
151     _pcre_ucp_othercase(const unsigned int c)
152 nigel 87 {
153     int bot = 0;
154     int top = sizeof(ucp_table)/sizeof(cnode);
155     int mid, offset;
156    
157     /* The table is searched using a binary chop. You might think that using
158     intermediate variables to hold some of the common expressions would speed
159     things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
160     makes things a lot slower. */
161    
162     for (;;)
163     {
164 ph10 369 if (top <= bot) return (unsigned int)(-1);
165 nigel 87 mid = (bot + top) >> 1;
166     if (c == (ucp_table[mid].f0 & f0_charmask)) break;
167     if (c < (ucp_table[mid].f0 & f0_charmask)) top = mid;
168     else
169     {
170     if ((ucp_table[mid].f0 & f0_rangeflag) != 0 &&
171     c <= (ucp_table[mid].f0 & f0_charmask) +
172     (ucp_table[mid].f1 & f1_rangemask)) break;
173     bot = mid + 1;
174     }
175     }
176    
177 nigel 93 /* Found an entry in the table. Return NOTACHAR for a range entry. Otherwise
178     return the other case if there is one, else NOTACHAR. */
179 nigel 87
180 nigel 93 if ((ucp_table[mid].f0 & f0_rangeflag) != 0) return NOTACHAR;
181 nigel 87
182     offset = ucp_table[mid].f1 & f1_casemask;
183     if ((offset & f1_caseneg) != 0) offset |= f1_caseneg;
184 nigel 93 return (offset == 0)? NOTACHAR : c + offset;
185 nigel 87 }
186    
187    
188     /* End of pcre_ucp_searchfuncs.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12