/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory | Revision Log | View Patch

revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC | revision 351 by ph10, Fri Jul 4 18:27:16 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  clashes with the library. */ Line 45  clashes with the library. */
45    
46    
47  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
48  #include <config.h>  #include "config.h"
49  #endif  #endif
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
# Line 87  const uschar _pcre_utf8_table4[] = { Line 87  const uschar _pcre_utf8_table4[] = {
87    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
88    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
89    
90  /* This table translates Unicode property names into type and code values. It  /* Table to translate from particular type value to the general value. */
91  is searched by binary chop, so must be in collating sequence of name. */  
92    const int _pcre_ucp_gentype[] = {
93      ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
94      ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
95      ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
96      ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
97      ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
98      ucp_P, ucp_P,                       /* Ps, Po */
99      ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
100      ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
101    };
102    
103    /* The pcre_utt[] table below translates Unicode property names into type and
104    code values. It is searched by binary chop, so must be in collating sequence of
105    name. Originally, the table contained pointers to the name strings in the first
106    field of each entry. However, that leads to a large number of relocations when
107    a shared library is dynamically loaded. A significant reduction is made by
108    putting all the names into a single, large string and then using offsets in the
109    table itself. Maintenance is more error-prone, but frequent changes to this
110    data are unlikely.
111    
112    July 2008: There is now a script called maint/GenerateUtt.py which can be used
113    to generate this data instead of maintaining it entirely by hand. */
114    
115    const char _pcre_utt_names[] =
116      "Any\0"
117      "Arabic\0"
118      "Armenian\0"
119      "Balinese\0"
120      "Bengali\0"
121      "Bopomofo\0"
122      "Braille\0"
123      "Buginese\0"
124      "Buhid\0"
125      "C\0"
126      "Canadian_Aboriginal\0"
127      "Carian\0"
128      "Cc\0"
129      "Cf\0"
130      "Cham\0"
131      "Cherokee\0"
132      "Cn\0"
133      "Co\0"
134      "Common\0"
135      "Coptic\0"
136      "Cs\0"
137      "Cuneiform\0"
138      "Cypriot\0"
139      "Cyrillic\0"
140      "Deseret\0"
141      "Devanagari\0"
142      "Ethiopic\0"
143      "Georgian\0"
144      "Glagolitic\0"
145      "Gothic\0"
146      "Greek\0"
147      "Gujarati\0"
148      "Gurmukhi\0"
149      "Han\0"
150      "Hangul\0"
151      "Hanunoo\0"
152      "Hebrew\0"
153      "Hiragana\0"
154      "Inherited\0"
155      "Kannada\0"
156      "Katakana\0"
157      "Kayah_Li\0"
158      "Kharoshthi\0"
159      "Khmer\0"
160      "L\0"
161      "L&\0"
162      "Lao\0"
163      "Latin\0"
164      "Lepcha\0"
165      "Limbu\0"
166      "Linear_B\0"
167      "Ll\0"
168      "Lm\0"
169      "Lo\0"
170      "Lt\0"
171      "Lu\0"
172      "Lycian\0"
173      "Lydian\0"
174      "M\0"
175      "Malayalam\0"
176      "Mc\0"
177      "Me\0"
178      "Mn\0"
179      "Mongolian\0"
180      "Myanmar\0"
181      "N\0"
182      "Nd\0"
183      "New_Tai_Lue\0"
184      "Nko\0"
185      "Nl\0"
186      "No\0"
187      "Ogham\0"
188      "Ol_Chiki\0"
189      "Old_Italic\0"
190      "Old_Persian\0"
191      "Oriya\0"
192      "Osmanya\0"
193      "P\0"
194      "Pc\0"
195      "Pd\0"
196      "Pe\0"
197      "Pf\0"
198      "Phags_Pa\0"
199      "Phoenician\0"
200      "Pi\0"
201      "Po\0"
202      "Ps\0"
203      "Rejang\0"
204      "Runic\0"
205      "S\0"
206      "Saurashtra\0"
207      "Sc\0"
208      "Shavian\0"
209      "Sinhala\0"
210      "Sk\0"
211      "Sm\0"
212      "So\0"
213      "Sundanese\0"
214      "Syloti_Nagri\0"
215      "Syriac\0"
216      "Tagalog\0"
217      "Tagbanwa\0"
218      "Tai_Le\0"
219      "Tamil\0"
220      "Telugu\0"
221      "Thaana\0"
222      "Thai\0"
223      "Tibetan\0"
224      "Tifinagh\0"
225      "Ugaritic\0"
226      "Vai\0"
227      "Yi\0"
228      "Z\0"
229      "Zl\0"
230      "Zp\0"
231      "Zs\0";
232    
233  const ucp_type_table _pcre_utt[] = {  const ucp_type_table _pcre_utt[] = {
234    { "Any",                 PT_ANY,  0 },    {   0, PT_ANY, 0 },
235    { "Arabic",              PT_SC,   ucp_Arabic },    {   4, PT_SC, ucp_Arabic },
236    { "Armenian",            PT_SC,   ucp_Armenian },    {  11, PT_SC, ucp_Armenian },
237    { "Balinese",            PT_SC,   ucp_Balinese },    {  20, PT_SC, ucp_Balinese },
238    { "Bengali",             PT_SC,   ucp_Bengali },    {  29, PT_SC, ucp_Bengali },
239    { "Bopomofo",            PT_SC,   ucp_Bopomofo },    {  37, PT_SC, ucp_Bopomofo },
240    { "Braille",             PT_SC,   ucp_Braille },    {  46, PT_SC, ucp_Braille },
241    { "Buginese",            PT_SC,   ucp_Buginese },    {  54, PT_SC, ucp_Buginese },
242    { "Buhid",               PT_SC,   ucp_Buhid },    {  63, PT_SC, ucp_Buhid },
243    { "C",                   PT_GC,   ucp_C },    {  69, PT_GC, ucp_C },
244    { "Canadian_Aboriginal", PT_SC,   ucp_Canadian_Aboriginal },    {  71, PT_SC, ucp_Canadian_Aboriginal },
245    { "Cc",                  PT_PC,   ucp_Cc },    {  91, PT_SC, ucp_Carian },
246    { "Cf",                  PT_PC,   ucp_Cf },    {  98, PT_PC, ucp_Cc },
247    { "Cherokee",            PT_SC,   ucp_Cherokee },    { 101, PT_PC, ucp_Cf },
248    { "Cn",                  PT_PC,   ucp_Cn },    { 104, PT_SC, ucp_Cham },
249    { "Co",                  PT_PC,   ucp_Co },    { 109, PT_SC, ucp_Cherokee },
250    { "Common",              PT_SC,   ucp_Common },    { 118, PT_PC, ucp_Cn },
251    { "Coptic",              PT_SC,   ucp_Coptic },    { 121, PT_PC, ucp_Co },
252    { "Cs",                  PT_PC,   ucp_Cs },    { 124, PT_SC, ucp_Common },
253    { "Cuneiform",           PT_SC,   ucp_Cuneiform },    { 131, PT_SC, ucp_Coptic },
254    { "Cypriot",             PT_SC,   ucp_Cypriot },    { 138, PT_PC, ucp_Cs },
255    { "Cyrillic",            PT_SC,   ucp_Cyrillic },    { 141, PT_SC, ucp_Cuneiform },
256    { "Deseret",             PT_SC,   ucp_Deseret },    { 151, PT_SC, ucp_Cypriot },
257    { "Devanagari",          PT_SC,   ucp_Devanagari },    { 159, PT_SC, ucp_Cyrillic },
258    { "Ethiopic",            PT_SC,   ucp_Ethiopic },    { 168, PT_SC, ucp_Deseret },
259    { "Georgian",            PT_SC,   ucp_Georgian },    { 176, PT_SC, ucp_Devanagari },
260    { "Glagolitic",          PT_SC,   ucp_Glagolitic },    { 187, PT_SC, ucp_Ethiopic },
261    { "Gothic",              PT_SC,   ucp_Gothic },    { 196, PT_SC, ucp_Georgian },
262    { "Greek",               PT_SC,   ucp_Greek },    { 205, PT_SC, ucp_Glagolitic },
263    { "Gujarati",            PT_SC,   ucp_Gujarati },    { 216, PT_SC, ucp_Gothic },
264    { "Gurmukhi",            PT_SC,   ucp_Gurmukhi },    { 223, PT_SC, ucp_Greek },
265    { "Han",                 PT_SC,   ucp_Han },    { 229, PT_SC, ucp_Gujarati },
266    { "Hangul",              PT_SC,   ucp_Hangul },    { 238, PT_SC, ucp_Gurmukhi },
267    { "Hanunoo",             PT_SC,   ucp_Hanunoo },    { 247, PT_SC, ucp_Han },
268    { "Hebrew",              PT_SC,   ucp_Hebrew },    { 251, PT_SC, ucp_Hangul },
269    { "Hiragana",            PT_SC,   ucp_Hiragana },    { 258, PT_SC, ucp_Hanunoo },
270    { "Inherited",           PT_SC,   ucp_Inherited },    { 266, PT_SC, ucp_Hebrew },
271    { "Kannada",             PT_SC,   ucp_Kannada },    { 273, PT_SC, ucp_Hiragana },
272    { "Katakana",            PT_SC,   ucp_Katakana },    { 282, PT_SC, ucp_Inherited },
273    { "Kharoshthi",          PT_SC,   ucp_Kharoshthi },    { 292, PT_SC, ucp_Kannada },
274    { "Khmer",               PT_SC,   ucp_Khmer },    { 300, PT_SC, ucp_Katakana },
275    { "L",                   PT_GC,   ucp_L },    { 309, PT_SC, ucp_Kayah_Li },
276    { "L&",                  PT_LAMP, 0 },    { 318, PT_SC, ucp_Kharoshthi },
277    { "Lao",                 PT_SC,   ucp_Lao },    { 329, PT_SC, ucp_Khmer },
278    { "Latin",               PT_SC,   ucp_Latin },    { 335, PT_GC, ucp_L },
279    { "Limbu",               PT_SC,   ucp_Limbu },    { 337, PT_LAMP, 0 },
280    { "Linear_B",            PT_SC,   ucp_Linear_B },    { 340, PT_SC, ucp_Lao },
281    { "Ll",                  PT_PC,   ucp_Ll },    { 344, PT_SC, ucp_Latin },
282    { "Lm",                  PT_PC,   ucp_Lm },    { 350, PT_SC, ucp_Lepcha },
283    { "Lo",                  PT_PC,   ucp_Lo },    { 357, PT_SC, ucp_Limbu },
284    { "Lt",                  PT_PC,   ucp_Lt },    { 363, PT_SC, ucp_Linear_B },
285    { "Lu",                  PT_PC,   ucp_Lu },    { 372, PT_PC, ucp_Ll },
286    { "M",                   PT_GC,   ucp_M },    { 375, PT_PC, ucp_Lm },
287    { "Malayalam",           PT_SC,   ucp_Malayalam },    { 378, PT_PC, ucp_Lo },
288    { "Mc",                  PT_PC,   ucp_Mc },    { 381, PT_PC, ucp_Lt },
289    { "Me",                  PT_PC,   ucp_Me },    { 384, PT_PC, ucp_Lu },
290    { "Mn",                  PT_PC,   ucp_Mn },    { 387, PT_SC, ucp_Lycian },
291    { "Mongolian",           PT_SC,   ucp_Mongolian },    { 394, PT_SC, ucp_Lydian },
292    { "Myanmar",             PT_SC,   ucp_Myanmar },    { 401, PT_GC, ucp_M },
293    { "N",                   PT_GC,   ucp_N },    { 403, PT_SC, ucp_Malayalam },
294    { "Nd",                  PT_PC,   ucp_Nd },    { 413, PT_PC, ucp_Mc },
295    { "New_Tai_Lue",         PT_SC,   ucp_New_Tai_Lue },    { 416, PT_PC, ucp_Me },
296    { "Nko",                 PT_SC,   ucp_Nko },    { 419, PT_PC, ucp_Mn },
297    { "Nl",                  PT_PC,   ucp_Nl },    { 422, PT_SC, ucp_Mongolian },
298    { "No",                  PT_PC,   ucp_No },    { 432, PT_SC, ucp_Myanmar },
299    { "Ogham",               PT_SC,   ucp_Ogham },    { 440, PT_GC, ucp_N },
300    { "Old_Italic",          PT_SC,   ucp_Old_Italic },    { 442, PT_PC, ucp_Nd },
301    { "Old_Persian",         PT_SC,   ucp_Old_Persian },    { 445, PT_SC, ucp_New_Tai_Lue },
302    { "Oriya",               PT_SC,   ucp_Oriya },    { 457, PT_SC, ucp_Nko },
303    { "Osmanya",             PT_SC,   ucp_Osmanya },    { 461, PT_PC, ucp_Nl },
304    { "P",                   PT_GC,   ucp_P },    { 464, PT_PC, ucp_No },
305    { "Pc",                  PT_PC,   ucp_Pc },    { 467, PT_SC, ucp_Ogham },
306    { "Pd",                  PT_PC,   ucp_Pd },    { 473, PT_SC, ucp_Ol_Chiki },
307    { "Pe",                  PT_PC,   ucp_Pe },    { 482, PT_SC, ucp_Old_Italic },
308    { "Pf",                  PT_PC,   ucp_Pf },    { 493, PT_SC, ucp_Old_Persian },
309    { "Phags_Pa",            PT_SC,   ucp_Phags_Pa },    { 505, PT_SC, ucp_Oriya },
310    { "Phoenician",          PT_SC,   ucp_Phoenician },    { 511, PT_SC, ucp_Osmanya },
311    { "Pi",                  PT_PC,   ucp_Pi },    { 519, PT_GC, ucp_P },
312    { "Po",                  PT_PC,   ucp_Po },    { 521, PT_PC, ucp_Pc },
313    { "Ps",                  PT_PC,   ucp_Ps },    { 524, PT_PC, ucp_Pd },
314    { "Runic",               PT_SC,   ucp_Runic },    { 527, PT_PC, ucp_Pe },
315    { "S",                   PT_GC,   ucp_S },    { 530, PT_PC, ucp_Pf },
316    { "Sc",                  PT_PC,   ucp_Sc },    { 533, PT_SC, ucp_Phags_Pa },
317    { "Shavian",             PT_SC,   ucp_Shavian },    { 542, PT_SC, ucp_Phoenician },
318    { "Sinhala",             PT_SC,   ucp_Sinhala },    { 553, PT_PC, ucp_Pi },
319    { "Sk",                  PT_PC,   ucp_Sk },    { 556, PT_PC, ucp_Po },
320    { "Sm",                  PT_PC,   ucp_Sm },    { 559, PT_PC, ucp_Ps },
321    { "So",                  PT_PC,   ucp_So },    { 562, PT_SC, ucp_Rejang },
322    { "Syloti_Nagri",        PT_SC,   ucp_Syloti_Nagri },    { 569, PT_SC, ucp_Runic },
323    { "Syriac",              PT_SC,   ucp_Syriac },    { 575, PT_GC, ucp_S },
324    { "Tagalog",             PT_SC,   ucp_Tagalog },    { 577, PT_SC, ucp_Saurashtra },
325    { "Tagbanwa",            PT_SC,   ucp_Tagbanwa },    { 588, PT_PC, ucp_Sc },
326    { "Tai_Le",              PT_SC,   ucp_Tai_Le },    { 591, PT_SC, ucp_Shavian },
327    { "Tamil",               PT_SC,   ucp_Tamil },    { 599, PT_SC, ucp_Sinhala },
328    { "Telugu",              PT_SC,   ucp_Telugu },    { 607, PT_PC, ucp_Sk },
329    { "Thaana",              PT_SC,   ucp_Thaana },    { 610, PT_PC, ucp_Sm },
330    { "Thai",                PT_SC,   ucp_Thai },    { 613, PT_PC, ucp_So },
331    { "Tibetan",             PT_SC,   ucp_Tibetan },    { 616, PT_SC, ucp_Sundanese },
332    { "Tifinagh",            PT_SC,   ucp_Tifinagh },    { 626, PT_SC, ucp_Syloti_Nagri },
333    { "Ugaritic",            PT_SC,   ucp_Ugaritic },    { 639, PT_SC, ucp_Syriac },
334    { "Yi",                  PT_SC,   ucp_Yi },    { 646, PT_SC, ucp_Tagalog },
335    { "Z",                   PT_GC,   ucp_Z },    { 654, PT_SC, ucp_Tagbanwa },
336    { "Zl",                  PT_PC,   ucp_Zl },    { 663, PT_SC, ucp_Tai_Le },
337    { "Zp",                  PT_PC,   ucp_Zp },    { 670, PT_SC, ucp_Tamil },
338    { "Zs",                  PT_PC,   ucp_Zs }    { 676, PT_SC, ucp_Telugu },
339      { 683, PT_SC, ucp_Thaana },
340      { 690, PT_SC, ucp_Thai },
341      { 695, PT_SC, ucp_Tibetan },
342      { 703, PT_SC, ucp_Tifinagh },
343      { 712, PT_SC, ucp_Ugaritic },
344      { 721, PT_SC, ucp_Vai },
345      { 725, PT_SC, ucp_Yi },
346      { 728, PT_GC, ucp_Z },
347      { 730, PT_PC, ucp_Zl },
348      { 733, PT_PC, ucp_Zp },
349      { 736, PT_PC, ucp_Zs }
350  };  };
351    
352  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);

Legend:
Removed from v.200  
changed lines
  Added in v.351

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12