/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory | Revision Log | View Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  uses macros to change their names from _ Line 44  uses macros to change their names from _
44  clashes with the library. */  clashes with the library. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
52    
53    
# Line 61  const uschar _pcre_OP_lengths[] = { OP_L Line 65  const uschar _pcre_OP_lengths[] = { OP_L
65  /* These are the breakpoints for different numbers of bytes in a UTF-8  /* These are the breakpoints for different numbers of bytes in a UTF-8
66  character. */  character. */
67    
68    #ifdef SUPPORT_UTF8
69    
70  const int _pcre_utf8_table1[] =  const int _pcre_utf8_table1[] =
71    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
72    
# Line 81  const uschar _pcre_utf8_table4[] = { Line 87  const uschar _pcre_utf8_table4[] = {
87    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
88    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
89    
90  /* This table translates Unicode property names into type and code values. It  /* The pcre_utt[] table below translates Unicode property names into type and
91  is searched by binary chop, so must be in collating sequence of name. */  code values. It is searched by binary chop, so must be in collating sequence of
92    name. Originally, the table contained pointers to the name strings in the first
93    field of each entry. However, that leads to a large number of relocations when
94    a shared library is dynamically loaded. A significant reduction is made by
95    putting all the names into a single, large string and then using offsets in the
96    table itself. Maintenance is more error-prone, but frequent changes to this
97    data are unlikely. */
98    
99    const char _pcre_utt_names[] =
100      "Any\0"
101      "Arabic\0"
102      "Armenian\0"
103      "Balinese\0"
104      "Bengali\0"
105      "Bopomofo\0"
106      "Braille\0"
107      "Buginese\0"
108      "Buhid\0"
109      "C\0"
110      "Canadian_Aboriginal\0"
111      "Cc\0"
112      "Cf\0"
113      "Cherokee\0"
114      "Cn\0"
115      "Co\0"
116      "Common\0"
117      "Coptic\0"
118      "Cs\0"
119      "Cuneiform\0"
120      "Cypriot\0"
121      "Cyrillic\0"
122      "Deseret\0"
123      "Devanagari\0"
124      "Ethiopic\0"
125      "Georgian\0"
126      "Glagolitic\0"
127      "Gothic\0"
128      "Greek\0"
129      "Gujarati\0"
130      "Gurmukhi\0"
131      "Han\0"
132      "Hangul\0"
133      "Hanunoo\0"
134      "Hebrew\0"
135      "Hiragana\0"
136      "Inherited\0"
137      "Kannada\0"
138      "Katakana\0"
139      "Kharoshthi\0"
140      "Khmer\0"
141      "L\0"
142      "L&\0"
143      "Lao\0"
144      "Latin\0"
145      "Limbu\0"
146      "Linear_B\0"
147      "Ll\0"
148      "Lm\0"
149      "Lo\0"
150      "Lt\0"
151      "Lu\0"
152      "M\0"
153      "Malayalam\0"
154      "Mc\0"
155      "Me\0"
156      "Mn\0"
157      "Mongolian\0"
158      "Myanmar\0"
159      "N\0"
160      "Nd\0"
161      "New_Tai_Lue\0"
162      "Nko\0"
163      "Nl\0"
164      "No\0"
165      "Ogham\0"
166      "Old_Italic\0"
167      "Old_Persian\0"
168      "Oriya\0"
169      "Osmanya\0"
170      "P\0"
171      "Pc\0"
172      "Pd\0"
173      "Pe\0"
174      "Pf\0"
175      "Phags_Pa\0"
176      "Phoenician\0"
177      "Pi\0"
178      "Po\0"
179      "Ps\0"
180      "Runic\0"
181      "S\0"
182      "Sc\0"
183      "Shavian\0"
184      "Sinhala\0"
185      "Sk\0"
186      "Sm\0"
187      "So\0"
188      "Syloti_Nagri\0"
189      "Syriac\0"
190      "Tagalog\0"
191      "Tagbanwa\0"
192      "Tai_Le\0"
193      "Tamil\0"
194      "Telugu\0"
195      "Thaana\0"
196      "Thai\0"
197      "Tibetan\0"
198      "Tifinagh\0"
199      "Ugaritic\0"
200      "Yi\0"
201      "Z\0"
202      "Zl\0"
203      "Zp\0"
204      "Zs\0";
205    
206  const ucp_type_table _pcre_utt[] = {  const ucp_type_table _pcre_utt[] = {
207    { "Any",                 PT_ANY,  0 },    { 0,   PT_ANY, 0 },
208    { "Arabic",              PT_SC,   ucp_Arabic },    { 4,   PT_SC, ucp_Arabic },
209    { "Armenian",            PT_SC,   ucp_Armenian },    { 11,  PT_SC, ucp_Armenian },
210    { "Balinese",            PT_SC,   ucp_Balinese },    { 20,  PT_SC, ucp_Balinese },
211    { "Bengali",             PT_SC,   ucp_Bengali },    { 29,  PT_SC, ucp_Bengali },
212    { "Bopomofo",            PT_SC,   ucp_Bopomofo },    { 37,  PT_SC, ucp_Bopomofo },
213    { "Braille",             PT_SC,   ucp_Braille },    { 46,  PT_SC, ucp_Braille },
214    { "Buginese",            PT_SC,   ucp_Buginese },    { 54,  PT_SC, ucp_Buginese },
215    { "Buhid",               PT_SC,   ucp_Buhid },    { 63,  PT_SC, ucp_Buhid },
216    { "C",                   PT_GC,   ucp_C },    { 69,  PT_GC, ucp_C },
217    { "Canadian_Aboriginal", PT_SC,   ucp_Canadian_Aboriginal },    { 71,  PT_SC, ucp_Canadian_Aboriginal },
218    { "Cc",                  PT_PC,   ucp_Cc },    { 91,  PT_PC, ucp_Cc },
219    { "Cf",                  PT_PC,   ucp_Cf },    { 94,  PT_PC, ucp_Cf },
220    { "Cherokee",            PT_SC,   ucp_Cherokee },    { 97,  PT_SC, ucp_Cherokee },
221    { "Cn",                  PT_PC,   ucp_Cn },    { 106, PT_PC, ucp_Cn },
222    { "Co",                  PT_PC,   ucp_Co },    { 109, PT_PC, ucp_Co },
223    { "Common",              PT_SC,   ucp_Common },    { 112, PT_SC, ucp_Common },
224    { "Coptic",              PT_SC,   ucp_Coptic },    { 119, PT_SC, ucp_Coptic },
225    { "Cs",                  PT_PC,   ucp_Cs },    { 126, PT_PC, ucp_Cs },
226    { "Cuneiform",           PT_SC,   ucp_Cuneiform },    { 129, PT_SC, ucp_Cuneiform },
227    { "Cypriot",             PT_SC,   ucp_Cypriot },    { 139, PT_SC, ucp_Cypriot },
228    { "Cyrillic",            PT_SC,   ucp_Cyrillic },    { 147, PT_SC, ucp_Cyrillic },
229    { "Deseret",             PT_SC,   ucp_Deseret },    { 156, PT_SC, ucp_Deseret },
230    { "Devanagari",          PT_SC,   ucp_Devanagari },    { 164, PT_SC, ucp_Devanagari },
231    { "Ethiopic",            PT_SC,   ucp_Ethiopic },    { 175, PT_SC, ucp_Ethiopic },
232    { "Georgian",            PT_SC,   ucp_Georgian },    { 184, PT_SC, ucp_Georgian },
233    { "Glagolitic",          PT_SC,   ucp_Glagolitic },    { 193, PT_SC, ucp_Glagolitic },
234    { "Gothic",              PT_SC,   ucp_Gothic },    { 204, PT_SC, ucp_Gothic },
235    { "Greek",               PT_SC,   ucp_Greek },    { 211, PT_SC, ucp_Greek },
236    { "Gujarati",            PT_SC,   ucp_Gujarati },    { 217, PT_SC, ucp_Gujarati },
237    { "Gurmukhi",            PT_SC,   ucp_Gurmukhi },    { 226, PT_SC, ucp_Gurmukhi },
238    { "Han",                 PT_SC,   ucp_Han },    { 235, PT_SC, ucp_Han },
239    { "Hangul",              PT_SC,   ucp_Hangul },    { 239, PT_SC, ucp_Hangul },
240    { "Hanunoo",             PT_SC,   ucp_Hanunoo },    { 246, PT_SC, ucp_Hanunoo },
241    { "Hebrew",              PT_SC,   ucp_Hebrew },    { 254, PT_SC, ucp_Hebrew },
242    { "Hiragana",            PT_SC,   ucp_Hiragana },    { 261, PT_SC, ucp_Hiragana },
243    { "Inherited",           PT_SC,   ucp_Inherited },    { 270, PT_SC, ucp_Inherited },
244    { "Kannada",             PT_SC,   ucp_Kannada },    { 280, PT_SC, ucp_Kannada },
245    { "Katakana",            PT_SC,   ucp_Katakana },    { 288, PT_SC, ucp_Katakana },
246    { "Kharoshthi",          PT_SC,   ucp_Kharoshthi },    { 297, PT_SC, ucp_Kharoshthi },
247    { "Khmer",               PT_SC,   ucp_Khmer },    { 308, PT_SC, ucp_Khmer },
248    { "L",                   PT_GC,   ucp_L },    { 314, PT_GC, ucp_L },
249    { "L&",                  PT_LAMP, 0 },    { 316, PT_LAMP, 0 },
250    { "Lao",                 PT_SC,   ucp_Lao },    { 319, PT_SC, ucp_Lao },
251    { "Latin",               PT_SC,   ucp_Latin },    { 323, PT_SC, ucp_Latin },
252    { "Limbu",               PT_SC,   ucp_Limbu },    { 329, PT_SC, ucp_Limbu },
253    { "Linear_B",            PT_SC,   ucp_Linear_B },    { 335, PT_SC, ucp_Linear_B },
254    { "Ll",                  PT_PC,   ucp_Ll },    { 344, PT_PC, ucp_Ll },
255    { "Lm",                  PT_PC,   ucp_Lm },    { 347, PT_PC, ucp_Lm },
256    { "Lo",                  PT_PC,   ucp_Lo },    { 350, PT_PC, ucp_Lo },
257    { "Lt",                  PT_PC,   ucp_Lt },    { 353, PT_PC, ucp_Lt },
258    { "Lu",                  PT_PC,   ucp_Lu },    { 356, PT_PC, ucp_Lu },
259    { "M",                   PT_GC,   ucp_M },    { 359, PT_GC, ucp_M },
260    { "Malayalam",           PT_SC,   ucp_Malayalam },    { 361, PT_SC, ucp_Malayalam },
261    { "Mc",                  PT_PC,   ucp_Mc },    { 371, PT_PC, ucp_Mc },
262    { "Me",                  PT_PC,   ucp_Me },    { 374, PT_PC, ucp_Me },
263    { "Mn",                  PT_PC,   ucp_Mn },    { 377, PT_PC, ucp_Mn },
264    { "Mongolian",           PT_SC,   ucp_Mongolian },    { 380, PT_SC, ucp_Mongolian },
265    { "Myanmar",             PT_SC,   ucp_Myanmar },    { 390, PT_SC, ucp_Myanmar },
266    { "N",                   PT_GC,   ucp_N },    { 398, PT_GC, ucp_N },
267    { "Nd",                  PT_PC,   ucp_Nd },    { 400, PT_PC, ucp_Nd },
268    { "New_Tai_Lue",         PT_SC,   ucp_New_Tai_Lue },    { 403, PT_SC, ucp_New_Tai_Lue },
269    { "Nko",                 PT_SC,   ucp_Nko },    { 415, PT_SC, ucp_Nko },
270    { "Nl",                  PT_PC,   ucp_Nl },    { 419, PT_PC, ucp_Nl },
271    { "No",                  PT_PC,   ucp_No },    { 422, PT_PC, ucp_No },
272    { "Ogham",               PT_SC,   ucp_Ogham },    { 425, PT_SC, ucp_Ogham },
273    { "Old_Italic",          PT_SC,   ucp_Old_Italic },    { 431, PT_SC, ucp_Old_Italic },
274    { "Old_Persian",         PT_SC,   ucp_Old_Persian },    { 442, PT_SC, ucp_Old_Persian },
275    { "Oriya",               PT_SC,   ucp_Oriya },    { 454, PT_SC, ucp_Oriya },
276    { "Osmanya",             PT_SC,   ucp_Osmanya },    { 460, PT_SC, ucp_Osmanya },
277    { "P",                   PT_GC,   ucp_P },    { 468, PT_GC, ucp_P },
278    { "Pc",                  PT_PC,   ucp_Pc },    { 470, PT_PC, ucp_Pc },
279    { "Pd",                  PT_PC,   ucp_Pd },    { 473, PT_PC, ucp_Pd },
280    { "Pe",                  PT_PC,   ucp_Pe },    { 476, PT_PC, ucp_Pe },
281    { "Pf",                  PT_PC,   ucp_Pf },    { 479, PT_PC, ucp_Pf },
282    { "Phags_Pa",            PT_SC,   ucp_Phags_Pa },    { 482, PT_SC, ucp_Phags_Pa },
283    { "Phoenician",          PT_SC,   ucp_Phoenician },    { 491, PT_SC, ucp_Phoenician },
284    { "Pi",                  PT_PC,   ucp_Pi },    { 502, PT_PC, ucp_Pi },
285    { "Po",                  PT_PC,   ucp_Po },    { 505, PT_PC, ucp_Po },
286    { "Ps",                  PT_PC,   ucp_Ps },    { 508, PT_PC, ucp_Ps },
287    { "Runic",               PT_SC,   ucp_Runic },    { 511, PT_SC, ucp_Runic },
288    { "S",                   PT_GC,   ucp_S },    { 517, PT_GC, ucp_S },
289    { "Sc",                  PT_PC,   ucp_Sc },    { 519, PT_PC, ucp_Sc },
290    { "Shavian",             PT_SC,   ucp_Shavian },    { 522, PT_SC, ucp_Shavian },
291    { "Sinhala",             PT_SC,   ucp_Sinhala },    { 530, PT_SC, ucp_Sinhala },
292    { "Sk",                  PT_PC,   ucp_Sk },    { 538, PT_PC, ucp_Sk },
293    { "Sm",                  PT_PC,   ucp_Sm },    { 541, PT_PC, ucp_Sm },
294    { "So",                  PT_PC,   ucp_So },    { 544, PT_PC, ucp_So },
295    { "Syloti_Nagri",        PT_SC,   ucp_Syloti_Nagri },    { 547, PT_SC, ucp_Syloti_Nagri },
296    { "Syriac",              PT_SC,   ucp_Syriac },    { 560, PT_SC, ucp_Syriac },
297    { "Tagalog",             PT_SC,   ucp_Tagalog },    { 567, PT_SC, ucp_Tagalog },
298    { "Tagbanwa",            PT_SC,   ucp_Tagbanwa },    { 575, PT_SC, ucp_Tagbanwa },
299    { "Tai_Le",              PT_SC,   ucp_Tai_Le },    { 584, PT_SC, ucp_Tai_Le },
300    { "Tamil",               PT_SC,   ucp_Tamil },    { 591, PT_SC, ucp_Tamil },
301    { "Telugu",              PT_SC,   ucp_Telugu },    { 597, PT_SC, ucp_Telugu },
302    { "Thaana",              PT_SC,   ucp_Thaana },    { 604, PT_SC, ucp_Thaana },
303    { "Thai",                PT_SC,   ucp_Thai },    { 611, PT_SC, ucp_Thai },
304    { "Tibetan",             PT_SC,   ucp_Tibetan },    { 616, PT_SC, ucp_Tibetan },
305    { "Tifinagh",            PT_SC,   ucp_Tifinagh },    { 624, PT_SC, ucp_Tifinagh },
306    { "Ugaritic",            PT_SC,   ucp_Ugaritic },    { 633, PT_SC, ucp_Ugaritic },
307    { "Yi",                  PT_SC,   ucp_Yi },    { 642, PT_SC, ucp_Yi },
308    { "Z",                   PT_GC,   ucp_Z },    { 645, PT_GC, ucp_Z },
309    { "Zl",                  PT_PC,   ucp_Zl },    { 647, PT_PC, ucp_Zl },
310    { "Zp",                  PT_PC,   ucp_Zp },    { 650, PT_PC, ucp_Zp },
311    { "Zs",                  PT_PC,   ucp_Zs }    { 653, PT_PC, ucp_Zs }
312  };  };
313    
314  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
315    
316    #endif  /* SUPPORT_UTF8 */
317    
318  /* End of pcre_tables.c */  /* End of pcre_tables.c */

Legend:
Removed from v.93  
changed lines
  Added in v.305

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12