/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory | Revision Log | View Patch

revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC | revision 351 by ph10, Fri Jul 4 18:27:16 2008 UTC
# Line 87  const uschar _pcre_utf8_table4[] = { Line 87  const uschar _pcre_utf8_table4[] = {
87    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
88    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
89    
90    /* Table to translate from particular type value to the general value. */
91    
92    const int _pcre_ucp_gentype[] = {
93      ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
94      ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
95      ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
96      ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
97      ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
98      ucp_P, ucp_P,                       /* Ps, Po */
99      ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
100      ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
101    };
102    
103  /* The pcre_utt[] table below translates Unicode property names into type and  /* The pcre_utt[] table below translates Unicode property names into type and
104  code values. It is searched by binary chop, so must be in collating sequence of  code values. It is searched by binary chop, so must be in collating sequence of
105  name. Originally, the table contained pointers to the name strings in the first  name. Originally, the table contained pointers to the name strings in the first
# Line 94  field of each entry. However, that leads Line 107  field of each entry. However, that leads
107  a shared library is dynamically loaded. A significant reduction is made by  a shared library is dynamically loaded. A significant reduction is made by
108  putting all the names into a single, large string and then using offsets in the  putting all the names into a single, large string and then using offsets in the
109  table itself. Maintenance is more error-prone, but frequent changes to this  table itself. Maintenance is more error-prone, but frequent changes to this
110  data is unlikely. */  data are unlikely.
111    
112    July 2008: There is now a script called maint/GenerateUtt.py which can be used
113    to generate this data instead of maintaining it entirely by hand. */
114    
115  const char _pcre_utt_names[] =  const char _pcre_utt_names[] =
116    "Any\0"    "Any\0"
# Line 108  const char _pcre_utt_names[] = Line 124  const char _pcre_utt_names[] =
124    "Buhid\0"    "Buhid\0"
125    "C\0"    "C\0"
126    "Canadian_Aboriginal\0"    "Canadian_Aboriginal\0"
127      "Carian\0"
128    "Cc\0"    "Cc\0"
129    "Cf\0"    "Cf\0"
130      "Cham\0"
131    "Cherokee\0"    "Cherokee\0"
132    "Cn\0"    "Cn\0"
133    "Co\0"    "Co\0"
# Line 136  const char _pcre_utt_names[] = Line 154  const char _pcre_utt_names[] =
154    "Inherited\0"    "Inherited\0"
155    "Kannada\0"    "Kannada\0"
156    "Katakana\0"    "Katakana\0"
157      "Kayah_Li\0"
158    "Kharoshthi\0"    "Kharoshthi\0"
159    "Khmer\0"    "Khmer\0"
160    "L\0"    "L\0"
161    "L&\0"    "L&\0"
162    "Lao\0"    "Lao\0"
163    "Latin\0"    "Latin\0"
164      "Lepcha\0"
165    "Limbu\0"    "Limbu\0"
166    "Linear_B\0"    "Linear_B\0"
167    "Ll\0"    "Ll\0"
# Line 149  const char _pcre_utt_names[] = Line 169  const char _pcre_utt_names[] =
169    "Lo\0"    "Lo\0"
170    "Lt\0"    "Lt\0"
171    "Lu\0"    "Lu\0"
172      "Lycian\0"
173      "Lydian\0"
174    "M\0"    "M\0"
175    "Malayalam\0"    "Malayalam\0"
176    "Mc\0"    "Mc\0"
# Line 163  const char _pcre_utt_names[] = Line 185  const char _pcre_utt_names[] =
185    "Nl\0"    "Nl\0"
186    "No\0"    "No\0"
187    "Ogham\0"    "Ogham\0"
188      "Ol_Chiki\0"
189    "Old_Italic\0"    "Old_Italic\0"
190    "Old_Persian\0"    "Old_Persian\0"
191    "Oriya\0"    "Oriya\0"
# Line 177  const char _pcre_utt_names[] = Line 200  const char _pcre_utt_names[] =
200    "Pi\0"    "Pi\0"
201    "Po\0"    "Po\0"
202    "Ps\0"    "Ps\0"
203      "Rejang\0"
204    "Runic\0"    "Runic\0"
205    "S\0"    "S\0"
206      "Saurashtra\0"
207    "Sc\0"    "Sc\0"
208    "Shavian\0"    "Shavian\0"
209    "Sinhala\0"    "Sinhala\0"
210    "Sk\0"    "Sk\0"
211    "Sm\0"    "Sm\0"
212    "So\0"    "So\0"
213      "Sundanese\0"
214    "Syloti_Nagri\0"    "Syloti_Nagri\0"
215    "Syriac\0"    "Syriac\0"
216    "Tagalog\0"    "Tagalog\0"
# Line 197  const char _pcre_utt_names[] = Line 223  const char _pcre_utt_names[] =
223    "Tibetan\0"    "Tibetan\0"
224    "Tifinagh\0"    "Tifinagh\0"
225    "Ugaritic\0"    "Ugaritic\0"
226      "Vai\0"
227    "Yi\0"    "Yi\0"
228    "Z\0"    "Z\0"
229    "Zl\0"    "Zl\0"
# Line 204  const char _pcre_utt_names[] = Line 231  const char _pcre_utt_names[] =
231    "Zs\0";    "Zs\0";
232    
233  const ucp_type_table _pcre_utt[] = {  const ucp_type_table _pcre_utt[] = {
234    { 0,   PT_ANY, 0 },    {   0, PT_ANY, 0 },
235    { 4,   PT_SC, ucp_Arabic },    {   4, PT_SC, ucp_Arabic },
236    { 11,  PT_SC, ucp_Armenian },    {  11, PT_SC, ucp_Armenian },
237    { 20,  PT_SC, ucp_Balinese },    {  20, PT_SC, ucp_Balinese },
238    { 29,  PT_SC, ucp_Bengali },    {  29, PT_SC, ucp_Bengali },
239    { 37,  PT_SC, ucp_Bopomofo },    {  37, PT_SC, ucp_Bopomofo },
240    { 46,  PT_SC, ucp_Braille },    {  46, PT_SC, ucp_Braille },
241    { 54,  PT_SC, ucp_Buginese },    {  54, PT_SC, ucp_Buginese },
242    { 63,  PT_SC, ucp_Buhid },    {  63, PT_SC, ucp_Buhid },
243    { 69,  PT_GC, ucp_C },    {  69, PT_GC, ucp_C },
244    { 71,  PT_SC, ucp_Canadian_Aboriginal },    {  71, PT_SC, ucp_Canadian_Aboriginal },
245    { 91,  PT_PC, ucp_Cc },    {  91, PT_SC, ucp_Carian },
246    { 94,  PT_PC, ucp_Cf },    {  98, PT_PC, ucp_Cc },
247    { 97,  PT_SC, ucp_Cherokee },    { 101, PT_PC, ucp_Cf },
248    { 106, PT_PC, ucp_Cn },    { 104, PT_SC, ucp_Cham },
249    { 109, PT_PC, ucp_Co },    { 109, PT_SC, ucp_Cherokee },
250    { 112, PT_SC, ucp_Common },    { 118, PT_PC, ucp_Cn },
251    { 119, PT_SC, ucp_Coptic },    { 121, PT_PC, ucp_Co },
252    { 126, PT_PC, ucp_Cs },    { 124, PT_SC, ucp_Common },
253    { 129, PT_SC, ucp_Cuneiform },    { 131, PT_SC, ucp_Coptic },
254    { 139, PT_SC, ucp_Cypriot },    { 138, PT_PC, ucp_Cs },
255    { 147, PT_SC, ucp_Cyrillic },    { 141, PT_SC, ucp_Cuneiform },
256    { 156, PT_SC, ucp_Deseret },    { 151, PT_SC, ucp_Cypriot },
257    { 164, PT_SC, ucp_Devanagari },    { 159, PT_SC, ucp_Cyrillic },
258    { 175, PT_SC, ucp_Ethiopic },    { 168, PT_SC, ucp_Deseret },
259    { 184, PT_SC, ucp_Georgian },    { 176, PT_SC, ucp_Devanagari },
260    { 193, PT_SC, ucp_Glagolitic },    { 187, PT_SC, ucp_Ethiopic },
261    { 204, PT_SC, ucp_Gothic },    { 196, PT_SC, ucp_Georgian },
262    { 211, PT_SC, ucp_Greek },    { 205, PT_SC, ucp_Glagolitic },
263    { 217, PT_SC, ucp_Gujarati },    { 216, PT_SC, ucp_Gothic },
264    { 226, PT_SC, ucp_Gurmukhi },    { 223, PT_SC, ucp_Greek },
265    { 235, PT_SC, ucp_Han },    { 229, PT_SC, ucp_Gujarati },
266    { 239, PT_SC, ucp_Hangul },    { 238, PT_SC, ucp_Gurmukhi },
267    { 246, PT_SC, ucp_Hanunoo },    { 247, PT_SC, ucp_Han },
268    { 254, PT_SC, ucp_Hebrew },    { 251, PT_SC, ucp_Hangul },
269    { 261, PT_SC, ucp_Hiragana },    { 258, PT_SC, ucp_Hanunoo },
270    { 270, PT_SC, ucp_Inherited },    { 266, PT_SC, ucp_Hebrew },
271    { 280, PT_SC, ucp_Kannada },    { 273, PT_SC, ucp_Hiragana },
272    { 288, PT_SC, ucp_Katakana },    { 282, PT_SC, ucp_Inherited },
273    { 297, PT_SC, ucp_Kharoshthi },    { 292, PT_SC, ucp_Kannada },
274    { 308, PT_SC, ucp_Khmer },    { 300, PT_SC, ucp_Katakana },
275    { 314, PT_GC, ucp_L },    { 309, PT_SC, ucp_Kayah_Li },
276    { 316, PT_LAMP, 0 },    { 318, PT_SC, ucp_Kharoshthi },
277    { 319, PT_SC, ucp_Lao },    { 329, PT_SC, ucp_Khmer },
278    { 323, PT_SC, ucp_Latin },    { 335, PT_GC, ucp_L },
279    { 329, PT_SC, ucp_Limbu },    { 337, PT_LAMP, 0 },
280    { 335, PT_SC, ucp_Linear_B },    { 340, PT_SC, ucp_Lao },
281    { 344, PT_PC, ucp_Ll },    { 344, PT_SC, ucp_Latin },
282    { 347, PT_PC, ucp_Lm },    { 350, PT_SC, ucp_Lepcha },
283    { 350, PT_PC, ucp_Lo },    { 357, PT_SC, ucp_Limbu },
284    { 353, PT_PC, ucp_Lt },    { 363, PT_SC, ucp_Linear_B },
285    { 356, PT_PC, ucp_Lu },    { 372, PT_PC, ucp_Ll },
286    { 359, PT_GC, ucp_M },    { 375, PT_PC, ucp_Lm },
287    { 361, PT_SC, ucp_Malayalam },    { 378, PT_PC, ucp_Lo },
288    { 371, PT_PC, ucp_Mc },    { 381, PT_PC, ucp_Lt },
289    { 374, PT_PC, ucp_Me },    { 384, PT_PC, ucp_Lu },
290    { 377, PT_PC, ucp_Mn },    { 387, PT_SC, ucp_Lycian },
291    { 380, PT_SC, ucp_Mongolian },    { 394, PT_SC, ucp_Lydian },
292    { 390, PT_SC, ucp_Myanmar },    { 401, PT_GC, ucp_M },
293    { 398, PT_GC, ucp_N },    { 403, PT_SC, ucp_Malayalam },
294    { 400, PT_PC, ucp_Nd },    { 413, PT_PC, ucp_Mc },
295    { 403, PT_SC, ucp_New_Tai_Lue },    { 416, PT_PC, ucp_Me },
296    { 415, PT_SC, ucp_Nko },    { 419, PT_PC, ucp_Mn },
297    { 419, PT_PC, ucp_Nl },    { 422, PT_SC, ucp_Mongolian },
298    { 422, PT_PC, ucp_No },    { 432, PT_SC, ucp_Myanmar },
299    { 425, PT_SC, ucp_Ogham },    { 440, PT_GC, ucp_N },
300    { 431, PT_SC, ucp_Old_Italic },    { 442, PT_PC, ucp_Nd },
301    { 442, PT_SC, ucp_Old_Persian },    { 445, PT_SC, ucp_New_Tai_Lue },
302    { 454, PT_SC, ucp_Oriya },    { 457, PT_SC, ucp_Nko },
303    { 460, PT_SC, ucp_Osmanya },    { 461, PT_PC, ucp_Nl },
304    { 468, PT_GC, ucp_P },    { 464, PT_PC, ucp_No },
305    { 470, PT_PC, ucp_Pc },    { 467, PT_SC, ucp_Ogham },
306    { 473, PT_PC, ucp_Pd },    { 473, PT_SC, ucp_Ol_Chiki },
307    { 476, PT_PC, ucp_Pe },    { 482, PT_SC, ucp_Old_Italic },
308    { 479, PT_PC, ucp_Pf },    { 493, PT_SC, ucp_Old_Persian },
309    { 482, PT_SC, ucp_Phags_Pa },    { 505, PT_SC, ucp_Oriya },
310    { 491, PT_SC, ucp_Phoenician },    { 511, PT_SC, ucp_Osmanya },
311    { 502, PT_PC, ucp_Pi },    { 519, PT_GC, ucp_P },
312    { 505, PT_PC, ucp_Po },    { 521, PT_PC, ucp_Pc },
313    { 508, PT_PC, ucp_Ps },    { 524, PT_PC, ucp_Pd },
314    { 511, PT_SC, ucp_Runic },    { 527, PT_PC, ucp_Pe },
315    { 517, PT_GC, ucp_S },    { 530, PT_PC, ucp_Pf },
316    { 519, PT_PC, ucp_Sc },    { 533, PT_SC, ucp_Phags_Pa },
317    { 522, PT_SC, ucp_Shavian },    { 542, PT_SC, ucp_Phoenician },
318    { 530, PT_SC, ucp_Sinhala },    { 553, PT_PC, ucp_Pi },
319    { 538, PT_PC, ucp_Sk },    { 556, PT_PC, ucp_Po },
320    { 541, PT_PC, ucp_Sm },    { 559, PT_PC, ucp_Ps },
321    { 544, PT_PC, ucp_So },    { 562, PT_SC, ucp_Rejang },
322    { 547, PT_SC, ucp_Syloti_Nagri },    { 569, PT_SC, ucp_Runic },
323    { 560, PT_SC, ucp_Syriac },    { 575, PT_GC, ucp_S },
324    { 567, PT_SC, ucp_Tagalog },    { 577, PT_SC, ucp_Saurashtra },
325    { 575, PT_SC, ucp_Tagbanwa },    { 588, PT_PC, ucp_Sc },
326    { 584, PT_SC, ucp_Tai_Le },    { 591, PT_SC, ucp_Shavian },
327    { 591, PT_SC, ucp_Tamil },    { 599, PT_SC, ucp_Sinhala },
328    { 597, PT_SC, ucp_Telugu },    { 607, PT_PC, ucp_Sk },
329    { 604, PT_SC, ucp_Thaana },    { 610, PT_PC, ucp_Sm },
330    { 611, PT_SC, ucp_Thai },    { 613, PT_PC, ucp_So },
331    { 616, PT_SC, ucp_Tibetan },    { 616, PT_SC, ucp_Sundanese },
332    { 624, PT_SC, ucp_Tifinagh },    { 626, PT_SC, ucp_Syloti_Nagri },
333    { 633, PT_SC, ucp_Ugaritic },    { 639, PT_SC, ucp_Syriac },
334    { 642, PT_SC, ucp_Yi },    { 646, PT_SC, ucp_Tagalog },
335    { 645, PT_GC, ucp_Z },    { 654, PT_SC, ucp_Tagbanwa },
336    { 647, PT_PC, ucp_Zl },    { 663, PT_SC, ucp_Tai_Le },
337    { 650, PT_PC, ucp_Zp },    { 670, PT_SC, ucp_Tamil },
338    { 653, PT_PC, ucp_Zs }    { 676, PT_SC, ucp_Telugu },
339      { 683, PT_SC, ucp_Thaana },
340      { 690, PT_SC, ucp_Thai },
341      { 695, PT_SC, ucp_Tibetan },
342      { 703, PT_SC, ucp_Tifinagh },
343      { 712, PT_SC, ucp_Ugaritic },
344      { 721, PT_SC, ucp_Vai },
345      { 725, PT_SC, ucp_Yi },
346      { 728, PT_GC, ucp_Z },
347      { 730, PT_PC, ucp_Zl },
348      { 733, PT_PC, ucp_Zp },
349      { 736, PT_PC, ucp_Zs }
350  };  };
351    
352  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);

Legend:
Removed from v.305  
changed lines
  Added in v.351

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12