/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory | Revision Log | View Patch

revision 591 by ph10, Wed May 5 10:44:20 2010 UTC revision 592 by ph10, Sat Apr 30 17:37:37 2011 UTC
# Line 110  table itself. Maintenance is more error- Line 110  table itself. Maintenance is more error-
110  data are unlikely.  data are unlikely.
111    
112  July 2008: There is now a script called maint/GenerateUtt.py that can be used  July 2008: There is now a script called maint/GenerateUtt.py that can be used
113  to generate this data instead of maintaining it entirely by hand.  to generate this data automatically instead of maintaining it by hand.
114    
115  The script was updated in March 2009 to generate a new EBCDIC-compliant  The script was updated in March 2009 to generate a new EBCDIC-compliant
116  version. Like all other character and string literals that are compared against  version. Like all other character and string literals that are compared against
# Line 123  strings to make sure that UTF-8 support Line 123  strings to make sure that UTF-8 support
123  #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"  #define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
124  #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"  #define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
125  #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"  #define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
126    #define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0"
127  #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"  #define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
128  #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"  #define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
129    #define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0"
130  #define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"  #define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
131  #define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"  #define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
132  #define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"  #define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
# Line 186  strings to make sure that UTF-8 support Line 188  strings to make sure that UTF-8 support
188  #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"  #define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
189  #define STRING_M0 STR_M "\0"  #define STRING_M0 STR_M "\0"
190  #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"  #define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
191    #define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0"
192  #define STRING_Mc0 STR_M STR_c "\0"  #define STRING_Mc0 STR_M STR_c "\0"
193  #define STRING_Me0 STR_M STR_e "\0"  #define STRING_Me0 STR_M STR_e "\0"
194  #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"  #define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
# Line 253  strings to make sure that UTF-8 support Line 256  strings to make sure that UTF-8 support
256  #define STRING_Zp0 STR_Z STR_p "\0"  #define STRING_Zp0 STR_Z STR_p "\0"
257  #define STRING_Zs0 STR_Z STR_s "\0"  #define STRING_Zs0 STR_Z STR_s "\0"
258    
259  const char _pcre_utt_names[] =  const char _pcre_utt_names[] =
260    STRING_Any0    STRING_Any0
261    STRING_Arabic0    STRING_Arabic0
262    STRING_Armenian0    STRING_Armenian0
263    STRING_Avestan0    STRING_Avestan0
264    STRING_Balinese0    STRING_Balinese0
265    STRING_Bamum0    STRING_Bamum0
266      STRING_Batak0
267    STRING_Bengali0    STRING_Bengali0
268    STRING_Bopomofo0    STRING_Bopomofo0
269      STRING_Brahmi0
270    STRING_Braille0    STRING_Braille0
271    STRING_Buginese0    STRING_Buginese0
272    STRING_Buhid0    STRING_Buhid0
# Line 323  const char _pcre_utt_names[] = Line 328  const char _pcre_utt_names[] =
328    STRING_Lydian0    STRING_Lydian0
329    STRING_M0    STRING_M0
330    STRING_Malayalam0    STRING_Malayalam0
331      STRING_Mandaic0
332    STRING_Mc0    STRING_Mc0
333    STRING_Me0    STRING_Me0
334    STRING_Meetei_Mayek0    STRING_Meetei_Mayek0
# Line 390  const char _pcre_utt_names[] = Line 396  const char _pcre_utt_names[] =
396    STRING_Zp0    STRING_Zp0
397    STRING_Zs0;    STRING_Zs0;
398    
399  const ucp_type_table _pcre_utt[] = {  const ucp_type_table _pcre_utt[] = {
400    {   0, PT_ANY, 0 },    {   0, PT_ANY, 0 },
401    {   4, PT_SC, ucp_Arabic },    {   4, PT_SC, ucp_Arabic },
402    {  11, PT_SC, ucp_Armenian },    {  11, PT_SC, ucp_Armenian },
403    {  20, PT_SC, ucp_Avestan },    {  20, PT_SC, ucp_Avestan },
404    {  28, PT_SC, ucp_Balinese },    {  28, PT_SC, ucp_Balinese },
405    {  37, PT_SC, ucp_Bamum },    {  37, PT_SC, ucp_Bamum },
406    {  43, PT_SC, ucp_Bengali },    {  43, PT_SC, ucp_Batak },
407    {  51, PT_SC, ucp_Bopomofo },    {  49, PT_SC, ucp_Bengali },
408    {  60, PT_SC, ucp_Braille },    {  57, PT_SC, ucp_Bopomofo },
409    {  68, PT_SC, ucp_Buginese },    {  66, PT_SC, ucp_Brahmi },
410    {  77, PT_SC, ucp_Buhid },    {  73, PT_SC, ucp_Braille },
411    {  83, PT_GC, ucp_C },    {  81, PT_SC, ucp_Buginese },
412    {  85, PT_SC, ucp_Canadian_Aboriginal },    {  90, PT_SC, ucp_Buhid },
413    { 105, PT_SC, ucp_Carian },    {  96, PT_GC, ucp_C },
414    { 112, PT_PC, ucp_Cc },    {  98, PT_SC, ucp_Canadian_Aboriginal },
415    { 115, PT_PC, ucp_Cf },    { 118, PT_SC, ucp_Carian },
416    { 118, PT_SC, ucp_Cham },    { 125, PT_PC, ucp_Cc },
417    { 123, PT_SC, ucp_Cherokee },    { 128, PT_PC, ucp_Cf },
418    { 132, PT_PC, ucp_Cn },    { 131, PT_SC, ucp_Cham },
419    { 135, PT_PC, ucp_Co },    { 136, PT_SC, ucp_Cherokee },
420    { 138, PT_SC, ucp_Common },    { 145, PT_PC, ucp_Cn },
421    { 145, PT_SC, ucp_Coptic },    { 148, PT_PC, ucp_Co },
422    { 152, PT_PC, ucp_Cs },    { 151, PT_SC, ucp_Common },
423    { 155, PT_SC, ucp_Cuneiform },    { 158, PT_SC, ucp_Coptic },
424    { 165, PT_SC, ucp_Cypriot },    { 165, PT_PC, ucp_Cs },
425    { 173, PT_SC, ucp_Cyrillic },    { 168, PT_SC, ucp_Cuneiform },
426    { 182, PT_SC, ucp_Deseret },    { 178, PT_SC, ucp_Cypriot },
427    { 190, PT_SC, ucp_Devanagari },    { 186, PT_SC, ucp_Cyrillic },
428    { 201, PT_SC, ucp_Egyptian_Hieroglyphs },    { 195, PT_SC, ucp_Deseret },
429    { 222, PT_SC, ucp_Ethiopic },    { 203, PT_SC, ucp_Devanagari },
430    { 231, PT_SC, ucp_Georgian },    { 214, PT_SC, ucp_Egyptian_Hieroglyphs },
431    { 240, PT_SC, ucp_Glagolitic },    { 235, PT_SC, ucp_Ethiopic },
432    { 251, PT_SC, ucp_Gothic },    { 244, PT_SC, ucp_Georgian },
433    { 258, PT_SC, ucp_Greek },    { 253, PT_SC, ucp_Glagolitic },
434    { 264, PT_SC, ucp_Gujarati },    { 264, PT_SC, ucp_Gothic },
435    { 273, PT_SC, ucp_Gurmukhi },    { 271, PT_SC, ucp_Greek },
436    { 282, PT_SC, ucp_Han },    { 277, PT_SC, ucp_Gujarati },
437    { 286, PT_SC, ucp_Hangul },    { 286, PT_SC, ucp_Gurmukhi },
438    { 293, PT_SC, ucp_Hanunoo },    { 295, PT_SC, ucp_Han },
439    { 301, PT_SC, ucp_Hebrew },    { 299, PT_SC, ucp_Hangul },
440    { 308, PT_SC, ucp_Hiragana },    { 306, PT_SC, ucp_Hanunoo },
441    { 317, PT_SC, ucp_Imperial_Aramaic },    { 314, PT_SC, ucp_Hebrew },
442    { 334, PT_SC, ucp_Inherited },    { 321, PT_SC, ucp_Hiragana },
443    { 344, PT_SC, ucp_Inscriptional_Pahlavi },    { 330, PT_SC, ucp_Imperial_Aramaic },
444    { 366, PT_SC, ucp_Inscriptional_Parthian },    { 347, PT_SC, ucp_Inherited },
445    { 389, PT_SC, ucp_Javanese },    { 357, PT_SC, ucp_Inscriptional_Pahlavi },
446    { 398, PT_SC, ucp_Kaithi },    { 379, PT_SC, ucp_Inscriptional_Parthian },
447    { 405, PT_SC, ucp_Kannada },    { 402, PT_SC, ucp_Javanese },
448    { 413, PT_SC, ucp_Katakana },    { 411, PT_SC, ucp_Kaithi },
449    { 422, PT_SC, ucp_Kayah_Li },    { 418, PT_SC, ucp_Kannada },
450    { 431, PT_SC, ucp_Kharoshthi },    { 426, PT_SC, ucp_Katakana },
451    { 442, PT_SC, ucp_Khmer },    { 435, PT_SC, ucp_Kayah_Li },
452    { 448, PT_GC, ucp_L },    { 444, PT_SC, ucp_Kharoshthi },
453    { 450, PT_LAMP, 0 },    { 455, PT_SC, ucp_Khmer },
454    { 453, PT_SC, ucp_Lao },    { 461, PT_GC, ucp_L },
455    { 457, PT_SC, ucp_Latin },    { 463, PT_LAMP, 0 },
456    { 463, PT_SC, ucp_Lepcha },    { 466, PT_SC, ucp_Lao },
457    { 470, PT_SC, ucp_Limbu },    { 470, PT_SC, ucp_Latin },
458    { 476, PT_SC, ucp_Linear_B },    { 476, PT_SC, ucp_Lepcha },
459    { 485, PT_SC, ucp_Lisu },    { 483, PT_SC, ucp_Limbu },
460    { 490, PT_PC, ucp_Ll },    { 489, PT_SC, ucp_Linear_B },
461    { 493, PT_PC, ucp_Lm },    { 498, PT_SC, ucp_Lisu },
462    { 496, PT_PC, ucp_Lo },    { 503, PT_PC, ucp_Ll },
463    { 499, PT_PC, ucp_Lt },    { 506, PT_PC, ucp_Lm },
464    { 502, PT_PC, ucp_Lu },    { 509, PT_PC, ucp_Lo },
465    { 505, PT_SC, ucp_Lycian },    { 512, PT_PC, ucp_Lt },
466    { 512, PT_SC, ucp_Lydian },    { 515, PT_PC, ucp_Lu },
467    { 519, PT_GC, ucp_M },    { 518, PT_SC, ucp_Lycian },
468    { 521, PT_SC, ucp_Malayalam },    { 525, PT_SC, ucp_Lydian },
469    { 531, PT_PC, ucp_Mc },    { 532, PT_GC, ucp_M },
470    { 534, PT_PC, ucp_Me },    { 534, PT_SC, ucp_Malayalam },
471    { 537, PT_SC, ucp_Meetei_Mayek },    { 544, PT_SC, ucp_Mandaic },
472    { 550, PT_PC, ucp_Mn },    { 552, PT_PC, ucp_Mc },
473    { 553, PT_SC, ucp_Mongolian },    { 555, PT_PC, ucp_Me },
474    { 563, PT_SC, ucp_Myanmar },    { 558, PT_SC, ucp_Meetei_Mayek },
475    { 571, PT_GC, ucp_N },    { 571, PT_PC, ucp_Mn },
476    { 573, PT_PC, ucp_Nd },    { 574, PT_SC, ucp_Mongolian },
477    { 576, PT_SC, ucp_New_Tai_Lue },    { 584, PT_SC, ucp_Myanmar },
478    { 588, PT_SC, ucp_Nko },    { 592, PT_GC, ucp_N },
479    { 592, PT_PC, ucp_Nl },    { 594, PT_PC, ucp_Nd },
480    { 595, PT_PC, ucp_No },    { 597, PT_SC, ucp_New_Tai_Lue },
481    { 598, PT_SC, ucp_Ogham },    { 609, PT_SC, ucp_Nko },
482    { 604, PT_SC, ucp_Ol_Chiki },    { 613, PT_PC, ucp_Nl },
483    { 613, PT_SC, ucp_Old_Italic },    { 616, PT_PC, ucp_No },
484    { 624, PT_SC, ucp_Old_Persian },    { 619, PT_SC, ucp_Ogham },
485    { 636, PT_SC, ucp_Old_South_Arabian },    { 625, PT_SC, ucp_Ol_Chiki },
486    { 654, PT_SC, ucp_Old_Turkic },    { 634, PT_SC, ucp_Old_Italic },
487    { 665, PT_SC, ucp_Oriya },    { 645, PT_SC, ucp_Old_Persian },
488    { 671, PT_SC, ucp_Osmanya },    { 657, PT_SC, ucp_Old_South_Arabian },
489    { 679, PT_GC, ucp_P },    { 675, PT_SC, ucp_Old_Turkic },
490    { 681, PT_PC, ucp_Pc },    { 686, PT_SC, ucp_Oriya },
491    { 684, PT_PC, ucp_Pd },    { 692, PT_SC, ucp_Osmanya },
492    { 687, PT_PC, ucp_Pe },    { 700, PT_GC, ucp_P },
493    { 690, PT_PC, ucp_Pf },    { 702, PT_PC, ucp_Pc },
494    { 693, PT_SC, ucp_Phags_Pa },    { 705, PT_PC, ucp_Pd },
495    { 702, PT_SC, ucp_Phoenician },    { 708, PT_PC, ucp_Pe },
496    { 713, PT_PC, ucp_Pi },    { 711, PT_PC, ucp_Pf },
497    { 716, PT_PC, ucp_Po },    { 714, PT_SC, ucp_Phags_Pa },
498    { 719, PT_PC, ucp_Ps },    { 723, PT_SC, ucp_Phoenician },
499    { 722, PT_SC, ucp_Rejang },    { 734, PT_PC, ucp_Pi },
500    { 729, PT_SC, ucp_Runic },    { 737, PT_PC, ucp_Po },
501    { 735, PT_GC, ucp_S },    { 740, PT_PC, ucp_Ps },
502    { 737, PT_SC, ucp_Samaritan },    { 743, PT_SC, ucp_Rejang },
503    { 747, PT_SC, ucp_Saurashtra },    { 750, PT_SC, ucp_Runic },
504    { 758, PT_PC, ucp_Sc },    { 756, PT_GC, ucp_S },
505    { 761, PT_SC, ucp_Shavian },    { 758, PT_SC, ucp_Samaritan },
506    { 769, PT_SC, ucp_Sinhala },    { 768, PT_SC, ucp_Saurashtra },
507    { 777, PT_PC, ucp_Sk },    { 779, PT_PC, ucp_Sc },
508    { 780, PT_PC, ucp_Sm },    { 782, PT_SC, ucp_Shavian },
509    { 783, PT_PC, ucp_So },    { 790, PT_SC, ucp_Sinhala },
510    { 786, PT_SC, ucp_Sundanese },    { 798, PT_PC, ucp_Sk },
511    { 796, PT_SC, ucp_Syloti_Nagri },    { 801, PT_PC, ucp_Sm },
512    { 809, PT_SC, ucp_Syriac },    { 804, PT_PC, ucp_So },
513    { 816, PT_SC, ucp_Tagalog },    { 807, PT_SC, ucp_Sundanese },
514    { 824, PT_SC, ucp_Tagbanwa },    { 817, PT_SC, ucp_Syloti_Nagri },
515    { 833, PT_SC, ucp_Tai_Le },    { 830, PT_SC, ucp_Syriac },
516    { 840, PT_SC, ucp_Tai_Tham },    { 837, PT_SC, ucp_Tagalog },
517    { 849, PT_SC, ucp_Tai_Viet },    { 845, PT_SC, ucp_Tagbanwa },
518    { 858, PT_SC, ucp_Tamil },    { 854, PT_SC, ucp_Tai_Le },
519    { 864, PT_SC, ucp_Telugu },    { 861, PT_SC, ucp_Tai_Tham },
520    { 871, PT_SC, ucp_Thaana },    { 870, PT_SC, ucp_Tai_Viet },
521    { 878, PT_SC, ucp_Thai },    { 879, PT_SC, ucp_Tamil },
522    { 883, PT_SC, ucp_Tibetan },    { 885, PT_SC, ucp_Telugu },
523    { 891, PT_SC, ucp_Tifinagh },    { 892, PT_SC, ucp_Thaana },
524    { 900, PT_SC, ucp_Ugaritic },    { 899, PT_SC, ucp_Thai },
525    { 909, PT_SC, ucp_Vai },    { 904, PT_SC, ucp_Tibetan },
526    { 913, PT_ALNUM, 0 },    { 912, PT_SC, ucp_Tifinagh },
527    { 917, PT_PXSPACE, 0 },    { 921, PT_SC, ucp_Ugaritic },
528    { 921, PT_SPACE, 0 },    { 930, PT_SC, ucp_Vai },
529    { 925, PT_WORD, 0 },    { 934, PT_ALNUM, 0 },
530    { 929, PT_SC, ucp_Yi },    { 938, PT_PXSPACE, 0 },
531    { 932, PT_GC, ucp_Z },    { 942, PT_SPACE, 0 },
532    { 934, PT_PC, ucp_Zl },    { 946, PT_WORD, 0 },
533    { 937, PT_PC, ucp_Zp },    { 950, PT_SC, ucp_Yi },
534    { 940, PT_PC, ucp_Zs }    { 953, PT_GC, ucp_Z },
535      { 955, PT_PC, ucp_Zl },
536      { 958, PT_PC, ucp_Zp },
537      { 961, PT_PC, ucp_Zs }
538  };  };
539    
540  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);

Legend:
Removed from v.591  
changed lines
  Added in v.592

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12