/[pcre]/code/trunk/pcre_tables.c
ViewVC logotype

Diff of /code/trunk/pcre_tables.c

Parent Directory | Revision Log | View Patch

revision 592 by ph10, Sat Apr 30 17:37:37 2011 UTC | revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 37  POSSIBILITY OF SUCH DAMAGE.
37  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
38  */  */
39    
40    #ifndef PCRE_INCLUDED
41    
42  /* This module contains some fixed tables that are used by more than one of the  /* This module contains some fixed tables that are used by more than one of the
43  PCRE code modules. The tables are also #included by the pcretest program, which  PCRE code modules. The tables are also #included by the pcretest program, which
# Line 50  clashes with the library. */ Line 51  clashes with the library. */
51    
52  #include "pcre_internal.h"  #include "pcre_internal.h"
53    
54    #endif /* PCRE_INCLUDED */
55    
56  /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that  /* Table of sizes for the fixed-length opcodes. It's defined in a macro so that
57  the definition is next to the definition of the opcodes in pcre_internal.h. */  the definition is next to the definition of the opcodes in pcre_internal.h. */
58    
59  const uschar _pcre_OP_lengths[] = { OP_LENGTHS };  const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS };
60    
61    
62    
# Line 65  const uschar _pcre_OP_lengths[] = { OP_L Line 67  const uschar _pcre_OP_lengths[] = { OP_L
67  /* These are the breakpoints for different numbers of bytes in a UTF-8  /* These are the breakpoints for different numbers of bytes in a UTF-8
68  character. */  character. */
69    
70  #ifdef SUPPORT_UTF8  #if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \
71      || (defined PCRE_INCLUDED && defined SUPPORT_PCRE16)
72    
73  const int _pcre_utf8_table1[] =  /* These tables are also required by pcretest in 16 bit mode. */
74    
75    const int PRIV(utf8_table1)[] =
76    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};    { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
77    
78  const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int);  const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int);
79    
80  /* These are the indicator bits and the mask for the data bits to set in the  /* These are the indicator bits and the mask for the data bits to set in the
81  first byte of a character, indexed by the number of additional bytes. */  first byte of a character, indexed by the number of additional bytes. */
82    
83  const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  const int PRIV(utf8_table2)[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
84  const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
85    
86  /* Table of the number of extra bytes, indexed by the first byte masked with  /* Table of the number of extra bytes, indexed by the first byte masked with
87  0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */  0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */
88    
89  const uschar _pcre_utf8_table4[] = {  const pcre_uint8 PRIV(utf8_table4)[] = {
90    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
91    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
92    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
93    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };    3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
94    
95    #endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/
96    
97    #ifdef SUPPORT_UTF
98    
99  /* Table to translate from particular type value to the general value. */  /* Table to translate from particular type value to the general value. */
100    
101  const int _pcre_ucp_gentype[] = {  const int PRIV(ucp_gentype)[] = {
102    ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */    ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
103    ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */    ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lu, Lm, Lo, Lt */
104    ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */    ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
# Line 100  const int _pcre_ucp_gentype[] = { Line 109  const int _pcre_ucp_gentype[] = {
109    ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */    ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
110  };  };
111    
112    #ifdef SUPPORT_JIT
113    /* This table reverses PRIV(ucp_gentype). We can save the cost
114    of a memory load. */
115    
116    const int PRIV(ucp_typerange)[] = {
117      ucp_Cc, ucp_Cs,
118      ucp_Ll, ucp_Lu,
119      ucp_Mc, ucp_Mn,
120      ucp_Nd, ucp_No,
121      ucp_Pc, ucp_Ps,
122      ucp_Sc, ucp_So,
123      ucp_Zl, ucp_Zs,
124    };
125    #endif /* SUPPORT_JIT */
126    
127  /* The pcre_utt[] table below translates Unicode property names into type and  /* The pcre_utt[] table below translates Unicode property names into type and
128  code values. It is searched by binary chop, so must be in collating sequence of  code values. It is searched by binary chop, so must be in collating sequence of
129  name. Originally, the table contained pointers to the name strings in the first  name. Originally, the table contained pointers to the name strings in the first
# Line 256  strings to make sure that UTF-8 support Line 280  strings to make sure that UTF-8 support
280  #define STRING_Zp0 STR_Z STR_p "\0"  #define STRING_Zp0 STR_Z STR_p "\0"
281  #define STRING_Zs0 STR_Z STR_s "\0"  #define STRING_Zs0 STR_Z STR_s "\0"
282    
283  const char _pcre_utt_names[] =  const char PRIV(utt_names)[] =
284    STRING_Any0    STRING_Any0
285    STRING_Arabic0    STRING_Arabic0
286    STRING_Armenian0    STRING_Armenian0
# Line 396  const char _pcre_utt_names[] = Line 420  const char _pcre_utt_names[] =
420    STRING_Zp0    STRING_Zp0
421    STRING_Zs0;    STRING_Zs0;
422    
423  const ucp_type_table _pcre_utt[] = {  const ucp_type_table PRIV(utt)[] = {
424    {   0, PT_ANY, 0 },    {   0, PT_ANY, 0 },
425    {   4, PT_SC, ucp_Arabic },    {   4, PT_SC, ucp_Arabic },
426    {  11, PT_SC, ucp_Armenian },    {  11, PT_SC, ucp_Armenian },
427    {  20, PT_SC, ucp_Avestan },    {  20, PT_SC, ucp_Avestan },
428    {  28, PT_SC, ucp_Balinese },    {  28, PT_SC, ucp_Balinese },
429    {  37, PT_SC, ucp_Bamum },    {  37, PT_SC, ucp_Bamum },
430    {  43, PT_SC, ucp_Batak },    {  43, PT_SC, ucp_Batak },
431    {  49, PT_SC, ucp_Bengali },    {  49, PT_SC, ucp_Bengali },
432    {  57, PT_SC, ucp_Bopomofo },    {  57, PT_SC, ucp_Bopomofo },
433    {  66, PT_SC, ucp_Brahmi },    {  66, PT_SC, ucp_Brahmi },
434    {  73, PT_SC, ucp_Braille },    {  73, PT_SC, ucp_Braille },
435    {  81, PT_SC, ucp_Buginese },    {  81, PT_SC, ucp_Buginese },
436    {  90, PT_SC, ucp_Buhid },    {  90, PT_SC, ucp_Buhid },
437    {  96, PT_GC, ucp_C },    {  96, PT_GC, ucp_C },
438    {  98, PT_SC, ucp_Canadian_Aboriginal },    {  98, PT_SC, ucp_Canadian_Aboriginal },
439    { 118, PT_SC, ucp_Carian },    { 118, PT_SC, ucp_Carian },
440    { 125, PT_PC, ucp_Cc },    { 125, PT_PC, ucp_Cc },
441    { 128, PT_PC, ucp_Cf },    { 128, PT_PC, ucp_Cf },
442    { 131, PT_SC, ucp_Cham },    { 131, PT_SC, ucp_Cham },
443    { 136, PT_SC, ucp_Cherokee },    { 136, PT_SC, ucp_Cherokee },
444    { 145, PT_PC, ucp_Cn },    { 145, PT_PC, ucp_Cn },
445    { 148, PT_PC, ucp_Co },    { 148, PT_PC, ucp_Co },
446    { 151, PT_SC, ucp_Common },    { 151, PT_SC, ucp_Common },
447    { 158, PT_SC, ucp_Coptic },    { 158, PT_SC, ucp_Coptic },
448    { 165, PT_PC, ucp_Cs },    { 165, PT_PC, ucp_Cs },
449    { 168, PT_SC, ucp_Cuneiform },    { 168, PT_SC, ucp_Cuneiform },
450    { 178, PT_SC, ucp_Cypriot },    { 178, PT_SC, ucp_Cypriot },
451    { 186, PT_SC, ucp_Cyrillic },    { 186, PT_SC, ucp_Cyrillic },
452    { 195, PT_SC, ucp_Deseret },    { 195, PT_SC, ucp_Deseret },
453    { 203, PT_SC, ucp_Devanagari },    { 203, PT_SC, ucp_Devanagari },
454    { 214, PT_SC, ucp_Egyptian_Hieroglyphs },    { 214, PT_SC, ucp_Egyptian_Hieroglyphs },
455    { 235, PT_SC, ucp_Ethiopic },    { 235, PT_SC, ucp_Ethiopic },
456    { 244, PT_SC, ucp_Georgian },    { 244, PT_SC, ucp_Georgian },
457    { 253, PT_SC, ucp_Glagolitic },    { 253, PT_SC, ucp_Glagolitic },
458    { 264, PT_SC, ucp_Gothic },    { 264, PT_SC, ucp_Gothic },
459    { 271, PT_SC, ucp_Greek },    { 271, PT_SC, ucp_Greek },
460    { 277, PT_SC, ucp_Gujarati },    { 277, PT_SC, ucp_Gujarati },
461    { 286, PT_SC, ucp_Gurmukhi },    { 286, PT_SC, ucp_Gurmukhi },
462    { 295, PT_SC, ucp_Han },    { 295, PT_SC, ucp_Han },
463    { 299, PT_SC, ucp_Hangul },    { 299, PT_SC, ucp_Hangul },
464    { 306, PT_SC, ucp_Hanunoo },    { 306, PT_SC, ucp_Hanunoo },
465    { 314, PT_SC, ucp_Hebrew },    { 314, PT_SC, ucp_Hebrew },
466    { 321, PT_SC, ucp_Hiragana },    { 321, PT_SC, ucp_Hiragana },
467    { 330, PT_SC, ucp_Imperial_Aramaic },    { 330, PT_SC, ucp_Imperial_Aramaic },
468    { 347, PT_SC, ucp_Inherited },    { 347, PT_SC, ucp_Inherited },
469    { 357, PT_SC, ucp_Inscriptional_Pahlavi },    { 357, PT_SC, ucp_Inscriptional_Pahlavi },
470    { 379, PT_SC, ucp_Inscriptional_Parthian },    { 379, PT_SC, ucp_Inscriptional_Parthian },
471    { 402, PT_SC, ucp_Javanese },    { 402, PT_SC, ucp_Javanese },
472    { 411, PT_SC, ucp_Kaithi },    { 411, PT_SC, ucp_Kaithi },
473    { 418, PT_SC, ucp_Kannada },    { 418, PT_SC, ucp_Kannada },
474    { 426, PT_SC, ucp_Katakana },    { 426, PT_SC, ucp_Katakana },
475    { 435, PT_SC, ucp_Kayah_Li },    { 435, PT_SC, ucp_Kayah_Li },
476    { 444, PT_SC, ucp_Kharoshthi },    { 444, PT_SC, ucp_Kharoshthi },
477    { 455, PT_SC, ucp_Khmer },    { 455, PT_SC, ucp_Khmer },
478    { 461, PT_GC, ucp_L },    { 461, PT_GC, ucp_L },
479    { 463, PT_LAMP, 0 },    { 463, PT_LAMP, 0 },
480    { 466, PT_SC, ucp_Lao },    { 466, PT_SC, ucp_Lao },
481    { 470, PT_SC, ucp_Latin },    { 470, PT_SC, ucp_Latin },
482    { 476, PT_SC, ucp_Lepcha },    { 476, PT_SC, ucp_Lepcha },
483    { 483, PT_SC, ucp_Limbu },    { 483, PT_SC, ucp_Limbu },
484    { 489, PT_SC, ucp_Linear_B },    { 489, PT_SC, ucp_Linear_B },
485    { 498, PT_SC, ucp_Lisu },    { 498, PT_SC, ucp_Lisu },
486    { 503, PT_PC, ucp_Ll },    { 503, PT_PC, ucp_Ll },
487    { 506, PT_PC, ucp_Lm },    { 506, PT_PC, ucp_Lm },
488    { 509, PT_PC, ucp_Lo },    { 509, PT_PC, ucp_Lo },
489    { 512, PT_PC, ucp_Lt },    { 512, PT_PC, ucp_Lt },
490    { 515, PT_PC, ucp_Lu },    { 515, PT_PC, ucp_Lu },
491    { 518, PT_SC, ucp_Lycian },    { 518, PT_SC, ucp_Lycian },
492    { 525, PT_SC, ucp_Lydian },    { 525, PT_SC, ucp_Lydian },
493    { 532, PT_GC, ucp_M },    { 532, PT_GC, ucp_M },
494    { 534, PT_SC, ucp_Malayalam },    { 534, PT_SC, ucp_Malayalam },
495    { 544, PT_SC, ucp_Mandaic },    { 544, PT_SC, ucp_Mandaic },
496    { 552, PT_PC, ucp_Mc },    { 552, PT_PC, ucp_Mc },
497    { 555, PT_PC, ucp_Me },    { 555, PT_PC, ucp_Me },
498    { 558, PT_SC, ucp_Meetei_Mayek },    { 558, PT_SC, ucp_Meetei_Mayek },
499    { 571, PT_PC, ucp_Mn },    { 571, PT_PC, ucp_Mn },
500    { 574, PT_SC, ucp_Mongolian },    { 574, PT_SC, ucp_Mongolian },
501    { 584, PT_SC, ucp_Myanmar },    { 584, PT_SC, ucp_Myanmar },
502    { 592, PT_GC, ucp_N },    { 592, PT_GC, ucp_N },
503    { 594, PT_PC, ucp_Nd },    { 594, PT_PC, ucp_Nd },
504    { 597, PT_SC, ucp_New_Tai_Lue },    { 597, PT_SC, ucp_New_Tai_Lue },
505    { 609, PT_SC, ucp_Nko },    { 609, PT_SC, ucp_Nko },
506    { 613, PT_PC, ucp_Nl },    { 613, PT_PC, ucp_Nl },
507    { 616, PT_PC, ucp_No },    { 616, PT_PC, ucp_No },
508    { 619, PT_SC, ucp_Ogham },    { 619, PT_SC, ucp_Ogham },
509    { 625, PT_SC, ucp_Ol_Chiki },    { 625, PT_SC, ucp_Ol_Chiki },
510    { 634, PT_SC, ucp_Old_Italic },    { 634, PT_SC, ucp_Old_Italic },
511    { 645, PT_SC, ucp_Old_Persian },    { 645, PT_SC, ucp_Old_Persian },
512    { 657, PT_SC, ucp_Old_South_Arabian },    { 657, PT_SC, ucp_Old_South_Arabian },
513    { 675, PT_SC, ucp_Old_Turkic },    { 675, PT_SC, ucp_Old_Turkic },
514    { 686, PT_SC, ucp_Oriya },    { 686, PT_SC, ucp_Oriya },
515    { 692, PT_SC, ucp_Osmanya },    { 692, PT_SC, ucp_Osmanya },
516    { 700, PT_GC, ucp_P },    { 700, PT_GC, ucp_P },
517    { 702, PT_PC, ucp_Pc },    { 702, PT_PC, ucp_Pc },
518    { 705, PT_PC, ucp_Pd },    { 705, PT_PC, ucp_Pd },
519    { 708, PT_PC, ucp_Pe },    { 708, PT_PC, ucp_Pe },
520    { 711, PT_PC, ucp_Pf },    { 711, PT_PC, ucp_Pf },
521    { 714, PT_SC, ucp_Phags_Pa },    { 714, PT_SC, ucp_Phags_Pa },
522    { 723, PT_SC, ucp_Phoenician },    { 723, PT_SC, ucp_Phoenician },
523    { 734, PT_PC, ucp_Pi },    { 734, PT_PC, ucp_Pi },
524    { 737, PT_PC, ucp_Po },    { 737, PT_PC, ucp_Po },
525    { 740, PT_PC, ucp_Ps },    { 740, PT_PC, ucp_Ps },
526    { 743, PT_SC, ucp_Rejang },    { 743, PT_SC, ucp_Rejang },
527    { 750, PT_SC, ucp_Runic },    { 750, PT_SC, ucp_Runic },
528    { 756, PT_GC, ucp_S },    { 756, PT_GC, ucp_S },
529    { 758, PT_SC, ucp_Samaritan },    { 758, PT_SC, ucp_Samaritan },
530    { 768, PT_SC, ucp_Saurashtra },    { 768, PT_SC, ucp_Saurashtra },
531    { 779, PT_PC, ucp_Sc },    { 779, PT_PC, ucp_Sc },
532    { 782, PT_SC, ucp_Shavian },    { 782, PT_SC, ucp_Shavian },
533    { 790, PT_SC, ucp_Sinhala },    { 790, PT_SC, ucp_Sinhala },
534    { 798, PT_PC, ucp_Sk },    { 798, PT_PC, ucp_Sk },
535    { 801, PT_PC, ucp_Sm },    { 801, PT_PC, ucp_Sm },
536    { 804, PT_PC, ucp_So },    { 804, PT_PC, ucp_So },
537    { 807, PT_SC, ucp_Sundanese },    { 807, PT_SC, ucp_Sundanese },
538    { 817, PT_SC, ucp_Syloti_Nagri },    { 817, PT_SC, ucp_Syloti_Nagri },
539    { 830, PT_SC, ucp_Syriac },    { 830, PT_SC, ucp_Syriac },
540    { 837, PT_SC, ucp_Tagalog },    { 837, PT_SC, ucp_Tagalog },
541    { 845, PT_SC, ucp_Tagbanwa },    { 845, PT_SC, ucp_Tagbanwa },
542    { 854, PT_SC, ucp_Tai_Le },    { 854, PT_SC, ucp_Tai_Le },
543    { 861, PT_SC, ucp_Tai_Tham },    { 861, PT_SC, ucp_Tai_Tham },
544    { 870, PT_SC, ucp_Tai_Viet },    { 870, PT_SC, ucp_Tai_Viet },
545    { 879, PT_SC, ucp_Tamil },    { 879, PT_SC, ucp_Tamil },
546    { 885, PT_SC, ucp_Telugu },    { 885, PT_SC, ucp_Telugu },
547    { 892, PT_SC, ucp_Thaana },    { 892, PT_SC, ucp_Thaana },
548    { 899, PT_SC, ucp_Thai },    { 899, PT_SC, ucp_Thai },
549    { 904, PT_SC, ucp_Tibetan },    { 904, PT_SC, ucp_Tibetan },
550    { 912, PT_SC, ucp_Tifinagh },    { 912, PT_SC, ucp_Tifinagh },
551    { 921, PT_SC, ucp_Ugaritic },    { 921, PT_SC, ucp_Ugaritic },
552    { 930, PT_SC, ucp_Vai },    { 930, PT_SC, ucp_Vai },
553    { 934, PT_ALNUM, 0 },    { 934, PT_ALNUM, 0 },
554    { 938, PT_PXSPACE, 0 },    { 938, PT_PXSPACE, 0 },
555    { 942, PT_SPACE, 0 },    { 942, PT_SPACE, 0 },
556    { 946, PT_WORD, 0 },    { 946, PT_WORD, 0 },
557    { 950, PT_SC, ucp_Yi },    { 950, PT_SC, ucp_Yi },
558    { 953, PT_GC, ucp_Z },    { 953, PT_GC, ucp_Z },
559    { 955, PT_PC, ucp_Zl },    { 955, PT_PC, ucp_Zl },
560    { 958, PT_PC, ucp_Zp },    { 958, PT_PC, ucp_Zp },
561    { 961, PT_PC, ucp_Zs }    { 961, PT_PC, ucp_Zs }
562  };  };
563    
564  const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);  const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table);
565    
566  #endif  /* SUPPORT_UTF8 */  #endif /* SUPPORT_UTF */
567    
568  /* End of pcre_tables.c */  /* End of pcre_tables.c */

Legend:
Removed from v.592  
changed lines
  Added in v.836

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12