/[pcre]/code/trunk/ucp.h
ViewVC logotype

Contents of /code/trunk/ucp.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1221 - (hide annotations) (download)
Sun Nov 11 20:27:03 2012 UTC (6 months ago) by ph10
File MIME type: text/plain
File size: 4669 byte(s)
File tidies, preparing for 8.32-RC1.

1 nigel 75 /*************************************************
2 nigel 87 * Unicode Property Table handler *
3 nigel 75 *************************************************/
4    
5 nigel 77 #ifndef _UCP_H
6     #define _UCP_H
7    
8 nigel 87 /* This file contains definitions of the property values that are returned by
9 ph10 387 the UCD access macros. New values that are added for new releases of Unicode
10 ph10 1011 should always be at the end of each enum, for backwards compatibility.
11 nigel 75
12 ph10 1011 IMPORTANT: Note also that the specific numeric values of the enums have to be
13     the same as the values that are generated by the maint/MultiStage2.py script,
14     where the equivalent property descriptive names are listed in vectors. */
15    
16 nigel 87 /* These are the general character categories. No enumerator has an explicit
    initializer, so the values run consecutively from 0 (ucp_C) to 6 (ucp_Z) in
    the order listed. As stated at the top of this file, these numbers must
    match those generated by maint/MultiStage2.py, so any new entry must be
    appended at the end and existing entries must not be reordered. */
17    
18 nigel 75 enum {
19     ucp_C, /* 0 Other */
20     ucp_L, /* 1 Letter */
21     ucp_M, /* 2 Mark */
22     ucp_N, /* 3 Number */
23     ucp_P, /* 4 Punctuation */
24     ucp_S, /* 5 Symbol */
25     ucp_Z /* 6 Separator */
26     };
27    
28 ph10 1011 /* These are the particular character categories. The values are implicit and
    consecutive, from 0 (ucp_Cc) to 29 (ucp_Zs), in the order listed. The first
    letter of each name is the letter of one of the general categories declared
    above (C, L, M, N, P, S, Z). Do not reorder or insert entries: the numbers
    must stay in step with maint/MultiStage2.py. */
29 nigel 75
30     enum {
31     ucp_Cc, /* 0 Control */
32     ucp_Cf, /* 1 Format */
33     ucp_Cn, /* 2 Unassigned */
34     ucp_Co, /* 3 Private use */
35     ucp_Cs, /* 4 Surrogate */
36     ucp_Ll, /* 5 Lower case letter */
37     ucp_Lm, /* 6 Modifier letter */
38     ucp_Lo, /* 7 Other letter */
39     ucp_Lt, /* 8 Title case letter */
40     ucp_Lu, /* 9 Upper case letter */
41     ucp_Mc, /* 10 Spacing mark */
42     ucp_Me, /* 11 Enclosing mark */
43     ucp_Mn, /* 12 Non-spacing mark */
44     ucp_Nd, /* 13 Decimal number */
45     ucp_Nl, /* 14 Letter number */
46     ucp_No, /* 15 Other number */
47     ucp_Pc, /* 16 Connector punctuation */
48     ucp_Pd, /* 17 Dash punctuation */
49     ucp_Pe, /* 18 Close punctuation */
50     ucp_Pf, /* 19 Final punctuation */
51     ucp_Pi, /* 20 Initial punctuation */
52     ucp_Po, /* 21 Other punctuation */
53     ucp_Ps, /* 22 Open punctuation */
54     ucp_Sc, /* 23 Currency symbol */
55     ucp_Sk, /* 24 Modifier symbol */
56     ucp_Sm, /* 25 Mathematical symbol */
57     ucp_So, /* 26 Other symbol */
58     ucp_Zl, /* 27 Line separator */
59     ucp_Zp, /* 28 Paragraph separator */
60     ucp_Zs /* 29 Space separator */
61     };
62    
63 ph10 1221 /* These are grapheme break properties. Note that the code for processing them
64     assumes that the values are less than 16. If more values are added that take
65 ph10 1011 the number to 16 or more, the code will have to be rewritten. At present the
    enum holds 13 values, 0 (ucp_gbCR) to 12 (ucp_gbOther), so the highest value
    in use is still below that limit. The number in each comment below is the
    enumerator's implicit value. */
66    
67     enum {
68 chpe 1050 ucp_gbCR, /* 0 */
69     ucp_gbLF, /* 1 */
70     ucp_gbControl, /* 2 */
71     ucp_gbExtend, /* 3 */
72     ucp_gbPrepend, /* 4 */
73     ucp_gbSpacingMark, /* 5 */
74     ucp_gbL, /* 6 Hangul syllable type L */
75     ucp_gbV, /* 7 Hangul syllable type V */
76     ucp_gbT, /* 8 Hangul syllable type T */
77     ucp_gbLV, /* 9 Hangul syllable type LV */
78     ucp_gbLVT, /* 10 Hangul syllable type LVT */
79     ucp_gbRegionalIndicator, /* 11 */
80     ucp_gbOther /* 12 */
81 ph10 1011 };
82    
83 nigel 87 /* These are the script identifications. The values are implicit and
    consecutive, starting at 0 (ucp_Arabic). The first group, ucp_Arabic to
    ucp_Yi, is in alphabetical order and occupies values 0 to 60. Scripts added
    for later Unicode releases are appended after it in per-release groups, so
    the list as a whole is NOT alphabetical. Always add new scripts at the very
    end: inserting one anywhere else renumbers every later entry and breaks the
    correspondence with the tables generated by maint/MultiStage2.py. */
84    
85     enum {
86     ucp_Arabic, /* 0 */
87     ucp_Armenian,
88     ucp_Bengali,
89     ucp_Bopomofo,
90     ucp_Braille,
91     ucp_Buginese,
92     ucp_Buhid,
93     ucp_Canadian_Aboriginal,
94     ucp_Cherokee,
95     ucp_Common,
96     ucp_Coptic,
97     ucp_Cypriot,
98     ucp_Cyrillic,
99     ucp_Deseret,
100     ucp_Devanagari,
101     ucp_Ethiopic,
102     ucp_Georgian,
103     ucp_Glagolitic,
104     ucp_Gothic,
105     ucp_Greek,
106     ucp_Gujarati,
107     ucp_Gurmukhi,
108     ucp_Han,
109     ucp_Hangul,
110     ucp_Hanunoo,
111     ucp_Hebrew,
112     ucp_Hiragana,
113     ucp_Inherited,
114     ucp_Kannada,
115     ucp_Katakana,
116     ucp_Kharoshthi,
117     ucp_Khmer,
118     ucp_Lao,
119     ucp_Latin,
120     ucp_Limbu,
121     ucp_Linear_B,
122     ucp_Malayalam,
123     ucp_Mongolian,
124     ucp_Myanmar,
125     ucp_New_Tai_Lue,
126     ucp_Ogham,
127     ucp_Old_Italic,
128     ucp_Old_Persian,
129     ucp_Oriya,
130     ucp_Osmanya,
131     ucp_Runic,
132     ucp_Shavian,
133     ucp_Sinhala,
134     ucp_Syloti_Nagri,
135     ucp_Syriac,
136     ucp_Tagalog,
137     ucp_Tagbanwa,
138     ucp_Tai_Le,
139     ucp_Tamil,
140     ucp_Telugu,
141     ucp_Thaana,
142     ucp_Thai,
143     ucp_Tibetan,
144     ucp_Tifinagh,
145     ucp_Ugaritic,
146 nigel 93 ucp_Yi, /* 60 */
147 ph10 351 /* New for Unicode 5.0 (values 61 to 65): */
148     ucp_Balinese,
149     ucp_Cuneiform,
150     ucp_Nko,
151     ucp_Phags_Pa,
152     ucp_Phoenician,
153     /* New for Unicode 5.1 (values 66 to 76): */
154     ucp_Carian,
155     ucp_Cham,
156     ucp_Kayah_Li,
157     ucp_Lepcha,
158     ucp_Lycian,
159     ucp_Lydian,
160     ucp_Ol_Chiki,
161     ucp_Rejang,
162     ucp_Saurashtra,
163     ucp_Sundanese,
164 ph10 491 ucp_Vai,
165     /* New for Unicode 5.2 (values 77 to 91): */
166     ucp_Avestan,
167     ucp_Bamum,
168     ucp_Egyptian_Hieroglyphs,
169     ucp_Imperial_Aramaic,
170     ucp_Inscriptional_Pahlavi,
171     ucp_Inscriptional_Parthian,
172     ucp_Javanese,
173     ucp_Kaithi,
174     ucp_Lisu,
175     ucp_Meetei_Mayek,
176     ucp_Old_South_Arabian,
177     ucp_Old_Turkic,
178     ucp_Samaritan,
179     ucp_Tai_Tham,
180 ph10 592 ucp_Tai_Viet,
181     /* New for Unicode 6.0.0 (values 92 to 94): */
182     ucp_Batak,
183     ucp_Brahmi,
184 ph10 942 ucp_Mandaic,
185     /* New for Unicode 6.1.0 (values 95 to 101): */
186     ucp_Chakma,
187     ucp_Meroitic_Cursive,
188     ucp_Meroitic_Hieroglyphs,
189     ucp_Miao,
190     ucp_Sharada,
191     ucp_Sora_Sompeng,
192     ucp_Takri /* 101 */
193 nigel 87 };
194    
195 nigel 77 #endif
196 nigel 75
197     /* End of ucp.h */

Properties

Name Value
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12