/[pcre]/code/trunk/ucp.h
ViewVC logotype

Contents of /code/trunk/ucp.h

Parent Directory | Revision Log


Revision 1404 - (show annotations) (download)
Tue Nov 19 15:36:57 2013 UTC (4 months, 4 weeks ago) by ph10
File MIME type: text/plain
File size: 4782 byte(s)
Source tidies for 8.34-RC1.

1 /*************************************************
2 * Unicode Property Table handler *
3 *************************************************/
4
5 #ifndef _UCP_H
6 #define _UCP_H
7
8 /* This file contains definitions of the property values that are returned by
9 the UCD access macros. New values that are added for new releases of Unicode
10 should always be at the end of each enum, for backwards compatibility.
11
12 IMPORTANT: Note also that the specific numeric values of the enums have to be
13 the same as the values that are generated by the maint/MultiStage2.py script,
14 where the equivalent property descriptive names are listed in vectors.
15
16 ALSO: The specific values of the first two enums are assumed for the table
17 called catposstab in pcre_compile.c. */
18
/* These are the general character categories. The numeric values matter: they
have to be the same as the values generated by the maint/MultiStage2.py script,
and they are assumed by the table called catposstab in pcre_compile.c. For
backwards compatibility, any new value must be added at the end. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
30
/* These are the particular character categories. The numeric values matter:
they have to be the same as the values generated by the maint/MultiStage2.py
script, and they are assumed by the table called catposstab in pcre_compile.c.
For backwards compatibility, any new value must be added at the end. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
65
/* These are grapheme break properties. Note that the code for processing them
assumes that the values are less than 16. If more values are added that take
the number to 16 or more, the code will have to be rewritten. The values are
written out explicitly so that the compiler, rather than a comment, fixes each
one; they have to be the same as the values generated by the
maint/MultiStage2.py script, and new values must be added at the end. */

enum {
  ucp_gbCR                =  0,
  ucp_gbLF                =  1,
  ucp_gbControl           =  2,
  ucp_gbExtend            =  3,
  ucp_gbPrepend           =  4,
  ucp_gbSpacingMark       =  5,
  ucp_gbL                 =  6,    /* Hangul syllable type L */
  ucp_gbV                 =  7,    /* Hangul syllable type V */
  ucp_gbT                 =  8,    /* Hangul syllable type T */
  ucp_gbLV                =  9,    /* Hangul syllable type LV */
  ucp_gbLVT               = 10,    /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12
};
85
/* These are the script identifications. The numeric values have to be the same
as the values generated by the maint/MultiStage2.py script. Scripts introduced
by a new release of Unicode are always appended at the end, for backwards
compatibility, which is why the list is not in a single alphabetical run. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0: */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1: */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2: */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0: */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0: */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri
};
197
198 #endif
199
200 /* End of ucp.h */

Properties

Name Value
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12