/[pcre]/code/trunk/maint/ucptest.c
ViewVC logotype

Contents of /code/trunk/maint/ucptest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 114 - (show annotations) (download)
Fri Mar 9 10:15:12 2007 UTC (7 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 8278 byte(s)
Rename "maintain" as "maint".

1 /***************************************************
2 * A program for testing the Unicode property table *
3 ***************************************************/
4
5 /* Copyright (c) University of Cambridge 2006 */
6
7 /* Compile thus:
8 gcc -o ucptest ucptest.c ../pcre_ucp_searchfuncs.c
9 */
10
11 #include <ctype.h>
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <string.h>
15 #include "../pcre_internal.h"
16 #include "../ucp.h"
17 #include "../ucpinternal.h"
18
19
20 /* -------------------------------------------------------------------*/
21
22 #define CS (char *)                  /* cast prefix: to char * */
23 #define CCS (const char *)           /* cast prefix: to const char * */
24 #define CSS (char **)                /* cast prefix: to char ** */
25 #define US (unsigned char *)         /* cast prefix: to unsigned char * */
26 #define CUS (const unsigned char *)  /* cast prefix: to const unsigned char * */
27 #define USS (unsigned char **)       /* cast prefix: to unsigned char ** */
28
29 /* -------------------------------------------------------------------*/
30
31
32
33
34 /*************************************************
35 * Print Unicode property info for a char *
36 *************************************************/
37
38 static void
39 print_prop(int c)
40 {
41 int fulltype, script, othercase;
42 int type = _pcre_ucp_findprop(c, &fulltype, &script);
43
44 printf("%04x ", c);
45 if (type < 0) printf("not found\n"); else
46 {
47 uschar *fulltypename = US"??";
48 uschar *typename = US"??";
49 uschar *scriptname = US"??";
50 switch (type)
51 {
52 case ucp_C: typename = US"Control"; break;
53 case ucp_L: typename = US"Letter"; break;
54 case ucp_M: typename = US"Mark"; break;
55 case ucp_N: typename = US"Number"; break;
56 case ucp_P: typename = US"Punctuation"; break;
57 case ucp_S: typename = US"Symbol"; break;
58 case ucp_Z: typename = US"Separator"; break;
59 }
60 switch (fulltype)
61 {
62 case ucp_Cc: fulltypename = US"Control"; break;
63 case ucp_Cf: fulltypename = US"Format"; break;
64 case ucp_Cn: fulltypename = US"Unassigned"; break;
65 case ucp_Co: fulltypename = US"Private use"; break;
66 case ucp_Cs: fulltypename = US"Surrogate"; break;
67 case ucp_Ll: fulltypename = US"Lower case letter"; break;
68 case ucp_Lm: fulltypename = US"Modifier letter"; break;
69 case ucp_Lo: fulltypename = US"Other letter"; break;
70 case ucp_Lt: fulltypename = US"Title case letter"; break;
71 case ucp_Lu: fulltypename = US"Upper case letter"; break;
72 case ucp_Mc: fulltypename = US"Spacing mark"; break;
73 case ucp_Me: fulltypename = US"Enclosing mark"; break;
74 case ucp_Mn: fulltypename = US"Non-spacing mark"; break;
75 case ucp_Nd: fulltypename = US"Decimal number"; break;
76 case ucp_Nl: fulltypename = US"Letter number"; break;
77 case ucp_No: fulltypename = US"Other number"; break;
78 case ucp_Pc: fulltypename = US"Connector punctuation"; break;
79 case ucp_Pd: fulltypename = US"Dash punctuation"; break;
80 case ucp_Pe: fulltypename = US"Close punctuation"; break;
81 case ucp_Pf: fulltypename = US"Final punctuation"; break;
82 case ucp_Pi: fulltypename = US"Initial punctuation"; break;
83 case ucp_Po: fulltypename = US"Other punctuation"; break;
84 case ucp_Ps: fulltypename = US"Open punctuation"; break;
85 case ucp_Sc: fulltypename = US"Currency symbol"; break;
86 case ucp_Sk: fulltypename = US"Modifier symbol"; break;
87 case ucp_Sm: fulltypename = US"Mathematical symbol"; break;
88 case ucp_So: fulltypename = US"Other symbol"; break;
89 case ucp_Zl: fulltypename = US"Line separator"; break;
90 case ucp_Zp: fulltypename = US"Paragraph separator"; break;
91 case ucp_Zs: fulltypename = US"Space separator"; break;
92 }
93 switch(script)
94 {
95 case ucp_Arabic: scriptname = US"Arabic"; break;
96 case ucp_Armenian: scriptname = US"Armenian"; break;
97 case ucp_Balinese: scriptname = US"Balinese"; break;
98 case ucp_Bengali: scriptname = US"Bengali"; break;
99 case ucp_Bopomofo: scriptname = US"Bopomofo"; break;
100 case ucp_Braille: scriptname = US"Braille"; break;
101 case ucp_Buginese: scriptname = US"Buginese"; break;
102 case ucp_Buhid: scriptname = US"Buhid"; break;
103 case ucp_Canadian_Aboriginal: scriptname = US"Canadian_Aboriginal"; break;
104 case ucp_Cherokee: scriptname = US"Cherokee"; break;
105 case ucp_Common: scriptname = US"Common"; break;
106 case ucp_Coptic: scriptname = US"Coptic"; break;
107 case ucp_Cuneiform: scriptname = US"Cuneiform"; break;
108 case ucp_Cypriot: scriptname = US"Cypriot"; break;
109 case ucp_Cyrillic: scriptname = US"Cyrillic"; break;
110 case ucp_Deseret: scriptname = US"Deseret"; break;
111 case ucp_Devanagari: scriptname = US"Devanagari"; break;
112 case ucp_Ethiopic: scriptname = US"Ethiopic"; break;
113 case ucp_Georgian: scriptname = US"Georgian"; break;
114 case ucp_Glagolitic: scriptname = US"Glagolitic"; break;
115 case ucp_Gothic: scriptname = US"Gothic"; break;
116 case ucp_Greek: scriptname = US"Greek"; break;
117 case ucp_Gujarati: scriptname = US"Gujarati"; break;
118 case ucp_Gurmukhi: scriptname = US"Gurmukhi"; break;
119 case ucp_Han: scriptname = US"Han"; break;
120 case ucp_Hangul: scriptname = US"Hangul"; break;
121 case ucp_Hanunoo: scriptname = US"Hanunoo"; break;
122 case ucp_Hebrew: scriptname = US"Hebrew"; break;
123 case ucp_Hiragana: scriptname = US"Hiragana"; break;
124 case ucp_Inherited: scriptname = US"Inherited"; break;
125 case ucp_Kannada: scriptname = US"Kannada"; break;
126 case ucp_Katakana: scriptname = US"Katakana"; break;
127 case ucp_Kharoshthi: scriptname = US"Kharoshthi"; break;
128 case ucp_Khmer: scriptname = US"Khmer"; break;
129 case ucp_Lao: scriptname = US"Lao"; break;
130 case ucp_Latin: scriptname = US"Latin"; break;
131 case ucp_Limbu: scriptname = US"Limbu"; break;
132 case ucp_Linear_B: scriptname = US"Linear_B"; break;
133 case ucp_Malayalam: scriptname = US"Malayalam"; break;
134 case ucp_Mongolian: scriptname = US"Mongolian"; break;
135 case ucp_Myanmar: scriptname = US"Myanmar"; break;
136 case ucp_New_Tai_Lue: scriptname = US"New_Tai_Lue"; break;
137 case ucp_Nko: scriptname = US"Nko"; break;
138 case ucp_Ogham: scriptname = US"Ogham"; break;
139 case ucp_Old_Italic: scriptname = US"Old_Italic"; break;
140 case ucp_Old_Persian: scriptname = US"Old_Persian"; break;
141 case ucp_Oriya: scriptname = US"Oriya"; break;
142 case ucp_Osmanya: scriptname = US"Osmanya"; break;
143 case ucp_Phags_Pa: scriptname = US"Phags_Pa"; break;
144 case ucp_Phoenician: scriptname = US"Phoenician"; break;
145 case ucp_Runic: scriptname = US"Runic"; break;
146 case ucp_Shavian: scriptname = US"Shavian"; break;
147 case ucp_Sinhala: scriptname = US"Sinhala"; break;
148 case ucp_Syloti_Nagri: scriptname = US"Syloti_Nagri"; break;
149 case ucp_Syriac: scriptname = US"Syriac"; break;
150 case ucp_Tagalog: scriptname = US"Tagalog"; break;
151 case ucp_Tagbanwa: scriptname = US"Tagbanwa"; break;
152 case ucp_Tai_Le: scriptname = US"Tai_Le"; break;
153 case ucp_Tamil: scriptname = US"Tamil"; break;
154 case ucp_Telugu: scriptname = US"Telugu"; break;
155 case ucp_Thaana: scriptname = US"Thaana"; break;
156 case ucp_Thai: scriptname = US"Thai"; break;
157 case ucp_Tibetan: scriptname = US"Tibetan"; break;
158 case ucp_Tifinagh: scriptname = US"Tifinagh"; break;
159 case ucp_Ugaritic: scriptname = US"Ugaritic"; break;
160 case ucp_Yi: scriptname = US"Yi"; break;
161 }
162
163 printf("%s: %s %s", typename, fulltypename, scriptname);
164 othercase = _pcre_ucp_othercase(c);
165 if (othercase >= 0) printf(" %04x", othercase);
166 printf("\n");
167 }
168 }
169
170
171
172 /*************************************************
173 * Main program *
174 *************************************************/
175
176 int
177 main(void)
178 {
179 uschar buffer[1024];
180 while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
181 {
182 uschar name[24];
183 uschar *s, *t;
184
185 printf("%s", buffer);
186 s = buffer;
187 while (isspace(*s)) s++;
188 if (*s == 0) continue;
189
190 for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
191 *t = 0;
192 while (isspace(*s)) s++;
193
194 if (strcmp(CS name, "findprop") == 0)
195 {
196 while (*s != 0)
197 {
198 uschar *endptr;
199 int c = strtoul(CS s, CSS(&endptr), 16);
200 print_prop(c);
201 s = endptr;
202 while (isspace(*s)) s++;
203 }
204 }
205
206 else printf("Unknown test command %s\n", name);
207 }
208
209 return 0;
210 }
211
212 /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12