/[pcre]/code/trunk/maint/ucptest.c
ViewVC logotype

Contents of /code/trunk/maint/ucptest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 114 - (hide annotations) (download)
Fri Mar 9 10:15:12 2007 UTC (7 years, 9 months ago) by ph10
File MIME type: text/plain
File size: 8278 byte(s)
Rename "maintain" as "maint".

1 ph10 97 /***************************************************
2     * A program for testing the Unicode property table *
3     ***************************************************/
4    
5     /* Copyright (c) University of Cambridge 2006 */
6    
7     /* Compile thus:
8 ph10 98 gcc -o ucptest ucptest.c ../pcre_ucp_searchfuncs.c
9 ph10 97 */
10    
11     #include <ctype.h>
12     #include <stdio.h>
13     #include <stdlib.h>
14     #include <string.h>
15 ph10 98 #include "../pcre_internal.h"
16     #include "../ucp.h"
17     #include "../ucpinternal.h"
18 ph10 97
19    
20     /* -------------------------------------------------------------------*/
21    
22     #define CS (char *)   /* cast to char *; main() uses it to pass uschar data to fgets/strcmp/strtoul */
23     #define CCS (const char *)   /* cast to const char *; not used in the visible part of this file */
24     #define CSS (char **)   /* cast to char **; main() uses it for strtoul's end-pointer argument */
25     #define US (unsigned char *)   /* cast to unsigned char *; print_prop() applies it to string literals */
26     #define CUS (const unsigned char *)   /* cast to const unsigned char *; not used in the visible part of this file */
27     #define USS (unsigned char **)   /* cast to unsigned char **; not used in the visible part of this file */
28    
29     /* -------------------------------------------------------------------*/
30    
31    
32    
33    
34     /*************************************************
35     * Print Unicode property info for a char *
36     *************************************************/
37    
38     static void
39     print_prop(int c)
40     {
41     int fulltype, script, othercase;
42     int type = _pcre_ucp_findprop(c, &fulltype, &script);
43    
44     printf("%04x ", c);
45     if (type < 0) printf("not found\n"); else
46     {
47     uschar *fulltypename = US"??";
48     uschar *typename = US"??";
49     uschar *scriptname = US"??";
50     switch (type)
51     {
52     case ucp_C: typename = US"Control"; break;
53     case ucp_L: typename = US"Letter"; break;
54     case ucp_M: typename = US"Mark"; break;
55     case ucp_N: typename = US"Number"; break;
56     case ucp_P: typename = US"Punctuation"; break;
57     case ucp_S: typename = US"Symbol"; break;
58     case ucp_Z: typename = US"Separator"; break;
59     }
60     switch (fulltype)
61     {
62     case ucp_Cc: fulltypename = US"Control"; break;
63     case ucp_Cf: fulltypename = US"Format"; break;
64     case ucp_Cn: fulltypename = US"Unassigned"; break;
65     case ucp_Co: fulltypename = US"Private use"; break;
66     case ucp_Cs: fulltypename = US"Surrogate"; break;
67     case ucp_Ll: fulltypename = US"Lower case letter"; break;
68     case ucp_Lm: fulltypename = US"Modifier letter"; break;
69     case ucp_Lo: fulltypename = US"Other letter"; break;
70     case ucp_Lt: fulltypename = US"Title case letter"; break;
71     case ucp_Lu: fulltypename = US"Upper case letter"; break;
72     case ucp_Mc: fulltypename = US"Spacing mark"; break;
73     case ucp_Me: fulltypename = US"Enclosing mark"; break;
74     case ucp_Mn: fulltypename = US"Non-spacing mark"; break;
75     case ucp_Nd: fulltypename = US"Decimal number"; break;
76     case ucp_Nl: fulltypename = US"Letter number"; break;
77     case ucp_No: fulltypename = US"Other number"; break;
78     case ucp_Pc: fulltypename = US"Connector punctuation"; break;
79     case ucp_Pd: fulltypename = US"Dash punctuation"; break;
80     case ucp_Pe: fulltypename = US"Close punctuation"; break;
81     case ucp_Pf: fulltypename = US"Final punctuation"; break;
82     case ucp_Pi: fulltypename = US"Initial punctuation"; break;
83     case ucp_Po: fulltypename = US"Other punctuation"; break;
84     case ucp_Ps: fulltypename = US"Open punctuation"; break;
85     case ucp_Sc: fulltypename = US"Currency symbol"; break;
86     case ucp_Sk: fulltypename = US"Modifier symbol"; break;
87     case ucp_Sm: fulltypename = US"Mathematical symbol"; break;
88     case ucp_So: fulltypename = US"Other symbol"; break;
89     case ucp_Zl: fulltypename = US"Line separator"; break;
90     case ucp_Zp: fulltypename = US"Paragraph separator"; break;
91     case ucp_Zs: fulltypename = US"Space separator"; break;
92     }
93     switch(script)
94     {
95     case ucp_Arabic: scriptname = US"Arabic"; break;
96     case ucp_Armenian: scriptname = US"Armenian"; break;
97     case ucp_Balinese: scriptname = US"Balinese"; break;
98     case ucp_Bengali: scriptname = US"Bengali"; break;
99     case ucp_Bopomofo: scriptname = US"Bopomofo"; break;
100     case ucp_Braille: scriptname = US"Braille"; break;
101     case ucp_Buginese: scriptname = US"Buginese"; break;
102     case ucp_Buhid: scriptname = US"Buhid"; break;
103     case ucp_Canadian_Aboriginal: scriptname = US"Canadian_Aboriginal"; break;
104     case ucp_Cherokee: scriptname = US"Cherokee"; break;
105     case ucp_Common: scriptname = US"Common"; break;
106     case ucp_Coptic: scriptname = US"Coptic"; break;
107     case ucp_Cuneiform: scriptname = US"Cuneiform"; break;
108     case ucp_Cypriot: scriptname = US"Cypriot"; break;
109     case ucp_Cyrillic: scriptname = US"Cyrillic"; break;
110     case ucp_Deseret: scriptname = US"Deseret"; break;
111     case ucp_Devanagari: scriptname = US"Devanagari"; break;
112     case ucp_Ethiopic: scriptname = US"Ethiopic"; break;
113     case ucp_Georgian: scriptname = US"Georgian"; break;
114     case ucp_Glagolitic: scriptname = US"Glagolitic"; break;
115     case ucp_Gothic: scriptname = US"Gothic"; break;
116     case ucp_Greek: scriptname = US"Greek"; break;
117     case ucp_Gujarati: scriptname = US"Gujarati"; break;
118     case ucp_Gurmukhi: scriptname = US"Gurmukhi"; break;
119     case ucp_Han: scriptname = US"Han"; break;
120     case ucp_Hangul: scriptname = US"Hangul"; break;
121     case ucp_Hanunoo: scriptname = US"Hanunoo"; break;
122     case ucp_Hebrew: scriptname = US"Hebrew"; break;
123     case ucp_Hiragana: scriptname = US"Hiragana"; break;
124     case ucp_Inherited: scriptname = US"Inherited"; break;
125     case ucp_Kannada: scriptname = US"Kannada"; break;
126     case ucp_Katakana: scriptname = US"Katakana"; break;
127     case ucp_Kharoshthi: scriptname = US"Kharoshthi"; break;
128     case ucp_Khmer: scriptname = US"Khmer"; break;
129     case ucp_Lao: scriptname = US"Lao"; break;
130     case ucp_Latin: scriptname = US"Latin"; break;
131     case ucp_Limbu: scriptname = US"Limbu"; break;
132     case ucp_Linear_B: scriptname = US"Linear_B"; break;
133     case ucp_Malayalam: scriptname = US"Malayalam"; break;
134     case ucp_Mongolian: scriptname = US"Mongolian"; break;
135     case ucp_Myanmar: scriptname = US"Myanmar"; break;
136     case ucp_New_Tai_Lue: scriptname = US"New_Tai_Lue"; break;
137     case ucp_Nko: scriptname = US"Nko"; break;
138     case ucp_Ogham: scriptname = US"Ogham"; break;
139     case ucp_Old_Italic: scriptname = US"Old_Italic"; break;
140     case ucp_Old_Persian: scriptname = US"Old_Persian"; break;
141     case ucp_Oriya: scriptname = US"Oriya"; break;
142     case ucp_Osmanya: scriptname = US"Osmanya"; break;
143     case ucp_Phags_Pa: scriptname = US"Phags_Pa"; break;
144     case ucp_Phoenician: scriptname = US"Phoenician"; break;
145     case ucp_Runic: scriptname = US"Runic"; break;
146     case ucp_Shavian: scriptname = US"Shavian"; break;
147     case ucp_Sinhala: scriptname = US"Sinhala"; break;
148     case ucp_Syloti_Nagri: scriptname = US"Syloti_Nagri"; break;
149     case ucp_Syriac: scriptname = US"Syriac"; break;
150     case ucp_Tagalog: scriptname = US"Tagalog"; break;
151     case ucp_Tagbanwa: scriptname = US"Tagbanwa"; break;
152     case ucp_Tai_Le: scriptname = US"Tai_Le"; break;
153     case ucp_Tamil: scriptname = US"Tamil"; break;
154     case ucp_Telugu: scriptname = US"Telugu"; break;
155     case ucp_Thaana: scriptname = US"Thaana"; break;
156     case ucp_Thai: scriptname = US"Thai"; break;
157     case ucp_Tibetan: scriptname = US"Tibetan"; break;
158     case ucp_Tifinagh: scriptname = US"Tifinagh"; break;
159     case ucp_Ugaritic: scriptname = US"Ugaritic"; break;
160     case ucp_Yi: scriptname = US"Yi"; break;
161     }
162    
163     printf("%s: %s %s", typename, fulltypename, scriptname);
164     othercase = _pcre_ucp_othercase(c);
165     if (othercase >= 0) printf(" %04x", othercase);
166     printf("\n");
167     }
168     }
169    
170    
171    
172     /*************************************************
173     * Main program *
174     *************************************************/
175    
176     int
177     main(void)
178     {
179     uschar buffer[1024];
180     while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
181     {
182     uschar name[24];
183     uschar *s, *t;
184    
185     printf("%s", buffer);
186     s = buffer;
187     while (isspace(*s)) s++;
188     if (*s == 0) continue;
189    
190     for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
191     *t = 0;
192     while (isspace(*s)) s++;
193    
194     if (strcmp(CS name, "findprop") == 0)
195     {
196     while (*s != 0)
197     {
198     uschar *endptr;
199     int c = strtoul(CS s, CSS(&endptr), 16);
200     print_prop(c);
201     s = endptr;
202     while (isspace(*s)) s++;
203     }
204     }
205    
206     else printf("Unknown test command %s\n", name);
207     }
208    
209     return 0;
210     }
211    
212     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12