/[pcre]/code/trunk/maint/ucptest.c
ViewVC logotype

Contents of /code/trunk/maint/ucptest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 97 - (hide annotations) (download)
Mon Mar 5 12:36:47 2007 UTC (7 years, 1 month ago) by ph10
Original Path: code/trunk/maintain/ucptest.c
File MIME type: text/plain
File size: 8275 byte(s)
Applied Bob and Daniel's patches to convert the build system to automake. Added 
the maintain directory, containing files that are used for maintenance, but are 
not distributed. This is an intermediate step.

1 ph10 97 /***************************************************
2     * A program for testing the Unicode property table *
3     ***************************************************/
4    
5     /* Copyright (c) University of Cambridge 2006 */
6    
7     /* Compile thus:
8     gcc -o ucptest maintain/ucptest.c pcre_ucp_searchfuncs.c
9     */
10    
11     #include <ctype.h>
12     #include <stdio.h>
13     #include <stdlib.h>
14     #include <string.h>
15     #include "pcre_internal.h"
16     #include "ucp.h"
17     #include "ucpinternal.h"
18    
19    
20     /* -------------------------------------------------------------------*/
21     /* Shorthand casts: each macro expands to a bare cast that is written directly in front of the expression being converted. */
22     #define CS (char *)              /* to char *; used below as CS buffer, CS name, CS s */
23     #define CCS (const char *)       /* to const char *; not used in the visible code */
24     #define CSS (char **)            /* to char **; used below for the strtoul() end pointer */
25     #define US (unsigned char *)     /* to unsigned char *; used below on string literals, e.g. US"??" */
26     #define CUS (const unsigned char *)  /* to const unsigned char *; not used in the visible code */
27     #define USS (unsigned char **)   /* to unsigned char **; not used in the visible code */
28    
29     /* -------------------------------------------------------------------*/
30    
31    
32    
33    
34     /*************************************************
35     * Print Unicode property info for a char *
36     *************************************************/
37    
38     static void
39     print_prop(int c)
40     {
41     int fulltype, script, othercase;
42     int type = _pcre_ucp_findprop(c, &fulltype, &script);
43    
44     printf("%04x ", c);
45     if (type < 0) printf("not found\n"); else
46     {
47     uschar *fulltypename = US"??";
48     uschar *typename = US"??";
49     uschar *scriptname = US"??";
50     switch (type)
51     {
52     case ucp_C: typename = US"Control"; break;
53     case ucp_L: typename = US"Letter"; break;
54     case ucp_M: typename = US"Mark"; break;
55     case ucp_N: typename = US"Number"; break;
56     case ucp_P: typename = US"Punctuation"; break;
57     case ucp_S: typename = US"Symbol"; break;
58     case ucp_Z: typename = US"Separator"; break;
59     }
60     switch (fulltype)
61     {
62     case ucp_Cc: fulltypename = US"Control"; break;
63     case ucp_Cf: fulltypename = US"Format"; break;
64     case ucp_Cn: fulltypename = US"Unassigned"; break;
65     case ucp_Co: fulltypename = US"Private use"; break;
66     case ucp_Cs: fulltypename = US"Surrogate"; break;
67     case ucp_Ll: fulltypename = US"Lower case letter"; break;
68     case ucp_Lm: fulltypename = US"Modifier letter"; break;
69     case ucp_Lo: fulltypename = US"Other letter"; break;
70     case ucp_Lt: fulltypename = US"Title case letter"; break;
71     case ucp_Lu: fulltypename = US"Upper case letter"; break;
72     case ucp_Mc: fulltypename = US"Spacing mark"; break;
73     case ucp_Me: fulltypename = US"Enclosing mark"; break;
74     case ucp_Mn: fulltypename = US"Non-spacing mark"; break;
75     case ucp_Nd: fulltypename = US"Decimal number"; break;
76     case ucp_Nl: fulltypename = US"Letter number"; break;
77     case ucp_No: fulltypename = US"Other number"; break;
78     case ucp_Pc: fulltypename = US"Connector punctuation"; break;
79     case ucp_Pd: fulltypename = US"Dash punctuation"; break;
80     case ucp_Pe: fulltypename = US"Close punctuation"; break;
81     case ucp_Pf: fulltypename = US"Final punctuation"; break;
82     case ucp_Pi: fulltypename = US"Initial punctuation"; break;
83     case ucp_Po: fulltypename = US"Other punctuation"; break;
84     case ucp_Ps: fulltypename = US"Open punctuation"; break;
85     case ucp_Sc: fulltypename = US"Currency symbol"; break;
86     case ucp_Sk: fulltypename = US"Modifier symbol"; break;
87     case ucp_Sm: fulltypename = US"Mathematical symbol"; break;
88     case ucp_So: fulltypename = US"Other symbol"; break;
89     case ucp_Zl: fulltypename = US"Line separator"; break;
90     case ucp_Zp: fulltypename = US"Paragraph separator"; break;
91     case ucp_Zs: fulltypename = US"Space separator"; break;
92     }
93     switch(script)
94     {
95     case ucp_Arabic: scriptname = US"Arabic"; break;
96     case ucp_Armenian: scriptname = US"Armenian"; break;
97     case ucp_Balinese: scriptname = US"Balinese"; break;
98     case ucp_Bengali: scriptname = US"Bengali"; break;
99     case ucp_Bopomofo: scriptname = US"Bopomofo"; break;
100     case ucp_Braille: scriptname = US"Braille"; break;
101     case ucp_Buginese: scriptname = US"Buginese"; break;
102     case ucp_Buhid: scriptname = US"Buhid"; break;
103     case ucp_Canadian_Aboriginal: scriptname = US"Canadian_Aboriginal"; break;
104     case ucp_Cherokee: scriptname = US"Cherokee"; break;
105     case ucp_Common: scriptname = US"Common"; break;
106     case ucp_Coptic: scriptname = US"Coptic"; break;
107     case ucp_Cuneiform: scriptname = US"Cuneiform"; break;
108     case ucp_Cypriot: scriptname = US"Cypriot"; break;
109     case ucp_Cyrillic: scriptname = US"Cyrillic"; break;
110     case ucp_Deseret: scriptname = US"Deseret"; break;
111     case ucp_Devanagari: scriptname = US"Devanagari"; break;
112     case ucp_Ethiopic: scriptname = US"Ethiopic"; break;
113     case ucp_Georgian: scriptname = US"Georgian"; break;
114     case ucp_Glagolitic: scriptname = US"Glagolitic"; break;
115     case ucp_Gothic: scriptname = US"Gothic"; break;
116     case ucp_Greek: scriptname = US"Greek"; break;
117     case ucp_Gujarati: scriptname = US"Gujarati"; break;
118     case ucp_Gurmukhi: scriptname = US"Gurmukhi"; break;
119     case ucp_Han: scriptname = US"Han"; break;
120     case ucp_Hangul: scriptname = US"Hangul"; break;
121     case ucp_Hanunoo: scriptname = US"Hanunoo"; break;
122     case ucp_Hebrew: scriptname = US"Hebrew"; break;
123     case ucp_Hiragana: scriptname = US"Hiragana"; break;
124     case ucp_Inherited: scriptname = US"Inherited"; break;
125     case ucp_Kannada: scriptname = US"Kannada"; break;
126     case ucp_Katakana: scriptname = US"Katakana"; break;
127     case ucp_Kharoshthi: scriptname = US"Kharoshthi"; break;
128     case ucp_Khmer: scriptname = US"Khmer"; break;
129     case ucp_Lao: scriptname = US"Lao"; break;
130     case ucp_Latin: scriptname = US"Latin"; break;
131     case ucp_Limbu: scriptname = US"Limbu"; break;
132     case ucp_Linear_B: scriptname = US"Linear_B"; break;
133     case ucp_Malayalam: scriptname = US"Malayalam"; break;
134     case ucp_Mongolian: scriptname = US"Mongolian"; break;
135     case ucp_Myanmar: scriptname = US"Myanmar"; break;
136     case ucp_New_Tai_Lue: scriptname = US"New_Tai_Lue"; break;
137     case ucp_Nko: scriptname = US"Nko"; break;
138     case ucp_Ogham: scriptname = US"Ogham"; break;
139     case ucp_Old_Italic: scriptname = US"Old_Italic"; break;
140     case ucp_Old_Persian: scriptname = US"Old_Persian"; break;
141     case ucp_Oriya: scriptname = US"Oriya"; break;
142     case ucp_Osmanya: scriptname = US"Osmanya"; break;
143     case ucp_Phags_Pa: scriptname = US"Phags_Pa"; break;
144     case ucp_Phoenician: scriptname = US"Phoenician"; break;
145     case ucp_Runic: scriptname = US"Runic"; break;
146     case ucp_Shavian: scriptname = US"Shavian"; break;
147     case ucp_Sinhala: scriptname = US"Sinhala"; break;
148     case ucp_Syloti_Nagri: scriptname = US"Syloti_Nagri"; break;
149     case ucp_Syriac: scriptname = US"Syriac"; break;
150     case ucp_Tagalog: scriptname = US"Tagalog"; break;
151     case ucp_Tagbanwa: scriptname = US"Tagbanwa"; break;
152     case ucp_Tai_Le: scriptname = US"Tai_Le"; break;
153     case ucp_Tamil: scriptname = US"Tamil"; break;
154     case ucp_Telugu: scriptname = US"Telugu"; break;
155     case ucp_Thaana: scriptname = US"Thaana"; break;
156     case ucp_Thai: scriptname = US"Thai"; break;
157     case ucp_Tibetan: scriptname = US"Tibetan"; break;
158     case ucp_Tifinagh: scriptname = US"Tifinagh"; break;
159     case ucp_Ugaritic: scriptname = US"Ugaritic"; break;
160     case ucp_Yi: scriptname = US"Yi"; break;
161     }
162    
163     printf("%s: %s %s", typename, fulltypename, scriptname);
164     othercase = _pcre_ucp_othercase(c);
165     if (othercase >= 0) printf(" %04x", othercase);
166     printf("\n");
167     }
168     }
169    
170    
171    
172     /*************************************************
173     * Main program *
174     *************************************************/
175    
176     int
177     main(void)
178     {
179     uschar buffer[1024];
180     while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
181     {
182     uschar name[24];
183     uschar *s, *t;
184    
185     printf("%s", buffer);
186     s = buffer;
187     while (isspace(*s)) s++;
188     if (*s == 0) continue;
189    
190     for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s;
191     *t = 0;
192     while (isspace(*s)) s++;
193    
194     if (strcmp(CS name, "findprop") == 0)
195     {
196     while (*s != 0)
197     {
198     uschar *endptr;
199     int c = strtoul(CS s, CSS(&endptr), 16);
200     print_prop(c);
201     s = endptr;
202     while (isspace(*s)) s++;
203     }
204     }
205    
206     else printf("Unknown test command %s\n", name);
207     }
208    
209     return 0;
210     }
211    
212     /* End */

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12