| 1 |
ph10 |
97 |
/***************************************************
* A program for testing the Unicode property table *
***************************************************/
| 4 |
|
|
|
| 5 |
|
|
/* Copyright (c) University of Cambridge 2006 */ |
| 6 |
|
|
|
| 7 |
|
|
/* Compile thus:
   gcc -o ucptest maintain/ucptest.c pcre_ucp_searchfuncs.c
*/
| 10 |
|
|
|
| 11 |
|
|
#include <ctype.h> |
| 12 |
|
|
#include <stdio.h> |
| 13 |
|
|
#include <stdlib.h> |
| 14 |
|
|
#include <string.h> |
| 15 |
|
|
#include "pcre_internal.h" |
| 16 |
|
|
#include "ucp.h" |
| 17 |
|
|
#include "ucpinternal.h" |
| 18 |
|
|
|
| 19 |
|
|
|
| 20 |
|
|
/* -------------------------------------------------------------------*/ |
| 21 |
|
|
|
| 22 |
|
|
/* Cast shorthands for moving between plain and unsigned character pointers.
Each macro expands to a bare cast and is written immediately in front of the
expression it applies to, for example CS buffer or US"text". The replacement
list must contain nothing but the cast itself. */

#define CS   (char *)
#define CCS  (const char *)
#define CSS  (char **)
#define US   (unsigned char *)
#define CUS  (const unsigned char *)
#define USS  (unsigned char **)
| 28 |
|
|
|
| 29 |
|
|
/* -------------------------------------------------------------------*/ |
| 30 |
|
|
|
| 31 |
|
|
|
| 32 |
|
|
|
| 33 |
|
|
|
| 34 |
|
|
/************************************************* |
| 35 |
|
|
* Print Unicode property info for a char * |
| 36 |
|
|
*************************************************/ |
| 37 |
|
|
|
| 38 |
|
|
static void |
| 39 |
|
|
print_prop(int c) |
| 40 |
|
|
{ |
| 41 |
|
|
int fulltype, script, othercase; |
| 42 |
|
|
int type = _pcre_ucp_findprop(c, &fulltype, &script); |
| 43 |
|
|
|
| 44 |
|
|
printf("%04x ", c); |
| 45 |
|
|
if (type < 0) printf("not found\n"); else |
| 46 |
|
|
{ |
| 47 |
|
|
uschar *fulltypename = US"??"; |
| 48 |
|
|
uschar *typename = US"??"; |
| 49 |
|
|
uschar *scriptname = US"??"; |
| 50 |
|
|
switch (type) |
| 51 |
|
|
{ |
| 52 |
|
|
case ucp_C: typename = US"Control"; break; |
| 53 |
|
|
case ucp_L: typename = US"Letter"; break; |
| 54 |
|
|
case ucp_M: typename = US"Mark"; break; |
| 55 |
|
|
case ucp_N: typename = US"Number"; break; |
| 56 |
|
|
case ucp_P: typename = US"Punctuation"; break; |
| 57 |
|
|
case ucp_S: typename = US"Symbol"; break; |
| 58 |
|
|
case ucp_Z: typename = US"Separator"; break; |
| 59 |
|
|
} |
| 60 |
|
|
switch (fulltype) |
| 61 |
|
|
{ |
| 62 |
|
|
case ucp_Cc: fulltypename = US"Control"; break; |
| 63 |
|
|
case ucp_Cf: fulltypename = US"Format"; break; |
| 64 |
|
|
case ucp_Cn: fulltypename = US"Unassigned"; break; |
| 65 |
|
|
case ucp_Co: fulltypename = US"Private use"; break; |
| 66 |
|
|
case ucp_Cs: fulltypename = US"Surrogate"; break; |
| 67 |
|
|
case ucp_Ll: fulltypename = US"Lower case letter"; break; |
| 68 |
|
|
case ucp_Lm: fulltypename = US"Modifier letter"; break; |
| 69 |
|
|
case ucp_Lo: fulltypename = US"Other letter"; break; |
| 70 |
|
|
case ucp_Lt: fulltypename = US"Title case letter"; break; |
| 71 |
|
|
case ucp_Lu: fulltypename = US"Upper case letter"; break; |
| 72 |
|
|
case ucp_Mc: fulltypename = US"Spacing mark"; break; |
| 73 |
|
|
case ucp_Me: fulltypename = US"Enclosing mark"; break; |
| 74 |
|
|
case ucp_Mn: fulltypename = US"Non-spacing mark"; break; |
| 75 |
|
|
case ucp_Nd: fulltypename = US"Decimal number"; break; |
| 76 |
|
|
case ucp_Nl: fulltypename = US"Letter number"; break; |
| 77 |
|
|
case ucp_No: fulltypename = US"Other number"; break; |
| 78 |
|
|
case ucp_Pc: fulltypename = US"Connector punctuation"; break; |
| 79 |
|
|
case ucp_Pd: fulltypename = US"Dash punctuation"; break; |
| 80 |
|
|
case ucp_Pe: fulltypename = US"Close punctuation"; break; |
| 81 |
|
|
case ucp_Pf: fulltypename = US"Final punctuation"; break; |
| 82 |
|
|
case ucp_Pi: fulltypename = US"Initial punctuation"; break; |
| 83 |
|
|
case ucp_Po: fulltypename = US"Other punctuation"; break; |
| 84 |
|
|
case ucp_Ps: fulltypename = US"Open punctuation"; break; |
| 85 |
|
|
case ucp_Sc: fulltypename = US"Currency symbol"; break; |
| 86 |
|
|
case ucp_Sk: fulltypename = US"Modifier symbol"; break; |
| 87 |
|
|
case ucp_Sm: fulltypename = US"Mathematical symbol"; break; |
| 88 |
|
|
case ucp_So: fulltypename = US"Other symbol"; break; |
| 89 |
|
|
case ucp_Zl: fulltypename = US"Line separator"; break; |
| 90 |
|
|
case ucp_Zp: fulltypename = US"Paragraph separator"; break; |
| 91 |
|
|
case ucp_Zs: fulltypename = US"Space separator"; break; |
| 92 |
|
|
} |
| 93 |
|
|
switch(script) |
| 94 |
|
|
{ |
| 95 |
|
|
case ucp_Arabic: scriptname = US"Arabic"; break; |
| 96 |
|
|
case ucp_Armenian: scriptname = US"Armenian"; break; |
| 97 |
|
|
case ucp_Balinese: scriptname = US"Balinese"; break; |
| 98 |
|
|
case ucp_Bengali: scriptname = US"Bengali"; break; |
| 99 |
|
|
case ucp_Bopomofo: scriptname = US"Bopomofo"; break; |
| 100 |
|
|
case ucp_Braille: scriptname = US"Braille"; break; |
| 101 |
|
|
case ucp_Buginese: scriptname = US"Buginese"; break; |
| 102 |
|
|
case ucp_Buhid: scriptname = US"Buhid"; break; |
| 103 |
|
|
case ucp_Canadian_Aboriginal: scriptname = US"Canadian_Aboriginal"; break; |
| 104 |
|
|
case ucp_Cherokee: scriptname = US"Cherokee"; break; |
| 105 |
|
|
case ucp_Common: scriptname = US"Common"; break; |
| 106 |
|
|
case ucp_Coptic: scriptname = US"Coptic"; break; |
| 107 |
|
|
case ucp_Cuneiform: scriptname = US"Cuneiform"; break; |
| 108 |
|
|
case ucp_Cypriot: scriptname = US"Cypriot"; break; |
| 109 |
|
|
case ucp_Cyrillic: scriptname = US"Cyrillic"; break; |
| 110 |
|
|
case ucp_Deseret: scriptname = US"Deseret"; break; |
| 111 |
|
|
case ucp_Devanagari: scriptname = US"Devanagari"; break; |
| 112 |
|
|
case ucp_Ethiopic: scriptname = US"Ethiopic"; break; |
| 113 |
|
|
case ucp_Georgian: scriptname = US"Georgian"; break; |
| 114 |
|
|
case ucp_Glagolitic: scriptname = US"Glagolitic"; break; |
| 115 |
|
|
case ucp_Gothic: scriptname = US"Gothic"; break; |
| 116 |
|
|
case ucp_Greek: scriptname = US"Greek"; break; |
| 117 |
|
|
case ucp_Gujarati: scriptname = US"Gujarati"; break; |
| 118 |
|
|
case ucp_Gurmukhi: scriptname = US"Gurmukhi"; break; |
| 119 |
|
|
case ucp_Han: scriptname = US"Han"; break; |
| 120 |
|
|
case ucp_Hangul: scriptname = US"Hangul"; break; |
| 121 |
|
|
case ucp_Hanunoo: scriptname = US"Hanunoo"; break; |
| 122 |
|
|
case ucp_Hebrew: scriptname = US"Hebrew"; break; |
| 123 |
|
|
case ucp_Hiragana: scriptname = US"Hiragana"; break; |
| 124 |
|
|
case ucp_Inherited: scriptname = US"Inherited"; break; |
| 125 |
|
|
case ucp_Kannada: scriptname = US"Kannada"; break; |
| 126 |
|
|
case ucp_Katakana: scriptname = US"Katakana"; break; |
| 127 |
|
|
case ucp_Kharoshthi: scriptname = US"Kharoshthi"; break; |
| 128 |
|
|
case ucp_Khmer: scriptname = US"Khmer"; break; |
| 129 |
|
|
case ucp_Lao: scriptname = US"Lao"; break; |
| 130 |
|
|
case ucp_Latin: scriptname = US"Latin"; break; |
| 131 |
|
|
case ucp_Limbu: scriptname = US"Limbu"; break; |
| 132 |
|
|
case ucp_Linear_B: scriptname = US"Linear_B"; break; |
| 133 |
|
|
case ucp_Malayalam: scriptname = US"Malayalam"; break; |
| 134 |
|
|
case ucp_Mongolian: scriptname = US"Mongolian"; break; |
| 135 |
|
|
case ucp_Myanmar: scriptname = US"Myanmar"; break; |
| 136 |
|
|
case ucp_New_Tai_Lue: scriptname = US"New_Tai_Lue"; break; |
| 137 |
|
|
case ucp_Nko: scriptname = US"Nko"; break; |
| 138 |
|
|
case ucp_Ogham: scriptname = US"Ogham"; break; |
| 139 |
|
|
case ucp_Old_Italic: scriptname = US"Old_Italic"; break; |
| 140 |
|
|
case ucp_Old_Persian: scriptname = US"Old_Persian"; break; |
| 141 |
|
|
case ucp_Oriya: scriptname = US"Oriya"; break; |
| 142 |
|
|
case ucp_Osmanya: scriptname = US"Osmanya"; break; |
| 143 |
|
|
case ucp_Phags_Pa: scriptname = US"Phags_Pa"; break; |
| 144 |
|
|
case ucp_Phoenician: scriptname = US"Phoenician"; break; |
| 145 |
|
|
case ucp_Runic: scriptname = US"Runic"; break; |
| 146 |
|
|
case ucp_Shavian: scriptname = US"Shavian"; break; |
| 147 |
|
|
case ucp_Sinhala: scriptname = US"Sinhala"; break; |
| 148 |
|
|
case ucp_Syloti_Nagri: scriptname = US"Syloti_Nagri"; break; |
| 149 |
|
|
case ucp_Syriac: scriptname = US"Syriac"; break; |
| 150 |
|
|
case ucp_Tagalog: scriptname = US"Tagalog"; break; |
| 151 |
|
|
case ucp_Tagbanwa: scriptname = US"Tagbanwa"; break; |
| 152 |
|
|
case ucp_Tai_Le: scriptname = US"Tai_Le"; break; |
| 153 |
|
|
case ucp_Tamil: scriptname = US"Tamil"; break; |
| 154 |
|
|
case ucp_Telugu: scriptname = US"Telugu"; break; |
| 155 |
|
|
case ucp_Thaana: scriptname = US"Thaana"; break; |
| 156 |
|
|
case ucp_Thai: scriptname = US"Thai"; break; |
| 157 |
|
|
case ucp_Tibetan: scriptname = US"Tibetan"; break; |
| 158 |
|
|
case ucp_Tifinagh: scriptname = US"Tifinagh"; break; |
| 159 |
|
|
case ucp_Ugaritic: scriptname = US"Ugaritic"; break; |
| 160 |
|
|
case ucp_Yi: scriptname = US"Yi"; break; |
| 161 |
|
|
} |
| 162 |
|
|
|
| 163 |
|
|
printf("%s: %s %s", typename, fulltypename, scriptname); |
| 164 |
|
|
othercase = _pcre_ucp_othercase(c); |
| 165 |
|
|
if (othercase >= 0) printf(" %04x", othercase); |
| 166 |
|
|
printf("\n"); |
| 167 |
|
|
} |
| 168 |
|
|
} |
| 169 |
|
|
|
| 170 |
|
|
|
| 171 |
|
|
|
| 172 |
|
|
/************************************************* |
| 173 |
|
|
* Main program * |
| 174 |
|
|
*************************************************/ |
| 175 |
|
|
|
| 176 |
|
|
int |
| 177 |
|
|
main(void) |
| 178 |
|
|
{ |
| 179 |
|
|
uschar buffer[1024]; |
| 180 |
|
|
while (fgets(CS buffer, sizeof(buffer), stdin) != NULL) |
| 181 |
|
|
{ |
| 182 |
|
|
uschar name[24]; |
| 183 |
|
|
uschar *s, *t; |
| 184 |
|
|
|
| 185 |
|
|
printf("%s", buffer); |
| 186 |
|
|
s = buffer; |
| 187 |
|
|
while (isspace(*s)) s++; |
| 188 |
|
|
if (*s == 0) continue; |
| 189 |
|
|
|
| 190 |
|
|
for (t = name; *s != 0 && !isspace(*s); s++) *t++ = *s; |
| 191 |
|
|
*t = 0; |
| 192 |
|
|
while (isspace(*s)) s++; |
| 193 |
|
|
|
| 194 |
|
|
if (strcmp(CS name, "findprop") == 0) |
| 195 |
|
|
{ |
| 196 |
|
|
while (*s != 0) |
| 197 |
|
|
{ |
| 198 |
|
|
uschar *endptr; |
| 199 |
|
|
int c = strtoul(CS s, CSS(&endptr), 16); |
| 200 |
|
|
print_prop(c); |
| 201 |
|
|
s = endptr; |
| 202 |
|
|
while (isspace(*s)) s++; |
| 203 |
|
|
} |
| 204 |
|
|
} |
| 205 |
|
|
|
| 206 |
|
|
else printf("Unknown test command %s\n", name); |
| 207 |
|
|
} |
| 208 |
|
|
|
| 209 |
|
|
return 0; |
| 210 |
|
|
} |
| 211 |
|
|
|
| 212 |
|
|
/* End */ |