| 110 |
data are unlikely. |
data are unlikely. |
| 111 |
|
|
| 112 |
July 2008: There is now a script called maint/GenerateUtt.py that can be used |
July 2008: There is now a script called maint/GenerateUtt.py that can be used |
| 113 |
to generate this data instead of maintaining it entirely by hand. |
to generate this data automatically instead of maintaining it by hand. |
| 114 |
|
|
| 115 |
The script was updated in March 2009 to generate a new EBCDIC-compliant |
The script was updated in March 2009 to generate a new EBCDIC-compliant |
| 116 |
version. Like all other character and string literals that are compared against |
version. Like all other character and string literals that are compared against |
| 123 |
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0" |
#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0" |
| 124 |
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" |
#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0" |
| 125 |
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0" |
#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0" |
| 126 |
|
#define STRING_Batak0 STR_B STR_a STR_t STR_a STR_k "\0" |
| 127 |
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" |
#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0" |
| 128 |
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" |
#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0" |
| 129 |
|
#define STRING_Brahmi0 STR_B STR_r STR_a STR_h STR_m STR_i "\0" |
| 130 |
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0" |
#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0" |
| 131 |
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" |
#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0" |
| 132 |
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0" |
#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0" |
| 188 |
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" |
#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0" |
| 189 |
#define STRING_M0 STR_M "\0" |
#define STRING_M0 STR_M "\0" |
| 190 |
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" |
#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0" |
| 191 |
|
#define STRING_Mandaic0 STR_M STR_a STR_n STR_d STR_a STR_i STR_c "\0" |
| 192 |
#define STRING_Mc0 STR_M STR_c "\0" |
#define STRING_Mc0 STR_M STR_c "\0" |
| 193 |
#define STRING_Me0 STR_M STR_e "\0" |
#define STRING_Me0 STR_M STR_e "\0" |
| 194 |
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" |
#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0" |
| 256 |
#define STRING_Zp0 STR_Z STR_p "\0" |
#define STRING_Zp0 STR_Z STR_p "\0" |
| 257 |
#define STRING_Zs0 STR_Z STR_s "\0" |
#define STRING_Zs0 STR_Z STR_s "\0" |
| 258 |
|
|
| 259 |
const char _pcre_utt_names[] = |
const char _pcre_utt_names[] = |
| 260 |
STRING_Any0 |
STRING_Any0 |
| 261 |
STRING_Arabic0 |
STRING_Arabic0 |
| 262 |
STRING_Armenian0 |
STRING_Armenian0 |
| 263 |
STRING_Avestan0 |
STRING_Avestan0 |
| 264 |
STRING_Balinese0 |
STRING_Balinese0 |
| 265 |
STRING_Bamum0 |
STRING_Bamum0 |
| 266 |
|
STRING_Batak0 |
| 267 |
STRING_Bengali0 |
STRING_Bengali0 |
| 268 |
STRING_Bopomofo0 |
STRING_Bopomofo0 |
| 269 |
|
STRING_Brahmi0 |
| 270 |
STRING_Braille0 |
STRING_Braille0 |
| 271 |
STRING_Buginese0 |
STRING_Buginese0 |
| 272 |
STRING_Buhid0 |
STRING_Buhid0 |
| 328 |
STRING_Lydian0 |
STRING_Lydian0 |
| 329 |
STRING_M0 |
STRING_M0 |
| 330 |
STRING_Malayalam0 |
STRING_Malayalam0 |
| 331 |
|
STRING_Mandaic0 |
| 332 |
STRING_Mc0 |
STRING_Mc0 |
| 333 |
STRING_Me0 |
STRING_Me0 |
| 334 |
STRING_Meetei_Mayek0 |
STRING_Meetei_Mayek0 |
| 396 |
STRING_Zp0 |
STRING_Zp0 |
| 397 |
STRING_Zs0; |
STRING_Zs0; |
| 398 |
|
|
| 399 |
const ucp_type_table _pcre_utt[] = { |
const ucp_type_table _pcre_utt[] = { |
| 400 |
{ 0, PT_ANY, 0 }, |
{ 0, PT_ANY, 0 }, |
| 401 |
{ 4, PT_SC, ucp_Arabic }, |
{ 4, PT_SC, ucp_Arabic }, |
| 402 |
{ 11, PT_SC, ucp_Armenian }, |
{ 11, PT_SC, ucp_Armenian }, |
| 403 |
{ 20, PT_SC, ucp_Avestan }, |
{ 20, PT_SC, ucp_Avestan }, |
| 404 |
{ 28, PT_SC, ucp_Balinese }, |
{ 28, PT_SC, ucp_Balinese }, |
| 405 |
{ 37, PT_SC, ucp_Bamum }, |
{ 37, PT_SC, ucp_Bamum }, |
| 406 |
{ 43, PT_SC, ucp_Bengali }, |
{ 43, PT_SC, ucp_Batak }, |
| 407 |
{ 51, PT_SC, ucp_Bopomofo }, |
{ 49, PT_SC, ucp_Bengali }, |
| 408 |
{ 60, PT_SC, ucp_Braille }, |
{ 57, PT_SC, ucp_Bopomofo }, |
| 409 |
{ 68, PT_SC, ucp_Buginese }, |
{ 66, PT_SC, ucp_Brahmi }, |
| 410 |
{ 77, PT_SC, ucp_Buhid }, |
{ 73, PT_SC, ucp_Braille }, |
| 411 |
{ 83, PT_GC, ucp_C }, |
{ 81, PT_SC, ucp_Buginese }, |
| 412 |
{ 85, PT_SC, ucp_Canadian_Aboriginal }, |
{ 90, PT_SC, ucp_Buhid }, |
| 413 |
{ 105, PT_SC, ucp_Carian }, |
{ 96, PT_GC, ucp_C }, |
| 414 |
{ 112, PT_PC, ucp_Cc }, |
{ 98, PT_SC, ucp_Canadian_Aboriginal }, |
| 415 |
{ 115, PT_PC, ucp_Cf }, |
{ 118, PT_SC, ucp_Carian }, |
| 416 |
{ 118, PT_SC, ucp_Cham }, |
{ 125, PT_PC, ucp_Cc }, |
| 417 |
{ 123, PT_SC, ucp_Cherokee }, |
{ 128, PT_PC, ucp_Cf }, |
| 418 |
{ 132, PT_PC, ucp_Cn }, |
{ 131, PT_SC, ucp_Cham }, |
| 419 |
{ 135, PT_PC, ucp_Co }, |
{ 136, PT_SC, ucp_Cherokee }, |
| 420 |
{ 138, PT_SC, ucp_Common }, |
{ 145, PT_PC, ucp_Cn }, |
| 421 |
{ 145, PT_SC, ucp_Coptic }, |
{ 148, PT_PC, ucp_Co }, |
| 422 |
{ 152, PT_PC, ucp_Cs }, |
{ 151, PT_SC, ucp_Common }, |
| 423 |
{ 155, PT_SC, ucp_Cuneiform }, |
{ 158, PT_SC, ucp_Coptic }, |
| 424 |
{ 165, PT_SC, ucp_Cypriot }, |
{ 165, PT_PC, ucp_Cs }, |
| 425 |
{ 173, PT_SC, ucp_Cyrillic }, |
{ 168, PT_SC, ucp_Cuneiform }, |
| 426 |
{ 182, PT_SC, ucp_Deseret }, |
{ 178, PT_SC, ucp_Cypriot }, |
| 427 |
{ 190, PT_SC, ucp_Devanagari }, |
{ 186, PT_SC, ucp_Cyrillic }, |
| 428 |
{ 201, PT_SC, ucp_Egyptian_Hieroglyphs }, |
{ 195, PT_SC, ucp_Deseret }, |
| 429 |
{ 222, PT_SC, ucp_Ethiopic }, |
{ 203, PT_SC, ucp_Devanagari }, |
| 430 |
{ 231, PT_SC, ucp_Georgian }, |
{ 214, PT_SC, ucp_Egyptian_Hieroglyphs }, |
| 431 |
{ 240, PT_SC, ucp_Glagolitic }, |
{ 235, PT_SC, ucp_Ethiopic }, |
| 432 |
{ 251, PT_SC, ucp_Gothic }, |
{ 244, PT_SC, ucp_Georgian }, |
| 433 |
{ 258, PT_SC, ucp_Greek }, |
{ 253, PT_SC, ucp_Glagolitic }, |
| 434 |
{ 264, PT_SC, ucp_Gujarati }, |
{ 264, PT_SC, ucp_Gothic }, |
| 435 |
{ 273, PT_SC, ucp_Gurmukhi }, |
{ 271, PT_SC, ucp_Greek }, |
| 436 |
{ 282, PT_SC, ucp_Han }, |
{ 277, PT_SC, ucp_Gujarati }, |
| 437 |
{ 286, PT_SC, ucp_Hangul }, |
{ 286, PT_SC, ucp_Gurmukhi }, |
| 438 |
{ 293, PT_SC, ucp_Hanunoo }, |
{ 295, PT_SC, ucp_Han }, |
| 439 |
{ 301, PT_SC, ucp_Hebrew }, |
{ 299, PT_SC, ucp_Hangul }, |
| 440 |
{ 308, PT_SC, ucp_Hiragana }, |
{ 306, PT_SC, ucp_Hanunoo }, |
| 441 |
{ 317, PT_SC, ucp_Imperial_Aramaic }, |
{ 314, PT_SC, ucp_Hebrew }, |
| 442 |
{ 334, PT_SC, ucp_Inherited }, |
{ 321, PT_SC, ucp_Hiragana }, |
| 443 |
{ 344, PT_SC, ucp_Inscriptional_Pahlavi }, |
{ 330, PT_SC, ucp_Imperial_Aramaic }, |
| 444 |
{ 366, PT_SC, ucp_Inscriptional_Parthian }, |
{ 347, PT_SC, ucp_Inherited }, |
| 445 |
{ 389, PT_SC, ucp_Javanese }, |
{ 357, PT_SC, ucp_Inscriptional_Pahlavi }, |
| 446 |
{ 398, PT_SC, ucp_Kaithi }, |
{ 379, PT_SC, ucp_Inscriptional_Parthian }, |
| 447 |
{ 405, PT_SC, ucp_Kannada }, |
{ 402, PT_SC, ucp_Javanese }, |
| 448 |
{ 413, PT_SC, ucp_Katakana }, |
{ 411, PT_SC, ucp_Kaithi }, |
| 449 |
{ 422, PT_SC, ucp_Kayah_Li }, |
{ 418, PT_SC, ucp_Kannada }, |
| 450 |
{ 431, PT_SC, ucp_Kharoshthi }, |
{ 426, PT_SC, ucp_Katakana }, |
| 451 |
{ 442, PT_SC, ucp_Khmer }, |
{ 435, PT_SC, ucp_Kayah_Li }, |
| 452 |
{ 448, PT_GC, ucp_L }, |
{ 444, PT_SC, ucp_Kharoshthi }, |
| 453 |
{ 450, PT_LAMP, 0 }, |
{ 455, PT_SC, ucp_Khmer }, |
| 454 |
{ 453, PT_SC, ucp_Lao }, |
{ 461, PT_GC, ucp_L }, |
| 455 |
{ 457, PT_SC, ucp_Latin }, |
{ 463, PT_LAMP, 0 }, |
| 456 |
{ 463, PT_SC, ucp_Lepcha }, |
{ 466, PT_SC, ucp_Lao }, |
| 457 |
{ 470, PT_SC, ucp_Limbu }, |
{ 470, PT_SC, ucp_Latin }, |
| 458 |
{ 476, PT_SC, ucp_Linear_B }, |
{ 476, PT_SC, ucp_Lepcha }, |
| 459 |
{ 485, PT_SC, ucp_Lisu }, |
{ 483, PT_SC, ucp_Limbu }, |
| 460 |
{ 490, PT_PC, ucp_Ll }, |
{ 489, PT_SC, ucp_Linear_B }, |
| 461 |
{ 493, PT_PC, ucp_Lm }, |
{ 498, PT_SC, ucp_Lisu }, |
| 462 |
{ 496, PT_PC, ucp_Lo }, |
{ 503, PT_PC, ucp_Ll }, |
| 463 |
{ 499, PT_PC, ucp_Lt }, |
{ 506, PT_PC, ucp_Lm }, |
| 464 |
{ 502, PT_PC, ucp_Lu }, |
{ 509, PT_PC, ucp_Lo }, |
| 465 |
{ 505, PT_SC, ucp_Lycian }, |
{ 512, PT_PC, ucp_Lt }, |
| 466 |
{ 512, PT_SC, ucp_Lydian }, |
{ 515, PT_PC, ucp_Lu }, |
| 467 |
{ 519, PT_GC, ucp_M }, |
{ 518, PT_SC, ucp_Lycian }, |
| 468 |
{ 521, PT_SC, ucp_Malayalam }, |
{ 525, PT_SC, ucp_Lydian }, |
| 469 |
{ 531, PT_PC, ucp_Mc }, |
{ 532, PT_GC, ucp_M }, |
| 470 |
{ 534, PT_PC, ucp_Me }, |
{ 534, PT_SC, ucp_Malayalam }, |
| 471 |
{ 537, PT_SC, ucp_Meetei_Mayek }, |
{ 544, PT_SC, ucp_Mandaic }, |
| 472 |
{ 550, PT_PC, ucp_Mn }, |
{ 552, PT_PC, ucp_Mc }, |
| 473 |
{ 553, PT_SC, ucp_Mongolian }, |
{ 555, PT_PC, ucp_Me }, |
| 474 |
{ 563, PT_SC, ucp_Myanmar }, |
{ 558, PT_SC, ucp_Meetei_Mayek }, |
| 475 |
{ 571, PT_GC, ucp_N }, |
{ 571, PT_PC, ucp_Mn }, |
| 476 |
{ 573, PT_PC, ucp_Nd }, |
{ 574, PT_SC, ucp_Mongolian }, |
| 477 |
{ 576, PT_SC, ucp_New_Tai_Lue }, |
{ 584, PT_SC, ucp_Myanmar }, |
| 478 |
{ 588, PT_SC, ucp_Nko }, |
{ 592, PT_GC, ucp_N }, |
| 479 |
{ 592, PT_PC, ucp_Nl }, |
{ 594, PT_PC, ucp_Nd }, |
| 480 |
{ 595, PT_PC, ucp_No }, |
{ 597, PT_SC, ucp_New_Tai_Lue }, |
| 481 |
{ 598, PT_SC, ucp_Ogham }, |
{ 609, PT_SC, ucp_Nko }, |
| 482 |
{ 604, PT_SC, ucp_Ol_Chiki }, |
{ 613, PT_PC, ucp_Nl }, |
| 483 |
{ 613, PT_SC, ucp_Old_Italic }, |
{ 616, PT_PC, ucp_No }, |
| 484 |
{ 624, PT_SC, ucp_Old_Persian }, |
{ 619, PT_SC, ucp_Ogham }, |
| 485 |
{ 636, PT_SC, ucp_Old_South_Arabian }, |
{ 625, PT_SC, ucp_Ol_Chiki }, |
| 486 |
{ 654, PT_SC, ucp_Old_Turkic }, |
{ 634, PT_SC, ucp_Old_Italic }, |
| 487 |
{ 665, PT_SC, ucp_Oriya }, |
{ 645, PT_SC, ucp_Old_Persian }, |
| 488 |
{ 671, PT_SC, ucp_Osmanya }, |
{ 657, PT_SC, ucp_Old_South_Arabian }, |
| 489 |
{ 679, PT_GC, ucp_P }, |
{ 675, PT_SC, ucp_Old_Turkic }, |
| 490 |
{ 681, PT_PC, ucp_Pc }, |
{ 686, PT_SC, ucp_Oriya }, |
| 491 |
{ 684, PT_PC, ucp_Pd }, |
{ 692, PT_SC, ucp_Osmanya }, |
| 492 |
{ 687, PT_PC, ucp_Pe }, |
{ 700, PT_GC, ucp_P }, |
| 493 |
{ 690, PT_PC, ucp_Pf }, |
{ 702, PT_PC, ucp_Pc }, |
| 494 |
{ 693, PT_SC, ucp_Phags_Pa }, |
{ 705, PT_PC, ucp_Pd }, |
| 495 |
{ 702, PT_SC, ucp_Phoenician }, |
{ 708, PT_PC, ucp_Pe }, |
| 496 |
{ 713, PT_PC, ucp_Pi }, |
{ 711, PT_PC, ucp_Pf }, |
| 497 |
{ 716, PT_PC, ucp_Po }, |
{ 714, PT_SC, ucp_Phags_Pa }, |
| 498 |
{ 719, PT_PC, ucp_Ps }, |
{ 723, PT_SC, ucp_Phoenician }, |
| 499 |
{ 722, PT_SC, ucp_Rejang }, |
{ 734, PT_PC, ucp_Pi }, |
| 500 |
{ 729, PT_SC, ucp_Runic }, |
{ 737, PT_PC, ucp_Po }, |
| 501 |
{ 735, PT_GC, ucp_S }, |
{ 740, PT_PC, ucp_Ps }, |
| 502 |
{ 737, PT_SC, ucp_Samaritan }, |
{ 743, PT_SC, ucp_Rejang }, |
| 503 |
{ 747, PT_SC, ucp_Saurashtra }, |
{ 750, PT_SC, ucp_Runic }, |
| 504 |
{ 758, PT_PC, ucp_Sc }, |
{ 756, PT_GC, ucp_S }, |
| 505 |
{ 761, PT_SC, ucp_Shavian }, |
{ 758, PT_SC, ucp_Samaritan }, |
| 506 |
{ 769, PT_SC, ucp_Sinhala }, |
{ 768, PT_SC, ucp_Saurashtra }, |
| 507 |
{ 777, PT_PC, ucp_Sk }, |
{ 779, PT_PC, ucp_Sc }, |
| 508 |
{ 780, PT_PC, ucp_Sm }, |
{ 782, PT_SC, ucp_Shavian }, |
| 509 |
{ 783, PT_PC, ucp_So }, |
{ 790, PT_SC, ucp_Sinhala }, |
| 510 |
{ 786, PT_SC, ucp_Sundanese }, |
{ 798, PT_PC, ucp_Sk }, |
| 511 |
{ 796, PT_SC, ucp_Syloti_Nagri }, |
{ 801, PT_PC, ucp_Sm }, |
| 512 |
{ 809, PT_SC, ucp_Syriac }, |
{ 804, PT_PC, ucp_So }, |
| 513 |
{ 816, PT_SC, ucp_Tagalog }, |
{ 807, PT_SC, ucp_Sundanese }, |
| 514 |
{ 824, PT_SC, ucp_Tagbanwa }, |
{ 817, PT_SC, ucp_Syloti_Nagri }, |
| 515 |
{ 833, PT_SC, ucp_Tai_Le }, |
{ 830, PT_SC, ucp_Syriac }, |
| 516 |
{ 840, PT_SC, ucp_Tai_Tham }, |
{ 837, PT_SC, ucp_Tagalog }, |
| 517 |
{ 849, PT_SC, ucp_Tai_Viet }, |
{ 845, PT_SC, ucp_Tagbanwa }, |
| 518 |
{ 858, PT_SC, ucp_Tamil }, |
{ 854, PT_SC, ucp_Tai_Le }, |
| 519 |
{ 864, PT_SC, ucp_Telugu }, |
{ 861, PT_SC, ucp_Tai_Tham }, |
| 520 |
{ 871, PT_SC, ucp_Thaana }, |
{ 870, PT_SC, ucp_Tai_Viet }, |
| 521 |
{ 878, PT_SC, ucp_Thai }, |
{ 879, PT_SC, ucp_Tamil }, |
| 522 |
{ 883, PT_SC, ucp_Tibetan }, |
{ 885, PT_SC, ucp_Telugu }, |
| 523 |
{ 891, PT_SC, ucp_Tifinagh }, |
{ 892, PT_SC, ucp_Thaana }, |
| 524 |
{ 900, PT_SC, ucp_Ugaritic }, |
{ 899, PT_SC, ucp_Thai }, |
| 525 |
{ 909, PT_SC, ucp_Vai }, |
{ 904, PT_SC, ucp_Tibetan }, |
| 526 |
{ 913, PT_ALNUM, 0 }, |
{ 912, PT_SC, ucp_Tifinagh }, |
| 527 |
{ 917, PT_PXSPACE, 0 }, |
{ 921, PT_SC, ucp_Ugaritic }, |
| 528 |
{ 921, PT_SPACE, 0 }, |
{ 930, PT_SC, ucp_Vai }, |
| 529 |
{ 925, PT_WORD, 0 }, |
{ 934, PT_ALNUM, 0 }, |
| 530 |
{ 929, PT_SC, ucp_Yi }, |
{ 938, PT_PXSPACE, 0 }, |
| 531 |
{ 932, PT_GC, ucp_Z }, |
{ 942, PT_SPACE, 0 }, |
| 532 |
{ 934, PT_PC, ucp_Zl }, |
{ 946, PT_WORD, 0 }, |
| 533 |
{ 937, PT_PC, ucp_Zp }, |
{ 950, PT_SC, ucp_Yi }, |
| 534 |
{ 940, PT_PC, ucp_Zs } |
{ 953, PT_GC, ucp_Z }, |
| 535 |
|
{ 955, PT_PC, ucp_Zl }, |
| 536 |
|
{ 958, PT_PC, ucp_Zp }, |
| 537 |
|
{ 961, PT_PC, ucp_Zs } |
| 538 |
}; |
}; |
| 539 |
|
|
| 540 |
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); |
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); |