| 6 |
# is indexed by script name. In order to reduce the number of relocations when |
# is indexed by script name. In order to reduce the number of relocations when |
| 7 |
# loading the library, the names are held as a single large string, with |
# loading the library, the names are held as a single large string, with |
| 8 |
# offsets in the table. This is tedious to maintain by hand. Therefore, this |
# offsets in the table. This is tedious to maintain by hand. Therefore, this |
| 9 |
# script is used to generate the table. The output is sent to stdout. |
# script is used to generate the table. The output is sent to stdout; usually |
| 10 |
|
# that should be directed to a temporary file. Then pcre_tables.c can be edited |
| 11 |
|
# by replacing the relevant definitions and table therein with the temporary |
| 12 |
|
# file. |
| 13 |
|
|
| 14 |
# Modified by PH 17-March-2009 to generate the more verbose form that works |
# Modified by PH 17-March-2009 to generate the more verbose form that works |
| 15 |
# for UTF-support in EBCDIC as well as ASCII environments. |
# for UTF-support in EBCDIC as well as ASCII environments. |
| 16 |
# Modified by PH 01-March-2010 to add new scripts from Unicode 5.2.0. |
# Modified by PH 01-March-2010 to add new scripts for Unicode 5.2.0. |
| 17 |
# Modified by PH 04-May-2010 to add new "X.." special categories. |
# Modified by PH 04-May-2010 to add new "X.." special categories. |
| 18 |
|
# Modified by PH 30-April-2011 to add new scripts for Unicode 6.0.0 |
| 19 |
|
|
| 20 |
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \ |
script_names = ['Arabic', 'Armenian', 'Bengali', 'Bopomofo', 'Braille', 'Buginese', 'Buhid', 'Canadian_Aboriginal', \ |
| 21 |
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \ |
'Cherokee', 'Common', 'Coptic', 'Cypriot', 'Cyrillic', 'Deseret', 'Devanagari', 'Ethiopic', 'Georgian', \ |
| 32 |
'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic', \ |
'Avestan', 'Bamum', 'Egyptian_Hieroglyphs', 'Imperial_Aramaic', \ |
| 33 |
'Inscriptional_Pahlavi', 'Inscriptional_Parthian', \ |
'Inscriptional_Pahlavi', 'Inscriptional_Parthian', \ |
| 34 |
'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \ |
'Javanese', 'Kaithi', 'Lisu', 'Meetei_Mayek', \ |
| 35 |
'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet' |
'Old_South_Arabian', 'Old_Turkic', 'Samaritan', 'Tai_Tham', 'Tai_Viet', \ |
| 36 |
|
# New for Unicode 6.0.0 |
| 37 |
|
'Batak', 'Brahmi', 'Mandaic' |
| 38 |
] |
] |
| 39 |
|
|
| 40 |
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', |
category_names = ['Cc', 'Cf', 'Cn', 'Co', 'Cs', 'Ll', 'Lm', 'Lo', 'Lt', 'Lu', |