| 3 |
# Multistage table builder |
# Multistage table builder |
| 4 |
# (c) Peter Kankowski, 2008 |
# (c) Peter Kankowski, 2008 |
| 5 |
|
|
| 6 |
|
# This script was submitted to the PCRE project by Peter Kankowski as part of |
| 7 |
|
# the upgrading of Unicode property support. The new code speeds up property |
| 8 |
|
# matching many times. The script is for the use of PCRE maintainers, to |
| 9 |
|
# generate the pcre_ucd.c file that contains a digested form of the Unicode |
| 10 |
|
# data tables. |
| 11 |
|
|
| 12 |
|
# The script should be run in the maint subdirectory, using the command |
| 13 |
|
# |
| 14 |
|
# ./MultiStage2.py >../pcre_ucd.c |
| 15 |
|
# |
| 16 |
|
# It requires three Unicode data tables, DerivedGeneralCategory.txt, |
| 17 |
|
# Scripts.txt, and UnicodeData.txt, to be in the Unicode.tables subdirectory. |
| 18 |
|
|
| 19 |
|
# Added with minor modifications: |
| 20 |
|
# Added #! line at start |
| 21 |
|
# Removed tabs |
| 22 |
|
# Made it work with Python 2.4 by rewriting the two conditional expressions ("a if cond else b", which need Python 2.5) as if/else statements |
| 23 |
|
# Consequent code tidy |
| 24 |
|
# Adjusted file names to Unicode.tables directory |
| 25 |
|
# |
| 26 |
|
# Philip Hazel, 02 July 2008 |
| 27 |
|
|
| 28 |
|
|
| 29 |
import re |
import re |
| 30 |
import string |
import string |
| 31 |
import sys |
import sys |
| 62 |
|
|
| 63 |
m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) |
m = re.match(r'([0-9a-fA-F]+)(\.\.([0-9a-fA-F]+))?$', chardata[0]) |
| 64 |
char = int(m.group(1), 16) |
char = int(m.group(1), 16) |
| 65 |
#PH last = char if m.group(3) is None else int(m.group(3), 16) |
# PH last = char if m.group(3) is None else int(m.group(3), 16) |
| 66 |
if m.group(3) is None: |
if m.group(3) is None: |
| 67 |
last = char |
last = char |
| 68 |
else: |
else: |
| 127 |
for i in range(0, len(table), ELEMS_PER_LINE): |
for i in range(0, len(table), ELEMS_PER_LINE): |
| 128 |
print fmt % (table[i:i+ELEMS_PER_LINE] + (i * mult,)) |
print fmt % (table[i:i+ELEMS_PER_LINE] + (i * mult,)) |
| 129 |
else: |
else: |
| 130 |
#PH fmt = "%3d," * (ELEMS_PER_LINE if block_size > ELEMS_PER_LINE else block_size) + "\n" |
# PH fmt = "%3d," * (ELEMS_PER_LINE if block_size > ELEMS_PER_LINE else block_size) + "\n" |
| 131 |
if block_size > ELEMS_PER_LINE: |
if block_size > ELEMS_PER_LINE: |
| 132 |
fmt = "%3d," * ELEMS_PER_LINE + "\n" |
fmt = "%3d," * ELEMS_PER_LINE + "\n" |
| 133 |
|
fmt = fmt * (block_size / ELEMS_PER_LINE) |
| 134 |
else: |
else: |
| 135 |
fmt = "%3d," * block_size + "\n" |
fmt = "%3d," * block_size + "\n" |
| 136 |
if block_size > ELEMS_PER_LINE: |
# PH if block_size > ELEMS_PER_LINE: |
| 137 |
fmt = fmt * (block_size / ELEMS_PER_LINE) |
# PH fmt = fmt * (block_size / ELEMS_PER_LINE) |
| 138 |
for i in range(0, len(table), block_size): |
for i in range(0, len(table), block_size): |
| 139 |
print ("/* block %d */\n" + fmt) % ((i / block_size,) + table[i:i+block_size]) |
print ("/* block %d */\n" + fmt) % ((i / block_size,) + table[i:i+block_size]) |
| 140 |
print "};\n" |
print "};\n" |