| 1 |
nigel |
77 |
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/
| 4 |
|
|
|
| 5 |
|
|
/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
           Copyright (c) 1997-2005 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
| 39 |
|
|
|
| 40 |
|
|
|
| 41 |
|
|
/* This module contains some fixed tables that are used by more than one of the
PCRE code modules. The tables are also #included by the pcretest program, which
uses macros to change their names from _pcre_xxx to xxxx, thereby avoiding name
clashes with the library. */
| 45 |
nigel |
77 |
|
| 46 |
|
|
|
| 47 |
|
|
#include "pcre_internal.h" |
| 48 |
|
|
|
| 49 |
|
|
|
| 50 |
|
|
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
| 51 |
|
|
the definition is next to the definition of the opcodes in internal.h. */ |
| 52 |
|
|
|
| 53 |
|
|
const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; |
| 54 |
|
|
|
| 55 |
|
|
|
| 56 |
|
|
|
| 57 |
|
|
/*************************************************
*            Tables for UTF-8 support            *
*************************************************/
| 60 |
|
|
|
| 61 |
|
|
/* These are the breakpoints for different numbers of bytes in a UTF-8
character: entry i is the largest value that fits in an (i+1)-byte sequence. */

const int _pcre_utf8_table1[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff };

/* Number of entries in _pcre_utf8_table1. The divisor is the size of an
element of the array itself, so the count stays correct if the element type of
the table is ever changed. */

const int _pcre_utf8_table1_size =
  sizeof(_pcre_utf8_table1)/sizeof(_pcre_utf8_table1[0]);
| 68 |
|
|
|
| 69 |
|
|
/* These are the indicator bits (_pcre_utf8_table2) and the mask for the data
bits (_pcre_utf8_table3) to set in the first byte of a character. Both tables
are indexed by the number of additional bytes. */

const int _pcre_utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
| 74 |
|
|
|
| 75 |
|
|
/* Table of the number of extra characters, indexed by the first character |
| 76 |
|
|
masked with 0x3f. The highest number for a valid UTF-8 character is in fact |
| 77 |
|
|
0x3d. */ |
| 78 |
|
|
|
| 79 |
|
|
const uschar _pcre_utf8_table4[] = { |
| 80 |
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 81 |
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 82 |
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
| 83 |
|
|
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
| 84 |
|
|
|
| 85 |
|
|
/* This table translates Unicode property names into code values for the |
| 86 |
nigel |
85 |
ucp_findchar() function. */ |
| 87 |
nigel |
77 |
|
| 88 |
|
|
const ucp_type_table _pcre_utt[] = { |
| 89 |
|
|
{ "C", 128 + ucp_C }, |
| 90 |
|
|
{ "Cc", ucp_Cc }, |
| 91 |
|
|
{ "Cf", ucp_Cf }, |
| 92 |
|
|
{ "Cn", ucp_Cn }, |
| 93 |
|
|
{ "Co", ucp_Co }, |
| 94 |
|
|
{ "Cs", ucp_Cs }, |
| 95 |
|
|
{ "L", 128 + ucp_L }, |
| 96 |
|
|
{ "Ll", ucp_Ll }, |
| 97 |
|
|
{ "Lm", ucp_Lm }, |
| 98 |
|
|
{ "Lo", ucp_Lo }, |
| 99 |
|
|
{ "Lt", ucp_Lt }, |
| 100 |
|
|
{ "Lu", ucp_Lu }, |
| 101 |
|
|
{ "M", 128 + ucp_M }, |
| 102 |
|
|
{ "Mc", ucp_Mc }, |
| 103 |
|
|
{ "Me", ucp_Me }, |
| 104 |
|
|
{ "Mn", ucp_Mn }, |
| 105 |
|
|
{ "N", 128 + ucp_N }, |
| 106 |
|
|
{ "Nd", ucp_Nd }, |
| 107 |
|
|
{ "Nl", ucp_Nl }, |
| 108 |
|
|
{ "No", ucp_No }, |
| 109 |
|
|
{ "P", 128 + ucp_P }, |
| 110 |
|
|
{ "Pc", ucp_Pc }, |
| 111 |
|
|
{ "Pd", ucp_Pd }, |
| 112 |
|
|
{ "Pe", ucp_Pe }, |
| 113 |
|
|
{ "Pf", ucp_Pf }, |
| 114 |
|
|
{ "Pi", ucp_Pi }, |
| 115 |
|
|
{ "Po", ucp_Po }, |
| 116 |
|
|
{ "Ps", ucp_Ps }, |
| 117 |
|
|
{ "S", 128 + ucp_S }, |
| 118 |
|
|
{ "Sc", ucp_Sc }, |
| 119 |
|
|
{ "Sk", ucp_Sk }, |
| 120 |
|
|
{ "Sm", ucp_Sm }, |
| 121 |
|
|
{ "So", ucp_So }, |
| 122 |
|
|
{ "Z", 128 + ucp_Z }, |
| 123 |
|
|
{ "Zl", ucp_Zl }, |
| 124 |
|
|
{ "Zp", ucp_Zp }, |
| 125 |
|
|
{ "Zs", ucp_Zs } |
| 126 |
|
|
}; |
| 127 |
|
|
|
| 128 |
|
|
/* Number of entries in _pcre_utt. The divisor is the size of an element of
the array itself, so the count cannot drift from the table's element type. */

const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(_pcre_utt[0]);
| 129 |
|
|
|
| 130 |
|
|
/* End of pcre_tables.c */