| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
| 8 |
Written by Philip Hazel |
Written by Philip Hazel |
| 9 |
Copyright (c) 1997-2009 University of Cambridge |
Copyright (c) 1997-2012 University of Cambridge |
| 10 |
|
|
| 11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 37 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 38 |
*/ |
*/ |
| 39 |
|
|
| 40 |
|
#ifndef PCRE_INCLUDED |
| 41 |
|
|
| 42 |
/* This module contains some fixed tables that are used by more than one of the |
/* This module contains some fixed tables that are used by more than one of the |
| 43 |
PCRE code modules. The tables are also #included by the pcretest program, which |
PCRE code modules. The tables are also #included by the pcretest program, which |
| 51 |
|
|
| 52 |
#include "pcre_internal.h" |
#include "pcre_internal.h" |
| 53 |
|
|
| 54 |
|
#endif /* PCRE_INCLUDED */ |
| 55 |
|
|
| 56 |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
| 57 |
the definition is next to the definition of the opcodes in pcre_internal.h. */ |
the definition is next to the definition of the opcodes in pcre_internal.h. */ |
| 58 |
|
|
| 59 |
const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; |
const pcre_uint8 PRIV(OP_lengths)[] = { OP_LENGTHS }; |
| 60 |
|
|
| 61 |
|
|
| 62 |
|
|
| 67 |
/* These are the breakpoints for different numbers of bytes in a UTF-8 |
/* These are the breakpoints for different numbers of bytes in a UTF-8 |
| 68 |
character. */ |
character. */ |
| 69 |
|
|
| 70 |
#ifdef SUPPORT_UTF8 |
#if (defined SUPPORT_UTF && defined COMPILE_PCRE8) \ |
| 71 |
|
|| (defined PCRE_INCLUDED && defined SUPPORT_PCRE16) |
| 72 |
|
|
| 73 |
const int _pcre_utf8_table1[] = |
/* These tables are also required by pcretest in 16 bit mode. */ |
| 74 |
|
|
| 75 |
|
const int PRIV(utf8_table1)[] = |
| 76 |
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
| 77 |
|
|
| 78 |
const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int); |
const int PRIV(utf8_table1_size) = sizeof(PRIV(utf8_table1)) / sizeof(int); |
| 79 |
|
|
| 80 |
/* These are the indicator bits and the mask for the data bits to set in the |
/* These are the indicator bits and the mask for the data bits to set in the |
| 81 |
first byte of a character, indexed by the number of additional bytes. */ |
first byte of a character, indexed by the number of additional bytes. */ |
| 82 |
|
|
| 83 |
const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
const int PRIV(utf8_table2)[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
| 84 |
const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
const int PRIV(utf8_table3)[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
| 85 |
|
|
| 86 |
/* Table of the number of extra bytes, indexed by the first byte masked with |
/* Table of the number of extra bytes, indexed by the first byte masked with |
| 87 |
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ |
0x3f. The highest number for a valid UTF-8 first byte is in fact 0x3d. */ |
| 88 |
|
|
| 89 |
const uschar _pcre_utf8_table4[] = { |
const pcre_uint8 PRIV(utf8_table4)[] = { |
| 90 |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 91 |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 92 |
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
| 93 |
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
| 94 |
|
|
| 95 |
#ifdef SUPPORT_JIT |
#endif /* (SUPPORT_UTF && COMPILE_PCRE8) || (PCRE_INCLUDED && SUPPORT_PCRE16)*/ |
|
/* Full table of the number of extra bytes when the |
|
|
character code is greater or equal than 0xc0. |
|
|
See _pcre_utf8_table4 above. */ |
|
| 96 |
|
|
| 97 |
const uschar _pcre_utf8_char_sizes[] = { |
#ifdef SUPPORT_UTF |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
|
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
|
|
3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4, |
|
|
}; |
|
|
#endif |
|
| 98 |
|
|
| 99 |
/* Table to translate from particular type value to the general value. */ |
/* Table to translate from particular type value to the general value. */ |
| 100 |
|
|
| 101 |
const int _pcre_ucp_gentype[] = { |
const int PRIV(ucp_gentype)[] = { |
| 102 |
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ |
ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */ |
| 103 |
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ |
ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */ |
| 104 |
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ |
ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */ |
| 110 |
}; |
}; |
| 111 |
|
|
| 112 |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| 113 |
/* This table reverses _pcre_ucp_gentype. We can save the cost |
/* This table reverses PRIV(ucp_gentype). We can save the cost |
| 114 |
of a memory load. */ |
of a memory load. */ |
| 115 |
|
|
| 116 |
const int _pcre_ucp_typerange[] = { |
const int PRIV(ucp_typerange)[] = { |
| 117 |
ucp_Cc, ucp_Cs, |
ucp_Cc, ucp_Cs, |
| 118 |
ucp_Ll, ucp_Lu, |
ucp_Ll, ucp_Lu, |
| 119 |
ucp_Mc, ucp_Mn, |
ucp_Mc, ucp_Mn, |
| 122 |
ucp_Sc, ucp_So, |
ucp_Sc, ucp_So, |
| 123 |
ucp_Zl, ucp_Zs, |
ucp_Zl, ucp_Zs, |
| 124 |
}; |
}; |
| 125 |
#endif |
#endif /* SUPPORT_JIT */ |
| 126 |
|
|
| 127 |
/* The pcre_utt[] table below translates Unicode property names into type and |
/* The pcre_utt[] table below translates Unicode property names into type and |
| 128 |
code values. It is searched by binary chop, so must be in collating sequence of |
code values. It is searched by binary chop, so must be in collating sequence of |
| 280 |
#define STRING_Zp0 STR_Z STR_p "\0" |
#define STRING_Zp0 STR_Z STR_p "\0" |
| 281 |
#define STRING_Zs0 STR_Z STR_s "\0" |
#define STRING_Zs0 STR_Z STR_s "\0" |
| 282 |
|
|
| 283 |
const char _pcre_utt_names[] = |
const char PRIV(utt_names)[] = |
| 284 |
STRING_Any0 |
STRING_Any0 |
| 285 |
STRING_Arabic0 |
STRING_Arabic0 |
| 286 |
STRING_Armenian0 |
STRING_Armenian0 |
| 420 |
STRING_Zp0 |
STRING_Zp0 |
| 421 |
STRING_Zs0; |
STRING_Zs0; |
| 422 |
|
|
| 423 |
const ucp_type_table _pcre_utt[] = { |
const ucp_type_table PRIV(utt)[] = { |
| 424 |
{ 0, PT_ANY, 0 }, |
{ 0, PT_ANY, 0 }, |
| 425 |
{ 4, PT_SC, ucp_Arabic }, |
{ 4, PT_SC, ucp_Arabic }, |
| 426 |
{ 11, PT_SC, ucp_Armenian }, |
{ 11, PT_SC, ucp_Armenian }, |
| 561 |
{ 961, PT_PC, ucp_Zs } |
{ 961, PT_PC, ucp_Zs } |
| 562 |
}; |
}; |
| 563 |
|
|
| 564 |
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); |
const int PRIV(utt_size) = sizeof(PRIV(utt)) / sizeof(ucp_type_table); |
| 565 |
|
|
| 566 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF */ |
| 567 |
|
|
| 568 |
/* End of pcre_tables.c */ |
/* End of pcre_tables.c */ |