/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC | revision 240 by ph10, Tue Sep 11 15:47:20 2007 UTC
# Line 43  supporting internal functions that are n Line 43  supporting internal functions that are n
43    
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK cd             /* Block containing newline information */  #define NLBLOCK cd             /* Block containing newline information */
# Line 140  static const short int escapes[] = { Line 140  static const short int escapes[] = {
140  #endif  #endif
141    
142    
143  /* Table of special "verbs" like (*PRUNE) */  /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
144    searched linearly. Put all the names into a single string, in order to reduce
145    the number of relocations when a shared library is dynamically linked. */
146    
147  typedef struct verbitem {  typedef struct verbitem {
   const char *name;  
148    int   len;    int   len;
149    int   op;    int   op;
150  } verbitem;  } verbitem;
151    
152    static const char verbnames[] =
153      "ACCEPT\0"
154      "COMMIT\0"
155      "F\0"
156      "FAIL\0"
157      "PRUNE\0"
158      "SKIP\0"
159      "THEN";
160    
161  static verbitem verbs[] = {  static verbitem verbs[] = {
162    { "ACCEPT", 6, OP_ACCEPT },    { 6, OP_ACCEPT },
163    { "COMMIT", 6, OP_COMMIT },    { 6, OP_COMMIT },
164    { "F",      1, OP_FAIL },    { 1, OP_FAIL },
165    { "FAIL",   4, OP_FAIL },    { 4, OP_FAIL },
166    { "PRUNE",  5, OP_PRUNE },    { 5, OP_PRUNE },
167    { "SKIP",   4, OP_SKIP  },    { 4, OP_SKIP  },
168    { "THEN",   4, OP_THEN  }    { 4, OP_THEN  }
169  };  };
170    
171  static int verbcount = sizeof(verbs)/sizeof(verbitem);  static int verbcount = sizeof(verbs)/sizeof(verbitem);
172    
173    
174  /* Tables of names of POSIX character classes and their lengths. The list is  /* Tables of names of POSIX character classes and their lengths. The names are
175  terminated by a zero length entry. The first three must be alpha, lower, upper,  now all in a single string, to reduce the number of relocations when a shared
176  as this is assumed for handling case independence. */  library is dynamically loaded. The list of lengths is terminated by a zero
177    length entry. The first three must be alpha, lower, upper, as this is assumed
178  static const char *const posix_names[] = {  for handling case independence. */
179    "alpha", "lower", "upper",  
180    "alnum", "ascii", "blank", "cntrl", "digit", "graph",  static const char posix_names[] =
181    "print", "punct", "space", "word",  "xdigit" };    "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"
182      "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"
183      "word\0"   "xdigit";
184    
185  static const uschar posix_name_lengths[] = {  static const uschar posix_name_lengths[] = {
186    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
# Line 207  static const int posix_class_maps[] = { Line 219  static const int posix_class_maps[] = {
219  /* The texts of compile-time error messages. These are "char *" because they  /* The texts of compile-time error messages. These are "char *" because they
220  are passed to the outside world. Do not ever re-use any error number, because  are passed to the outside world. Do not ever re-use any error number, because
221  they are documented. Always add a new error instead. Messages marked DEAD below  they are documented. Always add a new error instead. Messages marked DEAD below
222  are no longer used. */  are no longer used. This used to be a table of strings, but in order to reduce
223    the number of relocations needed when a shared library is loaded dynamically,
224  static const char *error_texts[] = {  it is now one long string. We cannot use a table of offsets, because the
225    "no error",  lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
226    "\\ at end of pattern",  simply count through to the one we want - this isn't a performance issue
227    "\\c at end of pattern",  because these strings are used only when there is a compilation error. */
228    "unrecognized character follows \\",  
229    "numbers out of order in {} quantifier",  static const char error_texts[] =
230      "no error\0"
231      "\\ at end of pattern\0"
232      "\\c at end of pattern\0"
233      "unrecognized character follows \\\0"
234      "numbers out of order in {} quantifier\0"
235    /* 5 */    /* 5 */
236    "number too big in {} quantifier",    "number too big in {} quantifier\0"
237    "missing terminating ] for character class",    "missing terminating ] for character class\0"
238    "invalid escape sequence in character class",    "invalid escape sequence in character class\0"
239    "range out of order in character class",    "range out of order in character class\0"
240    "nothing to repeat",    "nothing to repeat\0"
241    /* 10 */    /* 10 */
242    "operand of unlimited repeat could match the empty string",  /** DEAD **/    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/
243    "internal error: unexpected repeat",    "internal error: unexpected repeat\0"
244    "unrecognized character after (?",    "unrecognized character after (?\0"
245    "POSIX named classes are supported only within a class",    "POSIX named classes are supported only within a class\0"
246    "missing )",    "missing )\0"
247    /* 15 */    /* 15 */
248    "reference to non-existent subpattern",    "reference to non-existent subpattern\0"
249    "erroffset passed as NULL",    "erroffset passed as NULL\0"
250    "unknown option bit(s) set",    "unknown option bit(s) set\0"
251    "missing ) after comment",    "missing ) after comment\0"
252    "parentheses nested too deeply",  /** DEAD **/    "parentheses nested too deeply\0"  /** DEAD **/
253    /* 20 */    /* 20 */
254    "regular expression is too large",    "regular expression is too large\0"
255    "failed to get memory",    "failed to get memory\0"
256    "unmatched parentheses",    "unmatched parentheses\0"
257    "internal error: code overflow",    "internal error: code overflow\0"
258    "unrecognized character after (?<",    "unrecognized character after (?<\0"
259    /* 25 */    /* 25 */
260    "lookbehind assertion is not fixed length",    "lookbehind assertion is not fixed length\0"
261    "malformed number or name after (?(",    "malformed number or name after (?(\0"
262    "conditional group contains more than two branches",    "conditional group contains more than two branches\0"
263    "assertion expected after (?(",    "assertion expected after (?(\0"
264    "(?R or (?[+-]digits must be followed by )",    "(?R or (?[+-]digits must be followed by )\0"
265    /* 30 */    /* 30 */
266    "unknown POSIX class name",    "unknown POSIX class name\0"
267    "POSIX collating elements are not supported",    "POSIX collating elements are not supported\0"
268    "this version of PCRE is not compiled with PCRE_UTF8 support",    "this version of PCRE is not compiled with PCRE_UTF8 support\0"
269    "spare error",  /** DEAD **/    "spare error\0"  /** DEAD **/
270    "character value in \\x{...} sequence is too large",    "character value in \\x{...} sequence is too large\0"
271    /* 35 */    /* 35 */
272    "invalid condition (?(0)",    "invalid condition (?(0)\0"
273    "\\C not allowed in lookbehind assertion",    "\\C not allowed in lookbehind assertion\0"
274    "PCRE does not support \\L, \\l, \\N, \\U, or \\u",    "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"
275    "number after (?C is > 255",    "number after (?C is > 255\0"
276    "closing ) for (?C expected",    "closing ) for (?C expected\0"
277    /* 40 */    /* 40 */
278    "recursive call could loop indefinitely",    "recursive call could loop indefinitely\0"
279    "unrecognized character after (?P",    "unrecognized character after (?P\0"
280    "syntax error in subpattern name (missing terminator)",    "syntax error in subpattern name (missing terminator)\0"
281    "two named subpatterns have the same name",    "two named subpatterns have the same name\0"
282    "invalid UTF-8 string",    "invalid UTF-8 string\0"
283    /* 45 */    /* 45 */
284    "support for \\P, \\p, and \\X has not been compiled",    "support for \\P, \\p, and \\X has not been compiled\0"
285    "malformed \\P or \\p sequence",    "malformed \\P or \\p sequence\0"
286    "unknown property name after \\P or \\p",    "unknown property name after \\P or \\p\0"
287    "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)",    "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " characters)\0"
288    "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")",    "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0"
289    /* 50 */    /* 50 */
290    "repeated subpattern is too long",    /** DEAD **/    "repeated subpattern is too long\0"    /** DEAD **/
291    "octal value is greater than \\377 (not in UTF-8 mode)",    "octal value is greater than \\377 (not in UTF-8 mode)\0"
292    "internal error: overran compiling workspace",    "internal error: overran compiling workspace\0"
293    "internal error: previously-checked referenced subpattern not found",    "internal error: previously-checked referenced subpattern not found\0"
294    "DEFINE group contains more than one branch",    "DEFINE group contains more than one branch\0"
295    /* 55 */    /* 55 */
296    "repeating a DEFINE group is not allowed",    "repeating a DEFINE group is not allowed\0"
297    "inconsistent NEWLINE options",    "inconsistent NEWLINE options\0"
298    "\\g is not followed by a braced name or an optionally braced non-zero number",    "\\g is not followed by a braced name or an optionally braced non-zero number\0"
299    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number",    "(?+ or (?- or (?(+ or (?(- must be followed by a non-zero number\0"
300    "(*VERB) with an argument is not supported",    "(*VERB) with an argument is not supported\0"
301    /* 60 */    /* 60 */
302    "(*VERB) not recognized",    "(*VERB) not recognized\0"
303    "number is too big"    "number is too big";
 };  
304    
305    
306  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 420  static BOOL Line 436  static BOOL
436    
437    
438  /*************************************************  /*************************************************
439    *            Find an error text                  *
440    *************************************************/
441    
442    /* The error texts are now all in one long string, to save on relocations. As
443    some of the text is of unknown length, we can't use a table of offsets.
444    Instead, just count through the strings. This is not a performance issue
445    because it happens only when there has been a compilation error.
446    
447    Argument:   the error number
448    Returns:    pointer to the error string
449    */
450    
451    static const char *
452    find_error_text(int n)
453    {
454    const char *s = error_texts;
455    for (; n > 0; n--) while (*s++ != 0);
456    return s;
457    }
458    
459    
460    /*************************************************
461  *            Handle escapes                      *  *            Handle escapes                      *
462  *************************************************/  *************************************************/
463    
# Line 776  top = _pcre_utt_size; Line 814  top = _pcre_utt_size;
814  while (bot < top)  while (bot < top)
815    {    {
816    i = (bot + top) >> 1;    i = (bot + top) >> 1;
817    c = strcmp(name, _pcre_utt[i].name);    c = strcmp(name, _pcre_utt_names + _pcre_utt[i].name_offset);
818    if (c == 0)    if (c == 0)
819      {      {
820      *dptr = _pcre_utt[i].value;      *dptr = _pcre_utt[i].value;
# Line 1733  Returns: a value representing the na Line 1771  Returns: a value representing the na
1771  static int  static int
1772  check_posix_name(const uschar *ptr, int len)  check_posix_name(const uschar *ptr, int len)
1773  {  {
1774    const char *pn = posix_names;
1775  register int yield = 0;  register int yield = 0;
1776  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
1777    {    {
1778    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
1779      strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;      strncmp((const char *)ptr, pn, len) == 0) return yield;
1780      pn += posix_name_lengths[yield] + 1;
1781    yield++;    yield++;
1782    }    }
1783  return -1;  return -1;
# Line 2976  for (;; ptr++) Line 3016  for (;; ptr++)
3016    
3017        oldptr = ptr;        oldptr = ptr;
3018    
3019          /* Remember \r or \n */
3020    
3021          if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;
3022    
3023          /* Check for range */
3024    
3025        if (!inescq && ptr[1] == '-')        if (!inescq && ptr[1] == '-')
3026          {          {
3027          int d;          int d;
# Line 3043  for (;; ptr++) Line 3089  for (;; ptr++)
3089    
3090          if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */          if (d == c) goto LONE_SINGLE_CHARACTER;  /* A few lines below */
3091    
3092            /* Remember \r or \n */
3093    
3094            if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;
3095    
3096          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless          /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
3097          matching, we have to use an XCLASS with extra data items. Caseless          matching, we have to use an XCLASS with extra data items. Caseless
3098          matching for characters > 127 is available only if UCP support is          matching for characters > 127 is available only if UCP support is
# Line 3196  for (;; ptr++) Line 3246  for (;; ptr++)
3246        goto FAILED;        goto FAILED;
3247        }        }
3248    
3249    
3250    /* This code has been disabled because it would mean that \s counts as
3251    an explicit \r or \n reference, and that's not really what is wanted. Now
3252    we set the flag only if there is a literal "\r" or "\n" in the class. */
3253    
3254    #if 0
3255      /* Remember whether \r or \n are in this class */      /* Remember whether \r or \n are in this class */
3256    
3257      if (negate_class)      if (negate_class)
3258        {        {
3259        if ((classbits[1] & 0x24) != 0x24) cd->external_options |= PCRE_HASCRORLF;        if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;
3260        }        }
3261      else      else
3262        {        {
3263        if ((classbits[1] & 0x24) != 0) cd->external_options |= PCRE_HASCRORLF;        if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;
3264        }        }
3265    #endif
3266    
3267    
3268      /* If class_charcount is 1, we saw precisely one character whose value is      /* If class_charcount is 1, we saw precisely one character whose value is
3269      less than 256. As long as there were no characters >= 128 and there was no      less than 256. As long as there were no characters >= 128 and there was no
# Line 3498  for (;; ptr++) Line 3556  for (;; ptr++)
3556        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3557        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3558    
3559        if (repeat_max != 1) cd->nopartial = TRUE;        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
3560    
3561        /* Combine the op_type with the repeat_type */        /* Combine the op_type with the repeat_type */
3562    
# Line 3648  for (;; ptr++) Line 3706  for (;; ptr++)
3706        /* All real repeats make it impossible to handle partial matching (maybe        /* All real repeats make it impossible to handle partial matching (maybe
3707        one day we will be able to remove this restriction). */        one day we will be able to remove this restriction). */
3708    
3709        if (repeat_max != 1) cd->nopartial = TRUE;        if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
3710    
3711        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
3712          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 4006  for (;; ptr++) Line 4064  for (;; ptr++)
4064      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)      if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
4065        {        {
4066        int i, namelen;        int i, namelen;
4067          const char *vn = verbnames;
4068        const uschar *name = ++ptr;        const uschar *name = ++ptr;
4069        previous = NULL;        previous = NULL;
4070        while ((cd->ctypes[*++ptr] & ctype_letter) != 0);        while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
# Line 4023  for (;; ptr++) Line 4082  for (;; ptr++)
4082        for (i = 0; i < verbcount; i++)        for (i = 0; i < verbcount; i++)
4083          {          {
4084          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
4085              strncmp((char *)name, verbs[i].name, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4086            {            {
4087            *code = verbs[i].op;            *code = verbs[i].op;
4088            if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;            if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
4089            break;            break;
4090            }            }
4091            vn += verbs[i].len + 1;
4092          }          }
4093        if (i < verbcount) continue;        if (i < verbcount) continue;
4094        *errorcodeptr = ERR60;        *errorcodeptr = ERR60;
# Line 4645  for (;; ptr++) Line 4705  for (;; ptr++)
4705    
4706              case 'J':    /* Record that it changed in the external options */              case 'J':    /* Record that it changed in the external options */
4707              *optset |= PCRE_DUPNAMES;              *optset |= PCRE_DUPNAMES;
4708              cd->external_options |= PCRE_JCHANGED;              cd->external_flags |= PCRE_JCHANGED;
4709              break;              break;
4710    
4711              case 'i': *optset |= PCRE_CASELESS; break;              case 'i': *optset |= PCRE_CASELESS; break;
# Line 5065  for (;; ptr++) Line 5125  for (;; ptr++)
5125      /* Remember if \r or \n were seen */      /* Remember if \r or \n were seen */
5126    
5127      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')      if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
5128        cd->external_options |= PCRE_HASCRORLF;        cd->external_flags |= PCRE_HASCRORLF;
5129    
5130      /* Set the first and required bytes appropriately. If no previous first      /* Set the first and required bytes appropriately. If no previous first
5131      byte, set it from this character, but revert to none on a zero repeat.      byte, set it from this character, but revert to none on a zero repeat.
# Line 5745  cd->fcc = tables + fcc_offset; Line 5805  cd->fcc = tables + fcc_offset;
5805  cd->cbits = tables + cbits_offset;  cd->cbits = tables + cbits_offset;
5806  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5807    
5808  /* Check for newline settings at the start of the pattern, and remember the  /* Check for global one-time settings at the start of the pattern, and remember
5809  offset for later. */  the offset for later. */
5810    
5811  if (ptr[0] == '(' && ptr[1] == '*')  while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
5812    {    {
5813    int newnl = 0;    int newnl = 0;
5814    if (strncmp((char *)(ptr+2), "CR)", 3) == 0)    int newbsr = 0;
5815      { skipatstart = 5; newnl = PCRE_NEWLINE_CR; }  
5816    else if (strncmp((char *)(ptr+2), "LF)", 3)  == 0)    if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)
5817      { skipatstart = 5; newnl = PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
5818    else if (strncmp((char *)(ptr+2), "CRLF)", 5)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)
5819      { skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }      { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
5820    else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0)    else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)
5821      { skipatstart = 6; newnl = PCRE_NEWLINE_ANY; }      { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
5822    else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8)  == 0)    else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)
5823      { skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; }      { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
5824    if (skipatstart > 0)    else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)
5825        { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
5826    
5827      else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)
5828        { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
5829      else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)
5830        { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
5831    
5832      if (newnl != 0)
5833      options = (options & ~PCRE_NEWLINE_BITS) | newnl;      options = (options & ~PCRE_NEWLINE_BITS) | newnl;
5834      else if (newbsr != 0)
5835        options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
5836      else break;
5837      }
5838    
5839    /* Check validity of \R options. */
5840    
5841    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
5842      {
5843      case 0:
5844      case PCRE_BSR_ANYCRLF:
5845      case PCRE_BSR_UNICODE:
5846      break;
5847      default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5848    }    }
5849    
5850  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
# Line 5834  cd->hwm = cworkspace; Line 5916  cd->hwm = cworkspace;
5916  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const uschar *)pattern;
5917  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
5918  cd->req_varyopt = 0;  cd->req_varyopt = 0;
 cd->nopartial = FALSE;  
5919  cd->external_options = options;  cd->external_options = options;
5920    cd->external_flags = 0;
5921    
5922  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
5923  don't need to look at the result of the function here. The initial options have  don't need to look at the result of the function here. The initial options have
# Line 5874  if (re == NULL) Line 5956  if (re == NULL)
5956    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
5957    }    }
5958    
5959  /* Put in the magic number, and save the sizes, initial options, and character  /* Put in the magic number, and save the sizes, initial options, internal
5960  table pointer. NULL is used for the default character tables. The nullpad field  flags, and character table pointer. NULL is used for the default character
5961  is at the end; it's there to help in the case when a regex compiled on a system  tables. The nullpad field is at the end; it's there to help in the case when a
5962  with 4-byte pointers is run on another with 8-byte pointers. */  regex compiled on a system with 4-byte pointers is run on another with 8-byte
5963    pointers. */
5964    
5965  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
5966  re->size = size;  re->size = size;
5967  re->options = cd->external_options;  re->options = cd->external_options;
5968    re->flags = cd->external_flags;
5969  re->dummy1 = 0;  re->dummy1 = 0;
5970  re->first_byte = 0;  re->first_byte = 0;
5971  re->req_byte = 0;  re->req_byte = 0;
# Line 5906  codestart = cd->name_table + re->name_en Line 5990  codestart = cd->name_table + re->name_en
5990  cd->start_code = codestart;  cd->start_code = codestart;
5991  cd->hwm = cworkspace;  cd->hwm = cworkspace;
5992  cd->req_varyopt = 0;  cd->req_varyopt = 0;
 cd->nopartial = FALSE;  
5993  cd->had_accept = FALSE;  cd->had_accept = FALSE;
5994    
5995  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
# Line 5920  code = (uschar *)codestart; Line 6003  code = (uschar *)codestart;
6003    &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);    &errorcode, FALSE, FALSE, 0, &firstbyte, &reqbyte, NULL, cd, NULL);
6004  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
6005  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
6006    re->flags = cd->external_flags;
6007    
 if (cd->nopartial) re->options |= PCRE_NOPARTIAL;  
6008  if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqbyte = -1;   /* Must disable after (*ACCEPT) */
6009    
6010  /* If not reached end of pattern on success, there's an excess bracket. */  /* If not reached end of pattern on success, there's an excess bracket. */
# Line 5964  if (errorcode != 0) Line 6047  if (errorcode != 0)
6047    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
6048    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
6049    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
6050    *errorptr = error_texts[errorcode];    *errorptr = find_error_text(errorcode);
6051    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
6052    return NULL;    return NULL;
6053    }    }
# Line 5993  if ((re->options & PCRE_ANCHORED) == 0) Line 6076  if ((re->options & PCRE_ANCHORED) == 0)
6076        int ch = firstbyte & 255;        int ch = firstbyte & 255;
6077        re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&        re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&
6078           cd->fcc[ch] == ch)? ch : firstbyte;           cd->fcc[ch] == ch)? ch : firstbyte;
6079        re->options |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
6080        }        }
6081      else if (is_startline(codestart, 0, cd->backref_map))      else if (is_startline(codestart, 0, cd->backref_map))
6082        re->options |= PCRE_STARTLINE;        re->flags |= PCRE_STARTLINE;
6083      }      }
6084    }    }
6085    
# Line 6010  if (reqbyte >= 0 && Line 6093  if (reqbyte >= 0 &&
6093    int ch = reqbyte & 255;    int ch = reqbyte & 255;
6094    re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&    re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&
6095      cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;      cd->fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;
6096    re->options |= PCRE_REQCHSET;    re->flags |= PCRE_REQCHSET;
6097    }    }
6098    
6099  /* Print out the compiled data if debugging is enabled. This is never the  /* Print out the compiled data if debugging is enabled. This is never the
# Line 6023  printf("Length = %d top_bracket = %d top Line 6106  printf("Length = %d top_bracket = %d top
6106    
6107  printf("Options=%08x\n", re->options);  printf("Options=%08x\n", re->options);
6108    
6109  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->flags & PCRE_FIRSTSET) != 0)
6110    {    {
6111    int ch = re->first_byte & 255;    int ch = re->first_byte & 255;
6112    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?    const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)?
# Line 6032  if ((re->options & PCRE_FIRSTSET) != 0) Line 6115  if ((re->options & PCRE_FIRSTSET) != 0)
6115      else printf("First char = \\x%02x%s\n", ch, caseless);      else printf("First char = \\x%02x%s\n", ch, caseless);
6116    }    }
6117    
6118  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
6119    {    {
6120    int ch = re->req_byte & 255;    int ch = re->req_byte & 255;
6121    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?    const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)?
# Line 6049  was compiled can be seen. */ Line 6132  was compiled can be seen. */
6132  if (code - codestart > length)  if (code - codestart > length)
6133    {    {
6134    (pcre_free)(re);    (pcre_free)(re);
6135    *errorptr = error_texts[ERR23];    *errorptr = find_error_text(ERR23);
6136    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (uschar *)pattern;
6137    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
6138    return NULL;    return NULL;

Legend:
Removed from v.227  
changed lines
  Added in v.240

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12