/[pcre]/code/tags/pcre-3.0/pcre.c
ViewVC logotype

Diff of /code/tags/pcre-3.0/pcre.c

Parent Directory | Revision Log | View Patch

revision 5 by nigel, Sat Feb 24 21:38:05 2007 UTC revision 15 by nigel, Sat Feb 24 21:38:25 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1998 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36    /* Use a macro for debugging printing, 'cause that eliminates the use
37    of #ifdef inline, and there are *still* stupid compilers about that don't like
38    indented pre-processor statements. I suppose it's only been 10 years... */
39    
40    #ifdef DEBUG
41    #define DPRINTF(p) printf p
42    #else
43    #define DPRINTF(p) /*nothing*/
44    #endif
45    
46  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
47  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 49  the external pcre header. */
49  #include "internal.h"  #include "internal.h"
50    
51    
52    /* Allow compilation as C++ source code, should anybody want to do that. */
53    
54    #ifdef __cplusplus
55    #define class pcre_class
56    #endif
57    
58    
59  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
60    
61  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
62  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
63    
64  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
65    
66  #ifdef DEBUG  #ifdef DEBUG
67  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
68      "End", "\\A", "\\B", "\\b", "\\D", "\\d",
69    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",
70    "not",    "not",
71    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
72    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
73    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
74    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
75    "class", "Ref",    "class", "negclass", "Ref",
76    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
77    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
78  };  };
# Line 66  are simple data values; negative values Line 83  are simple data values; negative values
83  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
84  is invalid. */  is invalid. */
85    
86  static short int escapes[] = {  static const short int escapes[] = {
87      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
89    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 81  static short int escapes[] = { Line 98  static short int escapes[] = {
98    
99  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
100    
101  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
102      compile_regex(int, int *, uschar **, const uschar **, const char **);
103    
104  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
105  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 98  typedef struct match_data { Line 116  typedef struct match_data {
116    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
117    BOOL   dotall;                /* Dot matches any char */    BOOL   dotall;                /* Dot matches any char */
118    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
119    uschar *start_subject;        /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
120    uschar *end_subject;          /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
121    jmp_buf fail_env;             /* Environment for longjump() break out */    jmp_buf fail_env;             /* Environment for longjump() break out */
122    uschar *end_match_ptr;        /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
123    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
124  } match_data;  } match_data;
125    
# Line 126  void (*pcre_free)(void *) = free; Line 144  void (*pcre_free)(void *) = free;
144  *          Return version string                 *  *          Return version string                 *
145  *************************************************/  *************************************************/
146    
147  char *  const char *
148  pcre_version(void)  pcre_version(void)
149  {  {
150  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns: number of identifying ex Line 174  Returns: number of identifying ex
174  int  int
175  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
176  {  {
177  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
178  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
179  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
180  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 204  Arguments:
204  Returns:     nothing  Returns:     nothing
205  */  */
206    
207  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
208    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
209  {  {
210  int c;  int c;
211  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 295  do { Line 314  do {
314        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
315    
316        case OP_CLASS:        case OP_CLASS:
317          case OP_NEGCLASS:
318        case OP_REF:        case OP_REF:
319        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
320    
# Line 360  Returns: zero or positive => a data Line 380  Returns: zero or positive => a data
380  */  */
381    
382  static int  static int
383  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
384    BOOL isclass)    int options, BOOL isclass)
385  {  {
386  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
387  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
388  int i;  int i;
389    
# Line 382  else if ((i = escapes[c - '0']) != 0) c Line 402  else if ((i = escapes[c - '0']) != 0) c
402    
403  else  else
404    {    {
405    uschar *oldptr;    const uschar *oldptr;
406    switch (c)    switch (c)
407      {      {
408      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 502  Returns: TRUE or FALSE Line 522  Returns: TRUE or FALSE
522  */  */
523    
524  static BOOL  static BOOL
525  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p)
526  {  {
527  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;
528  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;
# Line 537  Returns: pointer to '}' on success; Line 557  Returns: pointer to '}' on success;
557               current ptr on error, with errorptr set               current ptr on error, with errorptr set
558  */  */
559    
560  static uschar *  static const uschar *
561  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)
562  {  {
563  int min = 0;  int min = 0;
564  int max = -1;  int max = -1;
# Line 592  Returns: TRUE on success Line 612  Returns: TRUE on success
612  */  */
613    
614  static BOOL  static BOOL
615  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
616    char **errorptr)    const uschar **ptrptr, const char **errorptr)
617  {  {
618  int repeat_type, op_type;  int repeat_type, op_type;
619  int repeat_min, repeat_max;  int repeat_min, repeat_max;
620  int bravalue, length;  int bravalue, length;
621  register int c;  register int c;
622  register uschar *code = *codeptr;  register uschar *code = *codeptr;
623  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
624    const uschar *oldptr;
625  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
626  uschar class[32];  uschar class[32];
627    
628  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
# Line 659  for (;; ptr++) Line 679  for (;; ptr++)
679    
680      case '[':      case '[':
681      previous = code;      previous = code;
     *code++ = OP_CLASS;  
682    
683      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
684        different opcode. This only matters if caseless matching is specified at
685        runtime. */
686    
687      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
688        {        {
689        negate_class = TRUE;        negate_class = TRUE;
690          *code++ = OP_NEGCLASS;
691        c = *(++ptr);        c = *(++ptr);
692        }        }
693      else negate_class = FALSE;      else
694          {
695          negate_class = FALSE;
696          *code++ = OP_CLASS;
697          }
698    
699      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
700      character. */      character. */
# Line 697  for (;; ptr++) Line 723  for (;; ptr++)
723        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
724        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
725        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
726        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
727        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
728        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
729        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
# Line 976  for (;; ptr++) Line 1002  for (;; ptr++)
1002            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1003            }            }
1004    
1005          /* Insert an UPTO if the max is greater than the min. */          /* If the maximum is unlimited, insert an OP_STAR. */
1006    
1007            if (repeat_max < 0)
1008              {
1009              *code++ = c;
1010              *code++ = OP_STAR + repeat_type;
1011              }
1012    
1013            /* Else insert an UPTO if the max is greater than the min. */
1014    
1015          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1016            {            {
1017            *code++ = c;            *code++ = c;
1018            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 996  for (;; ptr++) Line 1030  for (;; ptr++)
1030      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1031      stuff after it. */      stuff after it. */
1032    
1033      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1034                 *previous == OP_REF)
1035        {        {
1036        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1037          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1022  for (;; ptr++) Line 1057  for (;; ptr++)
1057      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA)
1058        {        {
1059        int i;        int i;
1060        int length = code - previous;        int len = code - previous;
1061    
1062        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_max == -1 && could_be_empty(previous))
1063          {          {
# Line 1039  for (;; ptr++) Line 1074  for (;; ptr++)
1074          {          {
1075          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1076            {            {
1077            memcpy(code, previous, length);            memcpy(code, previous, len);
1078            code += length;            code += len;
1079            }            }
1080          }          }
1081    
# Line 1052  for (;; ptr++) Line 1087  for (;; ptr++)
1087          {          {
1088          if (repeat_min == 0)          if (repeat_min == 0)
1089            {            {
1090            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1091            code++;            code++;
1092            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1093            }            }
1094    
1095          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1096            {            {
1097            memcpy(code, previous, length);            memcpy(code, previous, len);
1098            code += length;            code += len;
1099            }            }
1100    
1101          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1102            {            {
1103            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1104            memcpy(code, previous, length);            memcpy(code, previous, len);
1105            code += length;            code += len;
1106            }            }
1107          }          }
1108    
# Line 1214  for (;; ptr++) Line 1249  for (;; ptr++)
1249        continue;        continue;
1250        }        }
1251    
1252      /* Reset and fall through */      /* Data character: reset and fall through */
1253    
1254      ptr = oldptr;      ptr = oldptr;
1255      c = '\\';      c = '\\';
# Line 1268  for (;; ptr++) Line 1303  for (;; ptr++)
1303      the next state. */      the next state. */
1304    
1305      previous[1] = length;      previous[1] = length;
1306      ptr--;      if (length < 255) ptr--;
1307      break;      break;
1308      }      }
1309    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1305  Returns: TRUE on success Line 1340  Returns: TRUE on success
1340  */  */
1341    
1342  static BOOL  static BOOL
1343  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int *brackets, uschar **codeptr,
1344    char **errorptr)    const uschar **ptrptr, const char **errorptr)
1345  {  {
1346  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1347  uschar *code = *codeptr;  uschar *code = *codeptr;
1348  uschar *start_bracket = code;  uschar *start_bracket = code;
1349    
# Line 1374  Returns: TRUE or FALSE Line 1409  Returns: TRUE or FALSE
1409  */  */
1410    
1411  static BOOL  static BOOL
1412  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, BOOL multiline)
1413  {  {
1414  do {  do {
1415     int op = (int)code[3];     int op = (int)code[3];
# Line 1403  Returns: TRUE or FALSE Line 1438  Returns: TRUE or FALSE
1438  */  */
1439    
1440  static BOOL  static BOOL
1441  is_startline(uschar *code)  is_startline(const uschar *code)
1442  {  {
1443  do {  do {
1444     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)
# Line 1488  Returns: pointer to compiled data Line 1523  Returns: pointer to compiled data
1523  */  */
1524    
1525  pcre *  pcre *
1526  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1527    int *erroroffset)    int *erroroffset)
1528  {  {
1529  real_pcre *re;  real_pcre *re;
# Line 1498  int runlength; Line 1533  int runlength;
1533  int c, size;  int c, size;
1534  int bracount = 0;  int bracount = 0;
1535  int brastack[200];  int brastack[200];
 int brastackptr = 0;  
1536  int top_backref = 0;  int top_backref = 0;
1537  uschar *code, *ptr;  unsigned int brastackptr = 0;
1538    uschar *code;
1539    const uschar *ptr;
1540    
1541  #ifdef DEBUG  #ifdef DEBUG
1542  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1527  if ((options & ~PUBLIC_OPTIONS) != 0) Line 1563  if ((options & ~PUBLIC_OPTIONS) != 0)
1563    return NULL;    return NULL;
1564    }    }
1565    
1566  #ifdef DEBUG  DPRINTF(("------------------------------------------------------------------\n"));
1567  printf("------------------------------------------------------------------\n");  DPRINTF(("%s\n", pattern));
 printf("%s\n", pattern);  
 #endif  
1568    
1569  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
1570  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1539  internal flag settings. Make an attempt Line 1573  internal flag settings. Make an attempt
1573  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
1574  clever for #-comments. */  clever for #-comments. */
1575    
1576  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
1577  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
1578    {    {
1579    int min, max;    int min, max;
# Line 1566  while ((c = *(++ptr)) != 0) Line 1600  while ((c = *(++ptr)) != 0)
1600    
1601      case '\\':      case '\\':
1602        {        {
1603        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
1604        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1605        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1606        if (c >= 0)        if (c >= 0)
# Line 1645  while ((c = *(++ptr)) != 0) Line 1679  while ((c = *(++ptr)) != 0)
1679        {        {
1680        if (*ptr == '\\')        if (*ptr == '\\')
1681          {          {
1682          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
1683          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1684          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
1685          }          }
1686        else class_charcount++;        else class_charcount++;
1687        ptr++;        ptr++;
# Line 1662  while ((c = *(++ptr)) != 0) Line 1696  while ((c = *(++ptr)) != 0)
1696    
1697        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
1698    
1699        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
1700          {          {
1701          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
1702          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
# Line 1770  while ((c = *(++ptr)) != 0) Line 1804  while ((c = *(++ptr)) != 0)
1804      continue;      continue;
1805    
1806      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
1807      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
1808        0 this is an unmatched bracket which will generate an error, but take care
1809        not to try to access brastack[-1]. */
1810    
1811      case ')':      case ')':
1812      length += 3;      length += 3;
1813        {        {
1814        int min = 1;        int minval = 1;
1815        int max = 1;        int maxval = 1;
1816        int duplength = length - brastack[--brastackptr];        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
1817    
1818        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
1819        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
1820    
1821        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
1822          {          {
1823          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
1824          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1825          }          }
1826        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
1827        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
1828        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
1829    
1830        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If there is a minimum > 1 we have to replicate up to minval-1 times;
1831        there is a limited maximum we have to replicate up to max-1 times and        if there is a limited maximum we have to replicate up to maxval-1 times
1832        allow for a BRAZERO item before each optional copy, as we also have to        and allow for a BRAZERO item before each optional copy, as we also have
1833        do before the first copy if the minimum is zero. */        to do before the first copy if the minimum is zero. */
1834    
1835        if (min == 0) length++;        if (minval == 0) length++;
1836          else if (min > 1) length += (min - 1) * duplength;          else if (minval > 1) length += (minval - 1) * duplength;
1837        if (max > min) length += (max - min) * (duplength + 1);        if (maxval > minval) length += (maxval - minval) * (duplength + 1);
1838        }        }
   
1839      continue;      continue;
1840    
1841      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1831  while ((c = *(++ptr)) != 0) Line 1866  while ((c = *(++ptr)) != 0)
1866    
1867        if (c == '\\')        if (c == '\\')
1868          {          {
1869          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
1870          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE);
1871          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
1872          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
# Line 1861  if (length > 65539) Line 1896  if (length > 65539)
1896    }    }
1897    
1898  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
1899  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
1900    rather than just "code", because it has been reported that one broken compiler
1901    fails on "code" because it is also an independent variable. It should make no
1902    difference to the value of the offsetof(). */
1903    
1904  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
1905  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
1906    
1907  if (re == NULL)  if (re == NULL)
# Line 1872  if (re == NULL) Line 1910  if (re == NULL)
1910    return NULL;    return NULL;
1911    }    }
1912    
1913    /* Put in the magic number and the options. */
1914    
1915  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
1916  re->options = options;  re->options = options;
1917    
# Line 1879  re->options = options; Line 1919  re->options = options;
1919  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
1920  of the function here. */  of the function here. */
1921    
1922  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
1923  code = re->code;  code = re->code;
1924  *code = OP_BRA;  *code = OP_BRA;
1925  bracount = 0;  bracount = 0;
# Line 1906  if (*errorptr != NULL) Line 1946  if (*errorptr != NULL)
1946    {    {
1947    (pcre_free)(re);    (pcre_free)(re);
1948    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
1949    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
1950    return NULL;    return NULL;
1951    }    }
1952    
# Line 1922  if ((options & PCRE_ANCHORED) == 0) Line 1962  if ((options & PCRE_ANCHORED) == 0)
1962      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
1963    else    else
1964      {      {
1965      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code);
1966      if (c >= 0)      if (ch >= 0)
1967        {        {
1968        re->first_char = c;        re->first_char = ch;
1969        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
1970        }        }
1971      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 2017  while (code < code_end) Line 2057  while (code < code_end)
2057      case OP_MINUPTO:      case OP_MINUPTO:
2058      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2059        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2060      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2061      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2062      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2063      code += 3;      code += 3;
# Line 2062  while (code < code_end) Line 2102  while (code < code_end)
2102    
2103      case OP_REF:      case OP_REF:
2104      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2105      break;      code ++;
2106        goto CLASS_REF_REPEAT;
2107    
2108      case OP_CLASS:      case OP_CLASS:
2109        case OP_NEGCLASS:
2110        {        {
2111        int i, min, max;        int i, min, max;
2112    
2113        code++;        if (*code++ == OP_CLASS) printf("    [");
2114        printf("    [");          else printf("   ^[");
2115    
2116        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2117          {          {
# Line 2092  while (code < code_end) Line 2134  while (code < code_end)
2134        printf("]");        printf("]");
2135        code += 32;        code += 32;
2136    
2137          CLASS_REF_REPEAT:
2138    
2139        switch(*code)        switch(*code)
2140          {          {
2141          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2204  Returns: TRUE if matched Line 2248  Returns: TRUE if matched
2248  */  */
2249    
2250  static BOOL  static BOOL
2251  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int number, register const uschar *eptr, int length, match_data *md)
2252  {  {
2253  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[number];
2254    
2255  #ifdef DEBUG  #ifdef DEBUG
2256  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2253  Returns: TRUE if matched Line 2297  Returns: TRUE if matched
2297  */  */
2298    
2299  static BOOL  static BOOL
2300  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode, int offset_top,
2301    match_data *md)    match_data *md)
2302  {  {
2303  for (;;)  for (;;)
# Line 2261  for (;;) Line 2305  for (;;)
2305    int min, max, ctype;    int min, max, ctype;
2306    register int i;    register int i;
2307    register int c;    register int c;
2308    BOOL minimize;    BOOL minimize = FALSE;
2309    
2310    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening bracket. Check the alternative branches in turn, failing if none
2311    match. We have to set the start offset if required and there is space    match. We have to set the start offset if required and there is space
# Line 2274  for (;;) Line 2318  for (;;)
2318    if ((int)*ecode >= OP_BRA)    if ((int)*ecode >= OP_BRA)
2319      {      {
2320      int number = (*ecode - OP_BRA) << 1;      int number = (*ecode - OP_BRA) << 1;
2321      int save_offset1, save_offset2;      int save_offset1 = 0, save_offset2 = 0;
2322    
2323      #ifdef DEBUG      DPRINTF(("start bracket %d\n", number/2));
     printf("start bracket %d\n", number/2);  
     #endif  
2324    
2325      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2326        {        {
# Line 2286  for (;;) Line 2328  for (;;)
2328        save_offset2 = md->offset_vector[number+1];        save_offset2 = md->offset_vector[number+1];
2329        md->offset_vector[number] = eptr - md->start_subject;        md->offset_vector[number] = eptr - md->start_subject;
2330    
2331        #ifdef DEBUG        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
       printf("saving %d %d\n", save_offset1, save_offset2);  
       #endif  
2332        }        }
2333    
2334      /* Recurse for all the alternatives. */      /* Recurse for all the alternatives. */
# Line 2300  for (;;) Line 2340  for (;;)
2340        }        }
2341      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2342    
2343      #ifdef DEBUG      DPRINTF(("bracket %d failed\n", number/2));
     printf("bracket %d failed\n", number/2);  
     #endif  
2344    
2345      if (number > 0 && number < md->offset_end)      if (number > 0 && number < md->offset_end)
2346        {        {
# Line 2401  for (;;) Line 2439  for (;;)
2439    
2440      case OP_BRAZERO:      case OP_BRAZERO:
2441        {        {
2442        uschar *next = ecode+1;        const uschar *next = ecode+1;
2443        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md)) return TRUE;
2444        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2445        ecode = next + 3;        ecode = next + 3;
# Line 2410  for (;;) Line 2448  for (;;)
2448    
2449      case OP_BRAMINZERO:      case OP_BRAMINZERO:
2450        {        {
2451        uschar *next = ecode+1;        const uschar *next = ecode+1;
2452        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
2453        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md)) return TRUE;
2454        ecode++;        ecode++;
# Line 2426  for (;;) Line 2464  for (;;)
2464      case OP_KETRMAX:      case OP_KETRMAX:
2465        {        {
2466        int number;        int number;
2467        uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
2468    
2469        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)
2470          {          {
# Line 2441  for (;;) Line 2479  for (;;)
2479    
2480        number = (*prev - OP_BRA) << 1;        number = (*prev - OP_BRA) << 1;
2481    
2482        #ifdef DEBUG        DPRINTF(("end bracket %d\n", number/2));
       printf("end bracket %d\n", number/2);  
       #endif  
2483    
2484        if (number > 0)        if (number > 0)
2485          {          {
# Line 2675  for (;;) Line 2711  for (;;)
2711    
2712        else        else
2713          {          {
2714          uschar *pp = eptr;          const uschar *pp = eptr;
2715          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2716            {            {
2717            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(number, eptr, length, md)) break;
# Line 2695  for (;;) Line 2731  for (;;)
2731      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2732      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2733      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2734      versions of a character. */      versions of a character, and we have to behave differently for positive and
2735        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2736        treated differently. */
2737    
2738      case OP_CLASS:      case OP_CLASS:
2739        case OP_NEGCLASS:
2740        {        {
2741        uschar *data = ecode + 1;  /* Save for matching */        BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2742        ecode += 33;               /* Advance past the item */        const uschar *data = ecode + 1;  /* Save for matching */
2743          ecode += 33;                     /* Advance past the item */
2744    
2745        switch (*ecode)        switch (*ecode)
2746          {          {
# Line 2727  for (;;) Line 2767  for (;;)
2767          break;          break;
2768    
2769          default:               /* No repeat follows */          default:               /* No repeat follows */
2770          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2771          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2772          }          }
2773    
2774        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2744  for (;;) Line 2777  for (;;)
2777          {          {
2778          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2779          c = *eptr++;          c = *eptr++;
2780          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2781          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2782            runtime caseless, continue if either case is in the map. */
2783    
2784            if (!nasty_case)
2785              {
2786              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2787              if (md->runtime_caseless)
2788                {
2789                c = pcre_fcc[c];
2790                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2791                }
2792              }
2793    
2794            /* Runtime caseless and it was a negative class. Continue only if
2795            both cases are in the map. */
2796    
2797            else
2798            {            {
2799              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2800            c = pcre_fcc[c];            c = pcre_fcc[c];
2801            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2802            }            }
2803    
2804          return FALSE;          return FALSE;
2805          }          }
2806    
# Line 2768  for (;;) Line 2819  for (;;)
2819            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2820            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2821            c = *eptr++;            c = *eptr++;
2822            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2823            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2824              runtime caseless, continue if either case is in the map. */
2825    
2826              if (!nasty_case)
2827              {              {
2828                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2829                if (md->runtime_caseless)
2830                  {
2831                  c = pcre_fcc[c];
2832                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2833                  }
2834                }
2835    
2836              /* Runtime caseless and it was a negative class. Continue only if
2837              both cases are in the map. */
2838    
2839              else
2840                {
2841                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2842              c = pcre_fcc[c];              c = pcre_fcc[c];
2843              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2844              }              }
2845    
2846            return FALSE;            return FALSE;
2847            }            }
2848          /* Control never gets here */          /* Control never gets here */
# Line 2783  for (;;) Line 2852  for (;;)
2852    
2853        else        else
2854          {          {
2855          uschar *pp = eptr;          const uschar *pp = eptr;
2856          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
2857            {            {
2858            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2859            c = *eptr;            c = *eptr;
2860            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2861            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2862              runtime caseless, continue if either case is in the map. */
2863    
2864              if (!nasty_case)
2865                {
2866                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2867                if (md->runtime_caseless)
2868                  {
2869                  c = pcre_fcc[c];
2870                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2871                  }
2872                }
2873    
2874              /* Runtime caseless and it was a negative class. Continue only if
2875              both cases are in the map. */
2876    
2877              else
2878              {              {
2879                if ((data[c/8] & (1 << (c&7))) == 0) break;
2880              c = pcre_fcc[c];              c = pcre_fcc[c];
2881              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2882              }              }
2883    
2884            break;            break;
2885            }            }
2886    
# Line 2811  for (;;) Line 2898  for (;;)
2898        register int length = ecode[1];        register int length = ecode[1];
2899        ecode += 2;        ecode += 2;
2900    
2901        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
2902        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
2903          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
2904        else        else
# Line 2822  for (;;) Line 2909  for (;;)
2909          }          }
2910        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
2911        printf("\n");        printf("\n");
2912        #endif  #endif
2913    
2914        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
2915        if (md->caseless)        if (md->caseless)
# Line 2879  for (;;) Line 2966  for (;;)
2966      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
2967      characters and work backwards. */      characters and work backwards. */
2968    
2969      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
2970      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
2971    
2972      if (md->caseless)      if (md->caseless)
2973        {        {
# Line 2901  for (;;) Line 2986  for (;;)
2986          }          }
2987        else        else
2988          {          {
2989          uschar *pp = eptr;          const uschar *pp = eptr;
2990          for (i = min; i < max; i++)          for (i = min; i < max; i++)
2991            {            {
2992            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;
# Line 2931  for (;;) Line 3016  for (;;)
3016          }          }
3017        else        else
3018          {          {
3019          uschar *pp = eptr;          const uschar *pp = eptr;
3020          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3021            {            {
3022            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
# Line 2947  for (;;) Line 3032  for (;;)
3032      /* Match a negated single character */      /* Match a negated single character */
3033    
3034      case OP_NOT:      case OP_NOT:
3035      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3036      ecode++;      ecode++;
3037      if (md->caseless)      if (md->caseless)
3038        {        {
# Line 3006  for (;;) Line 3091  for (;;)
3091      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3092      characters and work backwards. */      characters and work backwards. */
3093    
3094      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3095      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3096    
3097      if (md->caseless)      if (md->caseless)
3098        {        {
# Line 3028  for (;;) Line 3111  for (;;)
3111          }          }
3112        else        else
3113          {          {
3114          uschar *pp = eptr;          const uschar *pp = eptr;
3115          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3116            {            {
3117            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;
# Line 3058  for (;;) Line 3141  for (;;)
3141          }          }
3142        else        else
3143          {          {
3144          uschar *pp = eptr;          const uschar *pp = eptr;
3145          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3146            {            {
3147            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
# Line 3175  for (;;) Line 3258  for (;;)
3258    
3259      else      else
3260        {        {
3261        uschar *pp = eptr;        const uschar *pp = eptr;
3262        switch(ctype)        switch(ctype)
3263          {          {
3264          case OP_ANY:          case OP_ANY:
# Line 3259  for (;;) Line 3342  for (;;)
3342      /* There's been some horrible disaster. */      /* There's been some horrible disaster. */
3343    
3344      default:      default:
3345      #ifdef DEBUG      DPRINTF(("Unknown opcode %d\n", *ecode));
     printf("Unknown opcode %d\n", *ecode);  
     #endif  
3346      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;      md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
3347      return FALSE;      return FALSE;
3348      }      }
# Line 3277  for (;;) Line 3358  for (;;)
3358    
3359    
3360  /*************************************************  /*************************************************
3361    *         Segregate setjmp()                     *
3362    *************************************************/
3363    
3364    /* The -Wall option of gcc gives warnings for all local variables when setjmp()
3365    is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
3366    hide it in a separate function. This is called only when PCRE_EXTRA is set,
3367    since it's needed only for the extension \X option, and with any luck, a good
3368    compiler will spot the tail recursion and compile it efficiently.
3369    
3370    Arguments:
3371       eptr        pointer in subject
3372       ecode       position in code
3373       offset_top  current top pointer
3374       md          pointer to "static" info for the match
3375    
3376    Returns:       TRUE if matched
3377    */
3378    
3379    static BOOL
3380    match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3381      match_data *match_block)
3382    {
3383    return setjmp(match_block->fail_env) == 0 &&
3384          match(eptr, ecode, offset_top, match_block);
3385    }
3386    
3387    
3388    
3389    /*************************************************
3390  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
3391  *************************************************/  *************************************************/
3392    
# Line 3303  int Line 3413  int
3413  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3414    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3415  {  {
3416  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3417  int first_char = -1;  int first_char = -1;
3418  match_data match_block;  match_data match_block;
3419  uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3420  uschar *start_match = (uschar *)subject;  const uschar *start_match = (const uschar *)subject;
3421  uschar *end_subject;  const uschar *end_subject;
3422  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3423  real_pcre_extra *extra = (real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3424    BOOL using_temporary_offsets = FALSE;
3425  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3426  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3427    
# Line 3321  if (re == NULL || subject == NULL || Line 3431  if (re == NULL || subject == NULL ||
3431     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3432  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
3433    
3434  match_block.start_subject = (uschar *)subject;  match_block.start_subject = (const uschar *)subject;
3435  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3436  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3437    
# Line 3340  match_block.errorcode = PCRE_ERROR_NOMAT Line 3450  match_block.errorcode = PCRE_ERROR_NOMAT
3450    
3451  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3452  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3453  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3454  multiple of 2. */  of 2. */
3455    
3456  ocount &= (-2);  ocount = offsetcount & (-2);
3457  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3458    {    {
3459    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3460    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3461    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3462    #ifdef DEBUG    using_temporary_offsets = TRUE;
3463    printf("Got memory to hold back references\n");    DPRINTF(("Got memory to hold back references\n"));
   #endif  
3464    }    }
3465  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3466    
# Line 3404  if (!anchored) Line 3513  if (!anchored)
3513    
3514  do  do
3515    {    {
3516      int rc;
3517    register int *iptr = match_block.offset_vector;    register int *iptr = match_block.offset_vector;
3518    register int *iend = iptr + resetcount;    register int *iend = iptr + resetcount;
3519    
# Line 3445  do Line 3555  do
3555        }        }
3556      }      }
3557    
3558    #ifdef DEBUG  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
3559    printf(">>>> Match against: ");    printf(">>>> Match against: ");
3560    pchars(start_match, end_subject - start_match, TRUE, &match_block);    pchars(start_match, end_subject - start_match, TRUE, &match_block);
3561    printf("\n");    printf("\n");
3562    #endif  #endif
3563    
3564    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
3565    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
# Line 3459  do Line 3569  do
3569    if certain parts of the pattern were not used.    if certain parts of the pattern were not used.
3570    
3571    Before starting the match, we have to set up a longjmp() target to enable    Before starting the match, we have to set up a longjmp() target to enable
3572    the "cut" operation to fail a match completely without backtracking. */    the "cut" operation to fail a match completely without backtracking. This
3573      is done in a separate function to avoid compiler warnings. We need not do
3574      it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3575      enabled. */
3576    
3577    if (setjmp(match_block.fail_env) == 0 &&    if ((re->options & PCRE_EXTRA) != 0)
       match(start_match, re->code, 2, &match_block))  
3578      {      {
3579      int rc;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3580          continue;
3581      if (ocount != offsetcount)      }
3582        {    else if (!match(start_match, re->code, 2, &match_block)) continue;
       if (offsetcount >= 4)  
         {  
         memcpy(offsets + 2, match_block.offset_vector + 2,  
           (offsetcount - 2) * sizeof(int));  
         #ifdef DEBUG  
         printf("Copied offsets; freeing temporary memory\n");  
         #endif  
         }  
       if (match_block.end_offset_top > offsetcount)  
         match_block.offset_overflow = TRUE;  
3583    
3584        #ifdef DEBUG    /* Copy the offset information from temporary store if necessary */
       printf("Freeing temporary memory\n");  
       #endif  
3585    
3586        (pcre_free)(match_block.offset_vector);    if (using_temporary_offsets)
3587        {
3588        if (offsetcount >= 4)
3589          {
3590          memcpy(offsets + 2, match_block.offset_vector + 2,
3591            (offsetcount - 2) * sizeof(int));
3592          DPRINTF(("Copied offsets from temporary memory\n"));
3593        }        }
3594        if (match_block.end_offset_top > offsetcount)
3595          match_block.offset_overflow = TRUE;
3596    
3597      rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;      DPRINTF(("Freeing temporary memory\n"));
3598        (pcre_free)(match_block.offset_vector);
3599        }
3600    
3601      if (match_block.offset_end < 2) rc = 0; else    rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
       {  
       offsets[0] = start_match - match_block.start_subject;  
       offsets[1] = match_block.end_match_ptr - match_block.start_subject;  
       }  
3602    
3603      #ifdef DEBUG    if (match_block.offset_end < 2) rc = 0; else
3604      printf(">>>> returning %d\n", rc);      {
3605      #endif      offsets[0] = start_match - match_block.start_subject;
3606      return rc;      offsets[1] = match_block.end_match_ptr - match_block.start_subject;
3607      }      }
3608    
3609      DPRINTF((">>>> returning %d\n", rc));
3610      return rc;
3611    }    }
3612  while (!anchored &&  while (!anchored &&
3613         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3614         start_match++ < end_subject);         start_match++ < end_subject);
3615    
3616  #ifdef DEBUG  if (using_temporary_offsets)
3617  printf(">>>> returning %d\n", match_block.errorcode);    {
3618  #endif    DPRINTF(("Freeing temporary memory\n"));
3619      (pcre_free)(match_block.offset_vector);
3620      }
3621    
3622    DPRINTF((">>>> returning %d\n", match_block.errorcode));
3623    
3624  return match_block.errorcode;  return match_block.errorcode;
3625  }  }

Legend:
Removed from v.5  
changed lines
  Added in v.15

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12