/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 52 by nigel, Sat Feb 24 21:39:37 2007 UTC | revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997-2000 University of Cambridge             Copyright (c) 1997-2001 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 60  the external pcre header. */ Line 60  the external pcre header. */
60  #endif  #endif
61    
62    
63  /* Number of items on the nested bracket stacks at compile time. This should  /* Maximum number of items on the nested bracket stacks at compile time. This
64  not be set greater than 200. */  applies to the nesting of all kinds of parentheses. It does not limit
65    un-nested, non-capturing parentheses. This number can be made bigger if
66    necessary - it is used to dimension one int and one unsigned char vector at
67    compile time. */
68    
69  #define BRASTACK_SIZE 200  #define BRASTACK_SIZE 200
70    
# Line 95  static const char *OP_names[] = { Line 98  static const char *OP_names[] = {
98    "class", "Ref", "Recurse",    "class", "Ref", "Recurse",
99    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
100    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
101    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Branumber", "Bra"
102  };  };
103  #endif  #endif
104    
# Line 111  static const short int escapes[] = { Line 114  static const short int escapes[] = {
114      0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */      0,      0,      0,      0,      0,      0,      0,      0,   /* H - O */
115      0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */      0,      0,      0, -ESC_S,      0,      0,      0, -ESC_W,   /* P - W */
116      0,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */      0,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
117    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,  ESC_E,  ESC_F,      0,   /* ` - g */
118      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,  ESC_N,      0,   /* h - o */
119      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,  ESC_R, -ESC_s,  ESC_T,      0,      0, -ESC_w,   /* p - w */
120      0,      0, -ESC_z                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
121  };  };
122    
# Line 814  for (;;) Line 817  for (;;)
817      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
818    
819      case OP_REVERSE:      case OP_REVERSE:
820        case OP_BRANUMBER:
821        case OP_CREF:
822      cc++;      cc++;
823      /* Fall through */      /* Fall through */
824    
     case OP_CREF:  
825      case OP_OPT:      case OP_OPT:
826      cc++;      cc++;
827      /* Fall through */      /* Fall through */
# Line 871  for (;;) Line 875  for (;;)
875      /* Check a class for variable quantification */      /* Check a class for variable quantification */
876    
877      case OP_CLASS:      case OP_CLASS:
878      cc += (*cc == OP_REF)? 2 : 33;      cc += 33;
879    
880      switch (*cc)      switch (*cc)
881        {        {
# Line 978  return -1; Line 982  return -1;
982    
983  Arguments:  Arguments:
984    options      the option bits    options      the option bits
985    brackets     points to number of brackets used    brackets     points to number of extracting brackets used
986    code         points to the pointer to the current code point    code         points to the pointer to the current code point
987    ptrptr       points to the current pattern pointer    ptrptr       points to the current pattern pointer
988    errorptr     points to pointer to error message    errorptr     points to pointer to error message
# Line 1029  for (;; ptr++) Line 1033  for (;; ptr++)
1033    int class_charcount;    int class_charcount;
1034    int class_lastchar;    int class_lastchar;
1035    int newoptions;    int newoptions;
1036    int condref;    int skipbytes;
1037    int subreqchar;    int subreqchar;
1038    
1039    c = *ptr;    c = *ptr;
# Line 1040  for (;; ptr++) Line 1044  for (;; ptr++)
1044        {        {
1045        /* The space before the ; is to avoid a warning on a silly compiler        /* The space before the ; is to avoid a warning on a silly compiler
1046        on the Macintosh. */        on the Macintosh. */
1047        while ((c = *(++ptr)) != 0 && c != '\n') ;        while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
1048        continue;        continue;
1049        }        }
1050      }      }
# Line 1578  for (;; ptr++) Line 1582  for (;; ptr++)
1582        OP_BRAZERO in front of it, and because the group appears once in the        OP_BRAZERO in front of it, and because the group appears once in the
1583        data, whereas in other cases it appears the minimum number of times. For        data, whereas in other cases it appears the minimum number of times. For
1584        this reason, it is simplest to treat this case separately, as otherwise        this reason, it is simplest to treat this case separately, as otherwise
1585        the code gets far too mess. There are several special subcases when the        the code gets far too messy. There are several special subcases when the
1586        minimum is zero. */        minimum is zero. */
1587    
1588        if (repeat_min == 0)        if (repeat_min == 0)
# Line 1729  for (;; ptr++) Line 1733  for (;; ptr++)
1733    
1734      case '(':      case '(':
1735      newoptions = options;      newoptions = options;
1736      condref = -1;      skipbytes = 0;
1737    
1738      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1739        {        {
# Line 1752  for (;; ptr++) Line 1756  for (;; ptr++)
1756          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
1757          if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)          if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1758            {            {
1759            condref = *ptr - '0';            int condref = *ptr - '0';
1760            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';            while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1761            if (condref == 0)            if (condref == 0)
1762              {              {
# Line 1760  for (;; ptr++) Line 1764  for (;; ptr++)
1764              goto FAILED;              goto FAILED;
1765              }              }
1766            ptr++;            ptr++;
1767              code[3] = OP_CREF;
1768              code[4] = condref >> 8;
1769              code[5] = condref & 255;
1770              skipbytes = 3;
1771            }            }
1772          else ptr--;          else ptr--;
1773          break;          break;
# Line 1862  for (;; ptr++) Line 1870  for (;; ptr++)
1870          }          }
1871        }        }
1872    
1873      /* Else we have a referencing group; adjust the opcode. */      /* Else we have a referencing group; adjust the opcode. If the bracket
1874        number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and
1875        arrange for the true number to follow later, in an OP_BRANUMBER item. */
1876    
1877      else      else
1878        {        {
1879        if (++(*brackets) > EXTRACT_MAX)        if (++(*brackets) > EXTRACT_BASIC_MAX)
1880          {          {
1881          *errorptr = ERR13;          bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;
1882          goto FAILED;          code[3] = OP_BRANUMBER;
1883            code[4] = *brackets >> 8;
1884            code[5] = *brackets & 255;
1885            skipbytes = 3;
1886          }          }
1887        bravalue = OP_BRA + *brackets;        else bravalue = OP_BRA + *brackets;
1888        }        }
1889    
1890      /* Process nested bracketed re. Assertions may not be repeated, but other      /* Process nested bracketed re. Assertions may not be repeated, but other
# Line 1887  for (;; ptr++) Line 1900  for (;; ptr++)
1900           options | PCRE_INGROUP,       /* Set for all nested groups */           options | PCRE_INGROUP,       /* Set for all nested groups */
1901           ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?           ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1902             newoptions & PCRE_IMS : -1, /* Pass ims options if changed */             newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1903           brackets,                     /* Bracket level */           brackets,                     /* Extracting bracket count */
1904           &tempcode,                    /* Where to put code (updated) */           &tempcode,                    /* Where to put code (updated) */
1905           &ptr,                         /* Input pointer (updated) */           &ptr,                         /* Input pointer (updated) */
1906           errorptr,                     /* Where to put an error message */           errorptr,                     /* Where to put an error message */
1907           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
1908            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1909           condref,                      /* Condition reference number */           skipbytes,                    /* Skip over OP_COND/OP_BRANUMBER */
1910           &subreqchar,                  /* For possible last char */           &subreqchar,                  /* For possible last char */
1911           &subcountlits,                /* For literal count */           &subcountlits,                /* For literal count */
1912           cd))                          /* Tables block */           cd))                          /* Tables block */
# Line 1907  for (;; ptr++) Line 1920  for (;; ptr++)
1920      /* If this is a conditional bracket, check that there are no more than      /* If this is a conditional bracket, check that there are no more than
1921      two branches in the group. */      two branches in the group. */
1922    
1923      if (bravalue == OP_COND)      else if (bravalue == OP_COND)
1924        {        {
1925        uschar *tc = code;        uschar *tc = code;
1926        condcount = 0;        condcount = 0;
# Line 1974  for (;; ptr++) Line 1987  for (;; ptr++)
1987        {        {
1988        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1989          {          {
1990            int number = -c - ESC_REF;
1991          previous = code;          previous = code;
1992          *code++ = OP_REF;          *code++ = OP_REF;
1993          *code++ = -c - ESC_REF;          *code++ = number >> 8;
1994            *code++ = number & 255;
1995          }          }
1996        else        else
1997          {          {
# Line 2011  for (;; ptr++) Line 2026  for (;; ptr++)
2026            {            {
2027            /* The space before the ; is to avoid a warning on a silly compiler            /* The space before the ; is to avoid a warning on a silly compiler
2028            on the Macintosh. */            on the Macintosh. */
2029            while ((c = *(++ptr)) != 0 && c != '\n') ;            while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
2030            if (c == 0) break;            if (c == 0) break;
2031            continue;            continue;
2032            }            }
# Line 2100  Argument: Line 2115  Argument:
2115    ptrptr      -> the address of the current pattern pointer    ptrptr      -> the address of the current pattern pointer
2116    errorptr    -> pointer to error message    errorptr    -> pointer to error message
2117    lookbehind  TRUE if this is a lookbehind assertion    lookbehind  TRUE if this is a lookbehind assertion
2118    condref     >= 0 for OPT_CREF setting at start of conditional group    skipbytes   skip this many bytes at start (for OP_COND, OP_BRANUMBER)
2119    reqchar     -> place to put the last required character, or a negative number    reqchar     -> place to put the last required character, or a negative number
2120    countlits   -> place to put the shortest literal count of any branch    countlits   -> place to put the shortest literal count of any branch
2121    cd          points to the data block with tables pointers    cd          points to the data block with tables pointers
# Line 2110  Returns: TRUE on success Line 2125  Returns: TRUE on success
2125    
2126  static BOOL  static BOOL
2127  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
2128    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes,
2129    int *reqchar, int *countlits, compile_data *cd)    int *reqchar, int *countlits, compile_data *cd)
2130  {  {
2131  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
# Line 2123  int branchreqchar, branchcountlits; Line 2138  int branchreqchar, branchcountlits;
2138    
2139  *reqchar = -1;  *reqchar = -1;
2140  *countlits = INT_MAX;  *countlits = INT_MAX;
2141  code += 3;  code += 3 + skipbytes;
   
 /* At the start of a reference-based conditional group, insert the reference  
 number as an OP_CREF item. */  
   
 if (condref >= 0)  
   {  
   *code++ = OP_CREF;  
   *code++ = condref;  
   }  
2142    
2143  /* Loop for each alternative branch */  /* Loop for each alternative branch */
2144    
# Line 2284  for (;;) Line 2290  for (;;)
2290      break;      break;
2291    
2292      case OP_CREF:      case OP_CREF:
2293      code += 2;      case OP_BRANUMBER:
2294        code += 3;
2295      break;      break;
2296    
2297      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
# Line 2547  while ((c = *(++ptr)) != 0) Line 2554  while ((c = *(++ptr)) != 0)
2554    {    {
2555    int min, max;    int min, max;
2556    int class_charcount;    int class_charcount;
2557      int bracket_length;
2558    
2559    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
2560      {      {
# Line 2555  while ((c = *(++ptr)) != 0) Line 2563  while ((c = *(++ptr)) != 0)
2563        {        {
2564        /* The space before the ; is to avoid a warning on a silly compiler        /* The space before the ; is to avoid a warning on a silly compiler
2565        on the Macintosh. */        on the Macintosh. */
2566        while ((c = *(++ptr)) != 0 && c != '\n') ;        while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
2567        continue;        continue;
2568        }        }
2569      }      }
# Line 2581  while ((c = *(++ptr)) != 0) Line 2589  while ((c = *(++ptr)) != 0)
2589        }        }
2590      length++;      length++;
2591    
2592      /* A back reference needs an additional char, plus either one or 5      /* A back reference needs an additional 2 bytes, plus either one or 5
2593      bytes for a repeat. We also need to keep the value of the highest      bytes for a repeat. We also need to keep the value of the highest
2594      back reference. */      back reference. */
2595    
# Line 2589  while ((c = *(++ptr)) != 0) Line 2597  while ((c = *(++ptr)) != 0)
2597        {        {
2598        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2599        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2600        length++;   /* For single back reference */        length += 2;   /* For single back reference */
2601        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2602          {          {
2603          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
# Line 2687  while ((c = *(++ptr)) != 0) Line 2695  while ((c = *(++ptr)) != 0)
2695    
2696      case '(':      case '(':
2697      branch_newextra = 0;      branch_newextra = 0;
2698        bracket_length = 3;
2699    
2700      /* Handle special forms of bracket, which all start (? */      /* Handle special forms of bracket, which all start (? */
2701    
# Line 2754  while ((c = *(++ptr)) != 0) Line 2763  while ((c = *(++ptr)) != 0)
2763          if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)          if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2764            {            {
2765            ptr += 4;            ptr += 4;
2766            length += 2;            length += 3;
2767            while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;            while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2768            if (*ptr != ')')            if (*ptr != ')')
2769              {              {
# Line 2881  while ((c = *(++ptr)) != 0) Line 2890  while ((c = *(++ptr)) != 0)
2890        }        }
2891    
2892      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
2893      Perlish way. */      Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to
2894        need an additional 3 bytes of store per extracting bracket. */
2895    
2896      else bracount++;      else
2897          {
2898          bracount++;
2899          if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;
2900          }
2901    
2902      /* Non-special forms of bracket. Save length for computing whole length      /* Save length for computing whole length at end if there's a repeat that
2903      at end if there's a repeat that requires duplication of the group. Also      requires duplication of the group. Also save the current value of
2904      save the current value of branch_extra, and start the new group with      branch_extra, and start the new group with the new value. If non-zero, this
2905      the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3      will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */
     for a lookbehind assertion. */  
2906    
2907      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2908        {        {
# Line 2901  while ((c = *(++ptr)) != 0) Line 2914  while ((c = *(++ptr)) != 0)
2914      branch_extra = branch_newextra;      branch_extra = branch_newextra;
2915    
2916      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2917      length += 3;      length += bracket_length;
2918      continue;      continue;
2919    
2920      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
# Line 2981  while ((c = *(++ptr)) != 0) Line 2994  while ((c = *(++ptr)) != 0)
2994            {            {
2995            /* The space before the ; is to avoid a warning on a silly compiler            /* The space before the ; is to avoid a warning on a silly compiler
2996            on the Macintosh. */            on the Macintosh. */
2997            while ((c = *(++ptr)) != 0 && c != '\n') ;            while ((c = *(++ptr)) != 0 && c != NEWLINE) ;
2998            continue;            continue;
2999            }            }
3000          }          }
# Line 3062  ptr = (const uschar *)pattern; Line 3075  ptr = (const uschar *)pattern;
3075  code = re->code;  code = re->code;
3076  *code = OP_BRA;  *code = OP_BRA;
3077  bracount = 0;  bracount = 0;
3078  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, 0,
3079    &reqchar, &countlits, &compile_block);    &reqchar, &countlits, &compile_block);
3080  re->top_bracket = bracount;  re->top_bracket = bracount;
3081  re->top_backref = top_backref;  re->top_backref = top_backref;
# Line 3176  while (code < code_end) Line 3189  while (code < code_end)
3189    
3190    if (*code >= OP_BRA)    if (*code >= OP_BRA)
3191      {      {
3192      printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      if (*code - OP_BRA > EXTRACT_BASIC_MAX)
3193          printf("%3d Bra extra", (code[1] << 8) + code[2]);
3194        else
3195          printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);
3196      code += 2;      code += 2;
3197      }      }
3198    
# Line 3187  while (code < code_end) Line 3203  while (code < code_end)
3203      code++;      code++;
3204      break;      break;
3205    
     case OP_COND:  
     printf("%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     printf(" %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
3206      case OP_CHARS:      case OP_CHARS:
3207      charlength = *(++code);      charlength = *(++code);
3208      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 3213  while (code < code_end) Line 3219  while (code < code_end)
3219      case OP_ASSERTBACK:      case OP_ASSERTBACK:
3220      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
3221      case OP_ONCE:      case OP_ONCE:
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
3222      case OP_REVERSE:      case OP_REVERSE:
3223        case OP_BRANUMBER:
3224        case OP_COND:
3225        case OP_CREF:
3226      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
3227      code += 2;      code += 2;
3228      break;      break;
# Line 3290  while (code < code_end) Line 3295  while (code < code_end)
3295      break;      break;
3296    
3297      case OP_REF:      case OP_REF:
3298      printf("    \\%d", *(++code));      printf("    \\%d", (code[1] << 8) | code[2]);
3299      code ++;      code += 3;
3300      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
3301    
3302      case OP_CLASS:      case OP_CLASS:
# Line 3504  for (;;) Line 3509  for (;;)
3509    
3510    if (op > OP_BRA)    if (op > OP_BRA)
3511      {      {
3512        int offset;
3513      int number = op - OP_BRA;      int number = op - OP_BRA;
3514      int offset = number << 1;  
3515        /* For extended extraction brackets (large number), we have to fish out the
3516        number from a dummy opcode at the start. */
3517    
3518        if (number > EXTRACT_BASIC_MAX) number = (ecode[4] << 8) | ecode[5];
3519        offset = number << 1;
3520    
3521  #ifdef DEBUG  #ifdef DEBUG
3522      printf("start bracket %d subject=", number);      printf("start bracket %d subject=", number);
# Line 3535  for (;;) Line 3546  for (;;)
3546        md->offset_vector[offset] = save_offset1;        md->offset_vector[offset] = save_offset1;
3547        md->offset_vector[offset+1] = save_offset2;        md->offset_vector[offset+1] = save_offset2;
3548        md->offset_vector[md->offset_end - number] = save_offset3;        md->offset_vector[md->offset_end - number] = save_offset3;
3549    
3550        return FALSE;        return FALSE;
3551        }        }
3552    
# Line 3567  for (;;) Line 3579  for (;;)
3579      case OP_COND:      case OP_COND:
3580      if (ecode[3] == OP_CREF)         /* Condition is extraction test */      if (ecode[3] == OP_CREF)         /* Condition is extraction test */
3581        {        {
3582        int offset = ecode[4] << 1;    /* Doubled reference number */        int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled ref number */
3583        return match(eptr,        return match(eptr,
3584          ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?          ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
3585            5 : 3 + (ecode[1] << 8) + ecode[2]),            6 : 3 + (ecode[1] << 8) + ecode[2]),
3586          offset_top, md, ims, eptrb, match_isgroup);          offset_top, md, ims, eptrb, match_isgroup);
3587        }        }
3588    
# Line 3590  for (;;) Line 3602  for (;;)
3602        }        }
3603      /* Control never reaches here */      /* Control never reaches here */
3604    
3605      /* Skip over conditional reference data if encountered (should not be) */      /* Skip over conditional reference or large extraction number data if
3606        encountered. */
3607    
3608      case OP_CREF:      case OP_CREF:
3609      ecode += 2;      case OP_BRANUMBER:
3610        ecode += 3;
3611      break;      break;
3612    
3613      /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched      /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
# Line 3859  for (;;) Line 3873  for (;;)
3873    
3874        if (*prev != OP_COND)        if (*prev != OP_COND)
3875          {          {
3876            int offset;
3877          int number = *prev - OP_BRA;          int number = *prev - OP_BRA;
3878          int offset = number << 1;  
3879            /* For extended extraction brackets (large number), we have to fish out
3880            the number from a dummy opcode at the start. */
3881    
3882            if (number > EXTRACT_BASIC_MAX) number = (prev[4] << 8) | prev[5];
3883            offset = number << 1;
3884    
3885  #ifdef DEBUG  #ifdef DEBUG
3886          printf("end bracket %d", number);          printf("end bracket %d", number);
# Line 3920  for (;;) Line 3940  for (;;)
3940      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3941      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
3942        {        {
3943        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != NEWLINE) return FALSE;
3944        ecode++;        ecode++;
3945        break;        break;
3946        }        }
# Line 3939  for (;;) Line 3959  for (;;)
3959      case OP_DOLL:      case OP_DOLL:
3960      if ((ims & PCRE_MULTILINE) != 0)      if ((ims & PCRE_MULTILINE) != 0)
3961        {        {
3962        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }        if (eptr < md->end_subject) { if (*eptr != NEWLINE) return FALSE; }
3963          else { if (md->noteol) return FALSE; }          else { if (md->noteol) return FALSE; }
3964        ecode++;        ecode++;
3965        break;        break;
# Line 3950  for (;;) Line 3970  for (;;)
3970        if (!md->endonly)        if (!md->endonly)
3971          {          {
3972          if (eptr < md->end_subject - 1 ||          if (eptr < md->end_subject - 1 ||
3973             (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;             (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE;
3974    
3975          ecode++;          ecode++;
3976          break;          break;
# Line 3969  for (;;) Line 3989  for (;;)
3989    
3990      case OP_EODN:      case OP_EODN:
3991      if (eptr < md->end_subject - 1 ||      if (eptr < md->end_subject - 1 ||
3992         (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;         (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE;
3993      ecode++;      ecode++;
3994      break;      break;
3995    
# Line 3991  for (;;) Line 4011  for (;;)
4011      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
4012    
4013      case OP_ANY:      case OP_ANY:
4014      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)
4015        return FALSE;        return FALSE;
4016      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
4017  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 4054  for (;;) Line 4074  for (;;)
4074      case OP_REF:      case OP_REF:
4075        {        {
4076        int length;        int length;
4077        int offset = ecode[1] << 1;                /* Doubled reference number */        int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled ref number */
4078        ecode += 2;                                /* Advance past the item */        ecode += 3;                                     /* Advance past item */
4079    
4080        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, set the length to be longer than the amount
4081        of subject left; this ensures that every attempt at a match fails. We        of subject left; this ensures that every attempt at a match fails. We
# Line 4599  for (;;) Line 4619  for (;;)
4619          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
4620            {            {
4621            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject ||
4622               (*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0))               (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))
4623              return FALSE;              return FALSE;
4624            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4625            }            }
# Line 4608  for (;;) Line 4628  for (;;)
4628  #endif  #endif
4629        /* Non-UTF8 can be faster */        /* Non-UTF8 can be faster */
4630        if ((ims & PCRE_DOTALL) == 0)        if ((ims & PCRE_DOTALL) == 0)
4631          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == NEWLINE) return FALSE; }
4632        else eptr += min;        else eptr += min;
4633        break;        break;
4634    
# Line 4663  for (;;) Line 4683  for (;;)
4683          switch(ctype)          switch(ctype)
4684            {            {
4685            case OP_ANY:            case OP_ANY:
4686            if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;            if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return FALSE;
4687  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4688            if (md->utf8)            if (md->utf8)
4689              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
# Line 4718  for (;;) Line 4738  for (;;)
4738              {              {
4739              for (i = min; i < max; i++)              for (i = min; i < max; i++)
4740                {                {
4741                if (eptr >= md->end_subject || *eptr++ == '\n') break;                if (eptr >= md->end_subject || *eptr++ == NEWLINE) break;
4742                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
4743                }                }
4744              }              }
# Line 4738  for (;;) Line 4758  for (;;)
4758            {            {
4759            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4760              {              {
4761              if (eptr >= md->end_subject || *eptr == '\n') break;              if (eptr >= md->end_subject || *eptr == NEWLINE) break;
4762              eptr++;              eptr++;
4763              }              }
4764            }            }
# Line 4879  const uschar *req_char_ptr = start_match Line 4899  const uschar *req_char_ptr = start_match
4899  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4900  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4901  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
4902  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored;
4903  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline;
4904    
4905  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;  if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
4906    
# Line 4888  if (re == NULL || subject == NULL || Line 4908  if (re == NULL || subject == NULL ||
4908     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
4909  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
4910    
4911    anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4912    startline = (re->options & PCRE_STARTLINE) != 0;
4913    
4914  match_block.start_pattern = re->code;  match_block.start_pattern = re->code;
4915  match_block.start_subject = (const uschar *)subject;  match_block.start_subject = (const uschar *)subject;
4916  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
# Line 5016  do Line 5039  do
5039      {      {
5040      if (start_match > match_block.start_subject + start_offset)      if (start_match > match_block.start_subject + start_offset)
5041        {        {
5042        while (start_match < end_subject && start_match[-1] != '\n')        while (start_match < end_subject && start_match[-1] != NEWLINE)
5043          start_match++;          start_match++;
5044        }        }
5045      }      }

Legend:
Removed from v.52  
changed lines
  Added in v.53

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12