/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 219 by ph10, Thu Aug 16 11:46:40 2007 UTC | revision 342 by ph10, Sun Apr 20 17:10:13 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 43  pattern matching using an NFA algorithm, Line 43  pattern matching using an NFA algorithm,
43  possible. There are also some static supporting functions. */  possible. There are also some static supporting functions. */
44    
45  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
46  #include <config.h>  #include "config.h"
47  #endif  #endif
48    
49  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 1148  for (;;) Line 1148  for (;;)
1148      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149      break;      break;
1150    
1151      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1153      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1155      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1156    
1157      case OP_BRAZERO:      case OP_BRAZERO:
1158        {        {
# Line 1174  for (;;) Line 1174  for (;;)
1174        }        }
1175      break;      break;
1176    
1177        case OP_SKIPZERO:
1178          {
1179          next = ecode+1;
1180          do next += GET(next,1); while (*next == OP_ALT);
1181          ecode = next + 1 + LINK_SIZE;
1182          }
1183        break;
1184    
1185      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1186    
1187      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1429  for (;;)
1429      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1430    
1431      case OP_ANY:      case OP_ANY:
1432      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1433        {      /* Fall through */
1434        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1435        }      case OP_ALLANY:
1436      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1437      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1438      ecode++;      ecode++;
1439      break;      break;
1440    
# Line 1526  for (;;) Line 1533  for (;;)
1533        case 0x000d:        case 0x000d:
1534        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;        if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
1535        break;        break;
1536    
1537        case 0x000a:        case 0x000a:
1538          break;
1539    
1540        case 0x000b:        case 0x000b:
1541        case 0x000c:        case 0x000c:
1542        case 0x0085:        case 0x0085:
1543        case 0x2028:        case 0x2028:
1544        case 0x2029:        case 0x2029:
1545          if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
1546        break;        break;
1547        }        }
1548      ecode++;      ecode++;
# Line 1719  for (;;) Line 1730  for (;;)
1730      case OP_REF:      case OP_REF:
1731        {        {
1732        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1733        ecode += 3;                                 /* Advance past item */        ecode += 3;
1734    
1735        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1736        of subject left; this ensures that every attempt at a match fails. We  
1737        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1738        minima. */        than the amount of subject left; this ensures that every attempt at a
1739          match fails. We can't just fail here, because of the possibility of
1740        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1741          md->end_subject - eptr + 1 :  
1742          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1743          so that the back reference matches an empty string.
1744    
1745          Otherwise, set the length to the length of what was matched by the
1746          referenced subpattern. */
1747    
1748          if (offset >= offset_top || md->offset_vector[offset] < 0)
1749            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1750          else
1751            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1752    
1753        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1754    
# Line 2931  for (;;) Line 2951  for (;;)
2951          case OP_ANY:          case OP_ANY:
2952          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2953            {            {
2954            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
2955              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2956            eptr++;            eptr++;
2957            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2958            }            }
2959          break;          break;
2960    
2961            case OP_ALLANY:
2962            for (i = 1; i <= min; i++)
2963              {
2964              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2965              eptr++;
2966              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2967              }
2968            break;
2969    
2970          case OP_ANYBYTE:          case OP_ANYBYTE:
2971          eptr += min;          eptr += min;
2972          break;          break;
# Line 2954  for (;;) Line 2982  for (;;)
2982              case 0x000d:              case 0x000d:
2983              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
2984              break;              break;
2985    
2986              case 0x000a:              case 0x000a:
2987                break;
2988    
2989              case 0x000b:              case 0x000b:
2990              case 0x000c:              case 0x000c:
2991              case 0x0085:              case 0x0085:
2992              case 0x2028:              case 0x2028:
2993              case 0x2029:              case 0x2029:
2994                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2995              break;              break;
2996              }              }
2997            }            }
# Line 3143  for (;;) Line 3175  for (;;)
3175        switch(ctype)        switch(ctype)
3176          {          {
3177          case OP_ANY:          case OP_ANY:
3178          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3179            {            {
3180            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3181              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3182            }            }
3183          else eptr += min;          break;
3184    
3185            case OP_ALLANY:
3186            eptr += min;
3187          break;          break;
3188    
3189          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3172  for (;;) Line 3204  for (;;)
3204              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;              if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3205              break;              break;
3206              case 0x000a:              case 0x000a:
3207                break;
3208    
3209              case 0x000b:              case 0x000b:
3210              case 0x000c:              case 0x000c:
3211              case 0x0085:              case 0x0085:
3212                if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3213              break;              break;
3214              }              }
3215            }            }
# Line 3405  for (;;) Line 3440  for (;;)
3440            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3441            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3442            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3443                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3444              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3445    
3446            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3447            switch(ctype)            switch(ctype)
3448              {              {
3449              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3450              break;              case OP_ALLANY:
   
3451              case OP_ANYBYTE:              case OP_ANYBYTE:
3452              break;              break;
3453    
# Line 3426  for (;;) Line 3459  for (;;)
3459                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3460                break;                break;
3461                case 0x000a:                case 0x000a:
3462                  break;
3463    
3464                case 0x000b:                case 0x000b:
3465                case 0x000c:                case 0x000c:
3466                case 0x0085:                case 0x0085:
3467                case 0x2028:                case 0x2028:
3468                case 0x2029:                case 0x2029:
3469                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3470                break;                break;
3471                }                }
3472              break;              break;
# Line 3563  for (;;) Line 3599  for (;;)
3599            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3600            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3601            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3602                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3603              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3604    
3605            c = *eptr++;            c = *eptr++;
3606            switch(ctype)            switch(ctype)
3607              {              {
3608              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3609              break;              case OP_ALLANY:
   
3610              case OP_ANYBYTE:              case OP_ANYBYTE:
3611              break;              break;
3612    
# Line 3582  for (;;) Line 3617  for (;;)
3617                case 0x000d:                case 0x000d:
3618                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;                if (eptr < md->end_subject && *eptr == 0x0a) eptr++;
3619                break;                break;
3620    
3621                case 0x000a:                case 0x000a:
3622                  break;
3623    
3624                case 0x000b:                case 0x000b:
3625                case 0x000c:                case 0x000c:
3626                case 0x0085:                case 0x0085:
3627                  if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
3628                break;                break;
3629                }                }
3630              break;              break;
# Line 3821  for (;;) Line 3860  for (;;)
3860            case OP_ANY:            case OP_ANY:
3861            if (max < INT_MAX)            if (max < INT_MAX)
3862              {              {
3863              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3864                {                {
3865                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3866                  {                eptr++;
3867                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3868                }                }
3869              }              }
3870    
# Line 3845  for (;;) Line 3872  for (;;)
3872    
3873            else            else
3874              {              {
3875              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3876                {                {
3877                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3878                  {                eptr++;
3879                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3880                }                }
3881              else              }
3882              break;
3883    
3884              case OP_ALLANY:
3885              if (max < INT_MAX)
3886                {
3887                for (i = min; i < max; i++)
3888                {                {
3889                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3890                  eptr++;
3891                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3892                }                }
3893              }              }
3894              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3895            break;            break;
3896    
3897            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 3883  for (;;) Line 3916  for (;;)
3916                }                }
3917              else              else
3918                {                {
3919                if (c != 0x000a && c != 0x000b && c != 0x000c &&                if (c != 0x000a &&
3920                    c != 0x0085 && c != 0x2028 && c != 0x2029)                    (md->bsr_anycrlf ||
3921                       (c != 0x000b && c != 0x000c &&
3922                        c != 0x0085 && c != 0x2028 && c != 0x2029)))
3923                  break;                  break;
3924                eptr += len;                eptr += len;
3925                }                }
# Line 4044  for (;;) Line 4079  for (;;)
4079          switch(ctype)          switch(ctype)
4080            {            {
4081            case OP_ANY:            case OP_ANY:
4082            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4083              {              {
4084              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4085                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4086              }              }
4087            /* For DOTALL case, fall through and treat as \C */            break;
4088    
4089              case OP_ALLANY:
4090            case OP_ANYBYTE:            case OP_ANYBYTE:
4091            c = max - min;            c = max - min;
4092            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4074  for (;;) Line 4106  for (;;)
4106                }                }
4107              else              else
4108                {                {
4109                if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085)                if (c != 0x000a &&
4110                      (md->bsr_anycrlf ||
4111                        (c != 0x000b && c != 0x000c && c != 0x0085)))
4112                  break;                  break;
4113                eptr++;                eptr++;
4114                }                }
# Line 4224  HEAP_RETURN: Line 4258  HEAP_RETURN:
4258  switch (frame->Xwhere)  switch (frame->Xwhere)
4259    {    {
4260    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4261    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4262    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4263    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4264    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4265    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)  #ifdef SUPPORT_UTF8
4266    LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4267      LBL(32) LBL(34) LBL(42) LBL(46)
4268    #ifdef SUPPORT_UCP
4269      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4270    #endif  /* SUPPORT_UCP */
4271    #endif  /* SUPPORT_UTF8 */
4272    default:    default:
4273    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4274    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4408  if (re->magic_number != MAGIC_NUMBER) Line 4447  if (re->magic_number != MAGIC_NUMBER)
4447  /* Set up other data */  /* Set up other data */
4448    
4449  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
4450  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
4451  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
4452    
4453  /* The code starts after the real_pcre block and the capture name table. */  /* The code starts after the real_pcre block and the capture name table. */
# Line 4423  end_subject = md->end_subject; Line 4462  end_subject = md->end_subject;
4462    
4463  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4464  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4465    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4466    
4467  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4468  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4435  md->recursive = NULL; Line 4475  md->recursive = NULL;
4475  md->lcc = tables + lcc_offset;  md->lcc = tables + lcc_offset;
4476  md->ctypes = tables + ctypes_offset;  md->ctypes = tables + ctypes_offset;
4477    
4478    /* Handle different \R options. */
4479    
4480    switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
4481      {
4482      case 0:
4483      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
4484        md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
4485      else
4486    #ifdef BSR_ANYCRLF
4487      md->bsr_anycrlf = TRUE;
4488    #else
4489      md->bsr_anycrlf = FALSE;
4490    #endif
4491      break;
4492    
4493      case PCRE_BSR_ANYCRLF:
4494      md->bsr_anycrlf = TRUE;
4495      break;
4496    
4497      case PCRE_BSR_UNICODE:
4498      md->bsr_anycrlf = FALSE;
4499      break;
4500    
4501      default: return PCRE_ERROR_BADNEWLINE;
4502      }
4503    
4504  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
4505  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
4506    
4507  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
4508         PCRE_NEWLINE_BITS)          (pcre_uint32)options) & PCRE_NEWLINE_BITS)
4509    {    {
4510    case 0: newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
4511    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
# Line 4478  else Line 4544  else
4544  /* Partial matching is supported only for a restricted set of regexes at the  /* Partial matching is supported only for a restricted set of regexes at the
4545  moment. */  moment. */
4546    
4547  if (md->partial && (re->options & PCRE_NOPARTIAL) != 0)  if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
4548    return PCRE_ERROR_BADPARTIAL;    return PCRE_ERROR_BADPARTIAL;
4549    
4550  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 4555  studied, there may be a bitmap of possib Line 4621  studied, there may be a bitmap of possib
4621    
4622  if (!anchored)  if (!anchored)
4623    {    {
4624    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
4625      {      {
4626      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
4627      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 4570  if (!anchored) Line 4636  if (!anchored)
4636  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
4637  character" set. */  character" set. */
4638    
4639  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
4640    {    {
4641    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
4642    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 4617  for(;;) Line 4683  for(;;)
4683      if (first_byte_caseless)      if (first_byte_caseless)
4684        while (start_match < end_subject &&        while (start_match < end_subject &&
4685               md->lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4686          start_match++;          { NEXTCHAR(start_match); }
4687      else      else
4688        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4689          start_match++;          { NEXTCHAR(start_match); }
4690      }      }
4691    
4692    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
# Line 4630  for(;;) Line 4696  for(;;)
4696      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4697        {        {
4698        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4699          start_match++;          { NEXTCHAR(start_match); }
4700    
4701        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4702        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4651  for(;;) Line 4717  for(;;)
4717      while (start_match < end_subject)      while (start_match < end_subject)
4718        {        {
4719        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4720        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;        if ((start_bits[c/8] & (1 << (c&7))) == 0)
4721            { NEXTCHAR(start_match); }
4722          else break;
4723        }        }
4724      }      }
4725    
# Line 4785  for(;;) Line 4853  for(;;)
4853    
4854    if (anchored || start_match > end_subject) break;    if (anchored || start_match > end_subject) break;
4855    
4856    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
4857    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
4858    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
4859    
4860    if (start_match[-1] == '\r' &&    if (start_match[-1] == '\r' &&
4861         (md->nltype == NLTYPE_ANY ||        start_match < end_subject &&
4862          md->nltype == NLTYPE_ANYCRLF ||        *start_match == '\n' &&
4863          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
4864         start_match < end_subject &&          (md->nltype == NLTYPE_ANY ||
4865         *start_match == '\n')           md->nltype == NLTYPE_ANYCRLF ||
4866             md->nllen == 2))
4867      start_match++;      start_match++;
4868    
4869    }   /* End of for(;;) "bumpalong" loop */    }   /* End of for(;;) "bumpalong" loop */

Legend:
Removed from v.219  
changed lines
  Added in v.342

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12