/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 236 by ph10, Tue Sep 11 12:57:06 2007 UTC revision 351 by ph10, Fri Jul 4 18:27:16 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 1148  for (;;) Line 1148  for (;;)
1148      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1149      break;      break;
1150    
1151      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1152      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1153      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1154      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1155      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1156    
1157      case OP_BRAZERO:      case OP_BRAZERO:
1158        {        {
# Line 1174  for (;;) Line 1174  for (;;)
1174        }        }
1175      break;      break;
1176    
1177        case OP_SKIPZERO:
1178          {
1179          next = ecode+1;
1180          do next += GET(next,1); while (*next == OP_ALT);
1181          ecode = next + 1 + LINK_SIZE;
1182          }
1183        break;
1184    
1185      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1186    
1187      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1429  for (;;)
1429      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1430    
1431      case OP_ANY:      case OP_ANY:
1432      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1433        {      /* Fall through */
1434        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1435        }      case OP_ALLANY:
1436      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1437      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1438      ecode++;      ecode++;
1439      break;      break;
1440    
# Line 1646  for (;;) Line 1653  for (;;)
1653      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1654      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1655        {        {
1656        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1657    
1658        switch(ecode[1])        switch(ecode[1])
1659          {          {
# Line 1656  for (;;) Line 1662  for (;;)
1662          break;          break;
1663    
1664          case PT_LAMP:          case PT_LAMP:
1665          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1666               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1667               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1668            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1669           break;           break;
1670    
1671          case PT_GC:          case PT_GC:
1672          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1673            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1674          break;          break;
1675    
1676          case PT_PC:          case PT_PC:
1677          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1678            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1679          break;          break;
1680    
1681          case PT_SC:          case PT_SC:
1682          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1683            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1684          break;          break;
1685    
# Line 1692  for (;;) Line 1698  for (;;)
1698      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1699      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1700        {        {
1701        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1702        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1703        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1704          {          {
# Line 1702  for (;;) Line 1707  for (;;)
1707            {            {
1708            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1709            }            }
1710          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1711          if (category != ucp_M) break;          if (category != ucp_M) break;
1712          eptr += len;          eptr += len;
1713          }          }
# Line 1723  for (;;) Line 1728  for (;;)
1728      case OP_REF:      case OP_REF:
1729        {        {
1730        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1731        ecode += 3;                                 /* Advance past item */        ecode += 3;
1732    
1733          /* If the reference is unset, there are two possibilities:
1734    
1735          (a) In the default, Perl-compatible state, set the length to be longer
1736          than the amount of subject left; this ensures that every attempt at a
1737          match fails. We can't just fail here, because of the possibility of
1738          quantifiers with zero minima.
1739    
1740        /* If the reference is unset, set the length to be longer than the amount        (b) If the JavaScript compatibility flag is set, set the length to zero
1741        of subject left; this ensures that every attempt at a match fails. We        so that the back reference matches an empty string.
1742        can't just fail here, because of the possibility of quantifiers with zero  
1743        minima. */        Otherwise, set the length to the length of what was matched by the
1744          referenced subpattern. */
1745        length = (offset >= offset_top || md->offset_vector[offset] < 0)?  
1746          md->end_subject - eptr + 1 :        if (offset >= offset_top || md->offset_vector[offset] < 0)
1747          md->offset_vector[offset+1] - md->offset_vector[offset];          length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1748          else
1749            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1750    
1751        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1752    
# Line 2158  for (;;) Line 2172  for (;;)
2172          if (fc != dc)          if (fc != dc)
2173            {            {
2174  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2175            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2176  #endif  #endif
2177              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2178            }            }
# Line 2249  for (;;) Line 2263  for (;;)
2263  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2264          unsigned int othercase;          unsigned int othercase;
2265          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2266              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2267            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2268          else oclength = 0;          else oclength = 0;
2269  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2854  for (;;) Line 2868  for (;;)
2868              {              {
2869              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2870              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2871              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2872              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2873                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2874                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2867  for (;;) Line 2881  for (;;)
2881              {              {
2882              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2883              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2884              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2885              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2886                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2887              }              }
# Line 2878  for (;;) Line 2892  for (;;)
2892              {              {
2893              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2894              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2895              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2896              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2897                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2898              }              }
# Line 2889  for (;;) Line 2903  for (;;)
2903              {              {
2904              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2905              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2906              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2907              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2908                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2909              }              }
# Line 2908  for (;;) Line 2922  for (;;)
2922          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2923            {            {
2924            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2925            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2926            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2927            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2928              {              {
# Line 2917  for (;;) Line 2931  for (;;)
2931                {                {
2932                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2933                }                }
2934              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2935              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2936              eptr += len;              eptr += len;
2937              }              }
# Line 2935  for (;;) Line 2949  for (;;)
2949          case OP_ANY:          case OP_ANY:
2950          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2951            {            {
2952            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
2953              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2954            eptr++;            eptr++;
2955            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2956            }            }
2957          break;          break;
2958    
2959            case OP_ALLANY:
2960            for (i = 1; i <= min; i++)
2961              {
2962              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2963              eptr++;
2964              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
2965              }
2966            break;
2967    
2968          case OP_ANYBYTE:          case OP_ANYBYTE:
2969          eptr += min;          eptr += min;
2970          break;          break;
# Line 3151  for (;;) Line 3173  for (;;)
3173        switch(ctype)        switch(ctype)
3174          {          {
3175          case OP_ANY:          case OP_ANY:
3176          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3177            {            {
3178            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3179              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3180            }            }
3181          else eptr += min;          break;
3182    
3183            case OP_ALLANY:
3184            eptr += min;
3185          break;          break;
3186    
3187          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3325  for (;;) Line 3347  for (;;)
3347              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3348              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3349              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3350              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3351              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3352                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3353                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3340  for (;;) Line 3362  for (;;)
3362              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3363              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3364              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3365              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3366              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3367                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3368              }              }
# Line 3353  for (;;) Line 3375  for (;;)
3375              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3376              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3377              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3378              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3379              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3380                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3381              }              }
# Line 3366  for (;;) Line 3388  for (;;)
3388              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3389              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3390              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3391              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3392              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3393                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3394              }              }
# Line 3388  for (;;) Line 3410  for (;;)
3410            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3411            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3412            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3413            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3414            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3415            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3416              {              {
# Line 3397  for (;;) Line 3419  for (;;)
3419                {                {
3420                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3421                }                }
3422              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3423              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3424              eptr += len;              eptr += len;
3425              }              }
# Line 3416  for (;;) Line 3438  for (;;)
3438            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3439            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3440            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3441                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3442              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3443    
3444            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3445            switch(ctype)            switch(ctype)
3446              {              {
3447              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3448              break;              case OP_ALLANY:
   
3449              case OP_ANYBYTE:              case OP_ANYBYTE:
3450              break;              break;
3451    
# Line 3577  for (;;) Line 3597  for (;;)
3597            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3598            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3599            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3600                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3601              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3602    
3603            c = *eptr++;            c = *eptr++;
3604            switch(ctype)            switch(ctype)
3605              {              {
3606              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3607              break;              case OP_ALLANY:
   
3608              case OP_ANYBYTE:              case OP_ANYBYTE:
3609              break;              break;
3610    
# Line 3718  for (;;) Line 3737  for (;;)
3737              int len = 1;              int len = 1;
3738              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3739              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3740              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3741              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3742                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3743                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3733  for (;;) Line 3752  for (;;)
3752              int len = 1;              int len = 1;
3753              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3754              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3755              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3756              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3757                break;                break;
3758              eptr+= len;              eptr+= len;
# Line 3746  for (;;) Line 3765  for (;;)
3765              int len = 1;              int len = 1;
3766              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3767              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3768              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3769              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3770                break;                break;
3771              eptr+= len;              eptr+= len;
# Line 3759  for (;;) Line 3778  for (;;)
3778              int len = 1;              int len = 1;
3779              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3780              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3781              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3782              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3783                break;                break;
3784              eptr+= len;              eptr+= len;
# Line 3788  for (;;) Line 3807  for (;;)
3807            {            {
3808            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3809            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3810            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3811            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3812            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3813              {              {
# Line 3797  for (;;) Line 3816  for (;;)
3816                {                {
3817                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3818                }                }
3819              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3820              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3821              eptr += len;              eptr += len;
3822              }              }
# Line 3819  for (;;) Line 3838  for (;;)
3838                BACKCHAR(eptr);                BACKCHAR(eptr);
3839                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3840                }                }
3841              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3842              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3843              eptr--;              eptr--;
3844              }              }
# Line 3839  for (;;) Line 3858  for (;;)
3858            case OP_ANY:            case OP_ANY:
3859            if (max < INT_MAX)            if (max < INT_MAX)
3860              {              {
3861              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3862                {                {
3863                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3864                  {                eptr++;
3865                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3866                }                }
3867              }              }
3868    
# Line 3863  for (;;) Line 3870  for (;;)
3870    
3871            else            else
3872              {              {
3873              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3874                {                {
3875                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3876                  {                eptr++;
3877                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3878                }                }
3879              else              }
3880              break;
3881    
3882              case OP_ALLANY:
3883              if (max < INT_MAX)
3884                {
3885                for (i = min; i < max; i++)
3886                {                {
3887                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3888                  eptr++;
3889                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3890                }                }
3891              }              }
3892              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3893            break;            break;
3894    
3895            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4064  for (;;) Line 4077  for (;;)
4077          switch(ctype)          switch(ctype)
4078            {            {
4079            case OP_ANY:            case OP_ANY:
4080            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4081              {              {
4082              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4083                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4084              }              }
4085            /* For DOTALL case, fall through and treat as \C */            break;
4086    
4087              case OP_ALLANY:
4088            case OP_ANYBYTE:            case OP_ANYBYTE:
4089            c = max - min;            c = max - min;
4090            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4246  HEAP_RETURN: Line 4256  HEAP_RETURN:
4256  switch (frame->Xwhere)  switch (frame->Xwhere)
4257    {    {
4258    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)    LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
4259    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)    LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
4260    LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)    LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
4261    LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)    LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
4262    LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40)    LBL(53) LBL(54)
4263    LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48)  #ifdef SUPPORT_UTF8
4264    LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54)    LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30)
4265      LBL(32) LBL(34) LBL(42) LBL(46)
4266    #ifdef SUPPORT_UCP
4267      LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
4268    #endif  /* SUPPORT_UCP */
4269    #endif  /* SUPPORT_UTF8 */
4270    default:    default:
4271    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));    DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
4272    return PCRE_ERROR_INTERNAL;    return PCRE_ERROR_INTERNAL;
# Line 4445  end_subject = md->end_subject; Line 4460  end_subject = md->end_subject;
4460    
4461  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4462  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4463    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4464    
4465  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4466  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4469  switch (options & (PCRE_BSR_ANYCRLF|PCRE Line 4485  switch (options & (PCRE_BSR_ANYCRLF|PCRE
4485    md->bsr_anycrlf = TRUE;    md->bsr_anycrlf = TRUE;
4486  #else  #else
4487    md->bsr_anycrlf = FALSE;    md->bsr_anycrlf = FALSE;
4488  #endif  #endif
4489    break;    break;
4490    
4491    case PCRE_BSR_ANYCRLF:    case PCRE_BSR_ANYCRLF:
# Line 4665  for(;;) Line 4681  for(;;)
4681      if (first_byte_caseless)      if (first_byte_caseless)
4682        while (start_match < end_subject &&        while (start_match < end_subject &&
4683               md->lcc[*start_match] != first_byte)               md->lcc[*start_match] != first_byte)
4684          start_match++;          { NEXTCHAR(start_match); }
4685      else      else
4686        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4687          start_match++;          { NEXTCHAR(start_match); }
4688      }      }
4689    
4690    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match if possible */
# Line 4678  for(;;) Line 4694  for(;;)
4694      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4695        {        {
4696        while (start_match <= end_subject && !WAS_NEWLINE(start_match))        while (start_match <= end_subject && !WAS_NEWLINE(start_match))
4697          start_match++;          { NEXTCHAR(start_match); }
4698    
4699        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4700        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4699  for(;;) Line 4715  for(;;)
4715      while (start_match < end_subject)      while (start_match < end_subject)
4716        {        {
4717        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4718        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;        if ((start_bits[c/8] & (1 << (c&7))) == 0)
4719            { NEXTCHAR(start_match); }
4720          else break;
4721        }        }
4722      }      }
4723    

Legend:
Removed from v.236  
changed lines
  Added in v.351

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12