/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 298 by ph10, Thu Jan 10 17:09:12 2008 UTC | revision 381 by ph10, Tue Mar 3 16:08:23 2009 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2009 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 761  for (;;) Line 787  for (;;)
787    
788      case OP_COND:      case OP_COND:
789      case OP_SCOND:      case OP_SCOND:
790        /* Because of the way auto-callout works during compile, a callout item is
791        inserted between OP_COND and an assertion condition. */
792    
793        if (ecode[LINK_SIZE+1] == OP_CALLOUT)
794          {
795          if (pcre_callout != NULL)
796            {
797            pcre_callout_block cb;
798            cb.version          = 1;   /* Version 1 of the callout block */
799            cb.callout_number   = ecode[LINK_SIZE+2];
800            cb.offset_vector    = md->offset_vector;
801            cb.subject          = (PCRE_SPTR)md->start_subject;
802            cb.subject_length   = md->end_subject - md->start_subject;
803            cb.start_match      = mstart - md->start_subject;
804            cb.current_position = eptr - md->start_subject;
805            cb.pattern_position = GET(ecode, LINK_SIZE + 3);
806            cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
807            cb.capture_top      = offset_top/2;
808            cb.capture_last     = md->capture_last;
809            cb.callout_data     = md->callout_data;
810            if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
811            if (rrc < 0) RRETURN(rrc);
812            }
813          ecode += _pcre_OP_lengths[OP_CALLOUT];
814          }
815    
816        /* Now see what the actual condition is */
817    
818      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */      if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
819        {        {
820        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/        offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
# Line 1148  for (;;) Line 1202  for (;;)
1202      do ecode += GET(ecode,1); while (*ecode == OP_ALT);      do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1203      break;      break;
1204    
1205      /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating      /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1206      that it may occur zero times. It may repeat infinitely, or not at all -      indicating that it may occur zero times. It may repeat infinitely, or not
1207      i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper      at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1208      repeat limits are compiled as a number of copies, with the optional ones      with fixed upper repeat limits are compiled as a number of copies, with the
1209      preceded by BRAZERO or BRAMINZERO. */      optional ones preceded by BRAZERO or BRAMINZERO. */
1210    
1211      case OP_BRAZERO:      case OP_BRAZERO:
1212        {        {
# Line 1174  for (;;) Line 1228  for (;;)
1228        }        }
1229      break;      break;
1230    
1231        case OP_SKIPZERO:
1232          {
1233          next = ecode+1;
1234          do next += GET(next,1); while (*next == OP_ALT);
1235          ecode = next + 1 + LINK_SIZE;
1236          }
1237        break;
1238    
1239      /* End of a group, repeated or non-repeating. */      /* End of a group, repeated or non-repeating. */
1240    
1241      case OP_KET:      case OP_KET:
# Line 1421  for (;;) Line 1483  for (;;)
1483      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
1484    
1485      case OP_ANY:      case OP_ANY:
1486      if ((ims & PCRE_DOTALL) == 0)      if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
1487        {      /* Fall through */
1488        if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
1489        }      case OP_ALLANY:
1490      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
1491      if (utf8)      if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
       while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
1492      ecode++;      ecode++;
1493      break;      break;
1494    
# Line 1646  for (;;) Line 1707  for (;;)
1707      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1708      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1709        {        {
1710        int chartype, script;        const ucd_record * prop = GET_UCD(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1711    
1712        switch(ecode[1])        switch(ecode[1])
1713          {          {
# Line 1656  for (;;) Line 1716  for (;;)
1716          break;          break;
1717    
1718          case PT_LAMP:          case PT_LAMP:
1719          if ((chartype == ucp_Lu ||          if ((prop->chartype == ucp_Lu ||
1720               chartype == ucp_Ll ||               prop->chartype == ucp_Ll ||
1721               chartype == ucp_Lt) == (op == OP_NOTPROP))               prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
1722            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1723           break;           break;
1724    
1725          case PT_GC:          case PT_GC:
1726          if ((ecode[2] != category) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1727            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1728          break;          break;
1729    
1730          case PT_PC:          case PT_PC:
1731          if ((ecode[2] != chartype) == (op == OP_PROP))          if ((ecode[2] != prop->chartype) == (op == OP_PROP))
1732            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1733          break;          break;
1734    
1735          case PT_SC:          case PT_SC:
1736          if ((ecode[2] != script) == (op == OP_PROP))          if ((ecode[2] != prop->script) == (op == OP_PROP))
1737            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1738          break;          break;
1739    
# Line 1692  for (;;) Line 1752  for (;;)
1752      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);      if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
1753      GETCHARINCTEST(c, eptr);      GETCHARINCTEST(c, eptr);
1754        {        {
1755        int chartype, script;        int category = UCD_CATEGORY(c);
       int category = _pcre_ucp_findprop(c, &chartype, &script);  
1756        if (category == ucp_M) RRETURN(MATCH_NOMATCH);        if (category == ucp_M) RRETURN(MATCH_NOMATCH);
1757        while (eptr < md->end_subject)        while (eptr < md->end_subject)
1758          {          {
# Line 1702  for (;;) Line 1761  for (;;)
1761            {            {
1762            GETCHARLEN(c, eptr, len);            GETCHARLEN(c, eptr, len);
1763            }            }
1764          category = _pcre_ucp_findprop(c, &chartype, &script);          category = UCD_CATEGORY(c);
1765          if (category != ucp_M) break;          if (category != ucp_M) break;
1766          eptr += len;          eptr += len;
1767          }          }
# Line 1723  for (;;) Line 1782  for (;;)
1782      case OP_REF:      case OP_REF:
1783        {        {
1784        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */        offset = GET2(ecode, 1) << 1;               /* Doubled ref number */
1785        ecode += 3;                                 /* Advance past item */        ecode += 3;
1786    
1787        /* If the reference is unset, set the length to be longer than the amount        /* If the reference is unset, there are two possibilities:
1788        of subject left; this ensures that every attempt at a match fails. We  
1789        can't just fail here, because of the possibility of quantifiers with zero        (a) In the default, Perl-compatible state, set the length to be longer
1790        minima. */        than the amount of subject left; this ensures that every attempt at a
1791          match fails. We can't just fail here, because of the possibility of
1792        length = (offset >= offset_top || md->offset_vector[offset] < 0)?        quantifiers with zero minima.
1793          md->end_subject - eptr + 1 :  
1794          md->offset_vector[offset+1] - md->offset_vector[offset];        (b) If the JavaScript compatibility flag is set, set the length to zero
1795          so that the back reference matches an empty string.
1796    
1797          Otherwise, set the length to the length of what was matched by the
1798          referenced subpattern. */
1799    
1800          if (offset >= offset_top || md->offset_vector[offset] < 0)
1801            length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1;
1802          else
1803            length = md->offset_vector[offset+1] - md->offset_vector[offset];
1804    
1805        /* Set up for repetition, or handle the non-repeated case */        /* Set up for repetition, or handle the non-repeated case */
1806    
# Line 2158  for (;;) Line 2226  for (;;)
2226          if (fc != dc)          if (fc != dc)
2227            {            {
2228  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2229            if (dc != _pcre_ucp_othercase(fc))            if (dc != UCD_OTHERCASE(fc))
2230  #endif  #endif
2231              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
2232            }            }
# Line 2249  for (;;) Line 2317  for (;;)
2317  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2318          unsigned int othercase;          unsigned int othercase;
2319          if ((ims & PCRE_CASELESS) != 0 &&          if ((ims & PCRE_CASELESS) != 0 &&
2320              (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)              (othercase = UCD_OTHERCASE(fc)) != fc)
2321            oclength = _pcre_ord2utf8(othercase, occhars);            oclength = _pcre_ord2utf8(othercase, occhars);
2322          else oclength = 0;          else oclength = 0;
2323  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
# Line 2569  for (;;) Line 2637  for (;;)
2637              {              {
2638              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2639              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2640                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2641              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2642              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2643              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2644                RRETURN(MATCH_NOMATCH);  
2645              }              }
2646            }            }
2647          else          else
# Line 2678  for (;;) Line 2747  for (;;)
2747              {              {
2748              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2749              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2750                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2751              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2752              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2753              }              }
2754            }            }
2755          else          else
# Line 2854  for (;;) Line 2923  for (;;)
2923              {              {
2924              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2925              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2926              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2927              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
2928                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
2929                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 2867  for (;;) Line 2936  for (;;)
2936              {              {
2937              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2938              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2939              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2940              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
2941                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2942              }              }
# Line 2878  for (;;) Line 2947  for (;;)
2947              {              {
2948              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2949              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2950              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
2951              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
2952                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2953              }              }
# Line 2889  for (;;) Line 2958  for (;;)
2958              {              {
2959              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2960              GETCHARINCTEST(c, eptr);              GETCHARINCTEST(c, eptr);
2961              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
2962              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
2963                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
2964              }              }
# Line 2908  for (;;) Line 2977  for (;;)
2977          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
2978            {            {
2979            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
2980            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
2981            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
2982            while (eptr < md->end_subject)            while (eptr < md->end_subject)
2983              {              {
# Line 2917  for (;;) Line 2986  for (;;)
2986                {                {
2987                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
2988                }                }
2989              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
2990              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
2991              eptr += len;              eptr += len;
2992              }              }
# Line 2935  for (;;) Line 3004  for (;;)
3004          case OP_ANY:          case OP_ANY:
3005          for (i = 1; i <= min; i++)          for (i = 1; i <= min; i++)
3006            {            {
3007            if (eptr >= md->end_subject ||            if (eptr >= md->end_subject || IS_NEWLINE(eptr))
                ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))  
3008              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3009            eptr++;            eptr++;
3010            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;            while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3011            }            }
3012          break;          break;
3013    
3014            case OP_ALLANY:
3015            for (i = 1; i <= min; i++)
3016              {
3017              if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3018              eptr++;
3019              while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3020              }
3021            break;
3022    
3023          case OP_ANYBYTE:          case OP_ANYBYTE:
3024          eptr += min;          eptr += min;
3025          break;          break;
# Line 3151  for (;;) Line 3228  for (;;)
3228        switch(ctype)        switch(ctype)
3229          {          {
3230          case OP_ANY:          case OP_ANY:
3231          if ((ims & PCRE_DOTALL) == 0)          for (i = 1; i <= min; i++)
3232            {            {
3233            for (i = 1; i <= min; i++)            if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
3234              {            eptr++;
             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);  
             eptr++;  
             }  
3235            }            }
3236          else eptr += min;          break;
3237    
3238            case OP_ALLANY:
3239            eptr += min;
3240          break;          break;
3241    
3242          case OP_ANYBYTE:          case OP_ANYBYTE:
# Line 3325  for (;;) Line 3402  for (;;)
3402              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3403              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3404              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3405              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3406              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3407                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3408                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3340  for (;;) Line 3417  for (;;)
3417              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3418              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3419              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3420              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3421              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3422                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3423              }              }
# Line 3353  for (;;) Line 3430  for (;;)
3430              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3431              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3432              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3433              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3434              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3435                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3436              }              }
# Line 3366  for (;;) Line 3443  for (;;)
3443              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3444              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);              if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3445              GETCHARINC(c, eptr);              GETCHARINC(c, eptr);
3446              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3447              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3448                RRETURN(MATCH_NOMATCH);                RRETURN(MATCH_NOMATCH);
3449              }              }
# Line 3388  for (;;) Line 3465  for (;;)
3465            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3466            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
3467            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3468            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3469            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);            if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
3470            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3471              {              {
# Line 3397  for (;;) Line 3474  for (;;)
3474                {                {
3475                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3476                }                }
3477              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3478              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3479              eptr += len;              eptr += len;
3480              }              }
# Line 3416  for (;;) Line 3493  for (;;)
3493            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
3494            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3495            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3496                 (ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 &&                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
                 IS_NEWLINE(eptr)))  
3497              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3498    
3499            GETCHARINC(c, eptr);            GETCHARINC(c, eptr);
3500            switch(ctype)            switch(ctype)
3501              {              {
3502              case OP_ANY:        /* This is the DOTALL case */              case OP_ANY:        /* This is the non-NL case */
3503              break;              case OP_ALLANY:
   
3504              case OP_ANYBYTE:              case OP_ANYBYTE:
3505              break;              break;
3506    
# Line 3577  for (;;) Line 3652  for (;;)
3652            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
3653            if (rrc != MATCH_NOMATCH) RRETURN(rrc);            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3654            if (fi >= max || eptr >= md->end_subject ||            if (fi >= max || eptr >= md->end_subject ||
3655                 ((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr)))                 (ctype == OP_ANY && IS_NEWLINE(eptr)))
3656              RRETURN(MATCH_NOMATCH);              RRETURN(MATCH_NOMATCH);
3657    
3658            c = *eptr++;            c = *eptr++;
3659            switch(ctype)            switch(ctype)
3660              {              {
3661              case OP_ANY:   /* This is the DOTALL case */              case OP_ANY:     /* This is the non-NL case */
3662              break;              case OP_ALLANY:
   
3663              case OP_ANYBYTE:              case OP_ANYBYTE:
3664              break;              break;
3665    
# Line 3718  for (;;) Line 3792  for (;;)
3792              int len = 1;              int len = 1;
3793              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3794              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3795              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3796              if ((prop_chartype == ucp_Lu ||              if ((prop_chartype == ucp_Lu ||
3797                   prop_chartype == ucp_Ll ||                   prop_chartype == ucp_Ll ||
3798                   prop_chartype == ucp_Lt) == prop_fail_result)                   prop_chartype == ucp_Lt) == prop_fail_result)
# Line 3733  for (;;) Line 3807  for (;;)
3807              int len = 1;              int len = 1;
3808              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3809              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3810              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3811              if ((prop_category == prop_value) == prop_fail_result)              if ((prop_category == prop_value) == prop_fail_result)
3812                break;                break;
3813              eptr+= len;              eptr+= len;
# Line 3746  for (;;) Line 3820  for (;;)
3820              int len = 1;              int len = 1;
3821              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3822              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3823              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_chartype = UCD_CHARTYPE(c);
3824              if ((prop_chartype == prop_value) == prop_fail_result)              if ((prop_chartype == prop_value) == prop_fail_result)
3825                break;                break;
3826              eptr+= len;              eptr+= len;
# Line 3759  for (;;) Line 3833  for (;;)
3833              int len = 1;              int len = 1;
3834              if (eptr >= md->end_subject) break;              if (eptr >= md->end_subject) break;
3835              GETCHARLEN(c, eptr, len);              GETCHARLEN(c, eptr, len);
3836              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_script = UCD_SCRIPT(c);
3837              if ((prop_script == prop_value) == prop_fail_result)              if ((prop_script == prop_value) == prop_fail_result)
3838                break;                break;
3839              eptr+= len;              eptr+= len;
# Line 3788  for (;;) Line 3862  for (;;)
3862            {            {
3863            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3864            GETCHARINCTEST(c, eptr);            GETCHARINCTEST(c, eptr);
3865            prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);            prop_category = UCD_CATEGORY(c);
3866            if (prop_category == ucp_M) break;            if (prop_category == ucp_M) break;
3867            while (eptr < md->end_subject)            while (eptr < md->end_subject)
3868              {              {
# Line 3797  for (;;) Line 3871  for (;;)
3871                {                {
3872                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3873                }                }
3874              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3875              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3876              eptr += len;              eptr += len;
3877              }              }
# Line 3819  for (;;) Line 3893  for (;;)
3893                BACKCHAR(eptr);                BACKCHAR(eptr);
3894                GETCHARLEN(c, eptr, len);                GETCHARLEN(c, eptr, len);
3895                }                }
3896              prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);              prop_category = UCD_CATEGORY(c);
3897              if (prop_category != ucp_M) break;              if (prop_category != ucp_M) break;
3898              eptr--;              eptr--;
3899              }              }
# Line 3839  for (;;) Line 3913  for (;;)
3913            case OP_ANY:            case OP_ANY:
3914            if (max < INT_MAX)            if (max < INT_MAX)
3915              {              {
3916              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
               {  
               for (i = min; i < max; i++)  
                 {  
                 if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
               }  
             else  
3917                {                {
3918                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3919                  {                eptr++;
3920                  if (eptr >= md->end_subject) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3921                }                }
3922              }              }
3923    
# Line 3863  for (;;) Line 3925  for (;;)
3925    
3926            else            else
3927              {              {
3928              if ((ims & PCRE_DOTALL) == 0)              for (i = min; i < max; i++)
3929                {                {
3930                for (i = min; i < max; i++)                if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
3931                  {                eptr++;
3932                  if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;                while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
                 eptr++;  
                 while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;  
                 }  
3933                }                }
3934              else              }
3935              break;
3936    
3937              case OP_ALLANY:
3938              if (max < INT_MAX)
3939                {
3940                for (i = min; i < max; i++)
3941                {                {
3942                eptr = md->end_subject;                if (eptr >= md->end_subject) break;
3943                  eptr++;
3944                  while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
3945                }                }
3946              }              }
3947              else eptr = md->end_subject;   /* Unlimited UTF-8 repeat */
3948            break;            break;
3949    
3950            /* The byte case is the same as non-UTF8 */            /* The byte case is the same as non-UTF8 */
# Line 4064  for (;;) Line 4132  for (;;)
4132          switch(ctype)          switch(ctype)
4133            {            {
4134            case OP_ANY:            case OP_ANY:
4135            if ((ims & PCRE_DOTALL) == 0)            for (i = min; i < max; i++)
4136              {              {
4137              for (i = min; i < max; i++)              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
4138                {              eptr++;
               if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;  
               eptr++;  
               }  
             break;  
4139              }              }
4140            /* For DOTALL case, fall through and treat as \C */            break;
4141    
4142              case OP_ALLANY:
4143            case OP_ANYBYTE:            case OP_ANYBYTE:
4144            c = max - min;            c = max - min;
4145            if (c > (unsigned int)(md->end_subject - eptr))            if (c > (unsigned int)(md->end_subject - eptr))
# Line 4348  Returns: > 0 => success; value Line 4413  Returns: > 0 => success; value
4413                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4414  */  */
4415    
4416  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4417  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4418    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4419    int offsetcount)    int offsetcount)
# Line 4450  end_subject = md->end_subject; Line 4515  end_subject = md->end_subject;
4515    
4516  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
4517  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0;
4518    md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
4519    
4520  md->notbol = (options & PCRE_NOTBOL) != 0;  md->notbol = (options & PCRE_NOTBOL) != 0;
4521  md->noteol = (options & PCRE_NOTEOL) != 0;  md->noteol = (options & PCRE_NOTEOL) != 0;
# Line 4659  for(;;) Line 4725  for(;;)
4725    if (firstline)    if (firstline)
4726      {      {
4727      USPTR t = start_match;      USPTR t = start_match;
4728    #ifdef SUPPORT_UTF8
4729        if (utf8)
4730          {
4731          while (t < md->end_subject && !IS_NEWLINE(t))
4732            {
4733            t++;
4734            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4735            }
4736          }
4737        else
4738    #endif
4739      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4740      end_subject = t;      end_subject = t;
4741      }      }
4742    
4743    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4744    
4745    if (first_byte >= 0)    if (first_byte >= 0)
4746      {      {
4747      if (first_byte_caseless)      if (first_byte_caseless)
4748        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4749               md->lcc[*start_match] != first_byte)          start_match++;
         { NEXTCHAR(start_match); }  
4750      else      else
4751        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4752          { NEXTCHAR(start_match); }          start_match++;
4753      }      }
4754    
4755    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4756    
4757    else if (startline)    else if (startline)
4758      {      {
4759      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4760        {        {
4761        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4762          { NEXTCHAR(start_match); }        if (utf8)
4763            {
4764            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4765              {
4766              start_match++;
4767              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4768                start_match++;
4769              }
4770            }
4771          else
4772    #endif
4773          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4774            start_match++;
4775    
4776        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4777        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4697  for(;;) Line 4785  for(;;)
4785        }        }
4786      }      }
4787    
4788    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4789    
4790    else if (start_bits != NULL)    else if (start_bits != NULL)
4791      {      {
4792      while (start_match < end_subject)      while (start_match < end_subject)
4793        {        {
4794        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4795        if ((start_bits[c/8] & (1 << (c&7))) == 0)        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4796          { NEXTCHAR(start_match); }          else break;
       else break;  
4797        }        }
4798      }      }
4799    

Legend:
Removed from v.298  
changed lines
  Added in v.381

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12