/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 349 by ph10, Wed Jul 2 18:42:11 2008 UTC | revision 371 by ph10, Mon Aug 25 18:28:05 2008 UTC
# Line 158  printf("\n"); Line 158  printf("\n");
158    
159  if (length > md->end_subject - eptr) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
160    
161  /* Separate the caselesss case for speed */  /* Separate the caseless case for speed. In UTF-8 mode we can only do this
162    properly if Unicode properties are supported. Otherwise, we can check only
163    ASCII characters. */
164    
165  if ((ims & PCRE_CASELESS) != 0)  if ((ims & PCRE_CASELESS) != 0)
166    {    {
167    #ifdef SUPPORT_UTF8
168    #ifdef SUPPORT_UCP
169      if (md->utf8)
170        {
171        USPTR endptr = eptr + length;
172        while (eptr < endptr)
173          {
174          int c, d;
175          GETCHARINC(c, eptr);
176          GETCHARINC(d, p);
177          if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
178          }
179        }
180      else
181    #endif
182    #endif
183    
184      /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
185      is no UCP support. */
186    
187    while (length-- > 0)    while (length-- > 0)
188      if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;      { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
189    }    }
190    
191    /* In the caseful case, we can just compare the bytes, whether or not we
192    are in UTF-8 mode. */
193    
194  else  else
195    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
196    
# Line 1669  for (;;) Line 1695  for (;;)
1695           break;           break;
1696    
1697          case PT_GC:          case PT_GC:
1698          if ((ecode[2] != ucp_gentype[prop->chartype]) == (op == OP_PROP))          if ((ecode[2] != _pcre_ucp_gentype[prop->chartype]) == (op == OP_PROP))
1699            RRETURN(MATCH_NOMATCH);            RRETURN(MATCH_NOMATCH);
1700          break;          break;
1701    
# Line 2583  for (;;) Line 2609  for (;;)
2609              {              {
2610              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
2611              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2612                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2613              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2614              if (d < 256) d = md->lcc[d];              if (d < 256) d = md->lcc[d];
2615              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
2616                RRETURN(MATCH_NOMATCH);  
2617              }              }
2618            }            }
2619          else          else
# Line 2692  for (;;) Line 2719  for (;;)
2719              {              {
2720              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);              RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
2721              if (rrc != MATCH_NOMATCH) RRETURN(rrc);              if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2722                if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
2723              GETCHARINC(d, eptr);              GETCHARINC(d, eptr);
2724              if (fi >= max || eptr >= md->end_subject || fc == d)              if (fc == d) RRETURN(MATCH_NOMATCH);
               RRETURN(MATCH_NOMATCH);  
2725              }              }
2726            }            }
2727          else          else
# Line 4358  Returns: > 0 => success; value Line 4385  Returns: > 0 => success; value
4385                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
4386  */  */
4387    
4388  PCRE_EXP_DEFN int  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
4389  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
4390    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,    PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
4391    int offsetcount)    int offsetcount)
# Line 4670  for(;;) Line 4697  for(;;)
4697    if (firstline)    if (firstline)
4698      {      {
4699      USPTR t = start_match;      USPTR t = start_match;
4700    #ifdef SUPPORT_UTF8
4701        if (utf8)
4702          {
4703          while (t < md->end_subject && !IS_NEWLINE(t))
4704            {
4705            t++;
4706            while (t < end_subject && (*t & 0xc0) == 0x80) t++;
4707            }
4708          }
4709        else
4710    #endif
4711      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4712      end_subject = t;      end_subject = t;
4713      }      }
4714    
4715    /* Now test for a unique first byte */    /* Now advance to a unique first byte if there is one. */
4716    
4717    if (first_byte >= 0)    if (first_byte >= 0)
4718      {      {
4719      if (first_byte_caseless)      if (first_byte_caseless)
4720        while (start_match < end_subject &&        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4721               md->lcc[*start_match] != first_byte)          start_match++;
         { NEXTCHAR(start_match); }  
4722      else      else
4723        while (start_match < end_subject && *start_match != first_byte)        while (start_match < end_subject && *start_match != first_byte)
4724          { NEXTCHAR(start_match); }          start_match++;
4725      }      }
4726    
4727    /* Or to just after a linebreak for a multiline match if possible */    /* Or to just after a linebreak for a multiline match */
4728    
4729    else if (startline)    else if (startline)
4730      {      {
4731      if (start_match > md->start_subject + start_offset)      if (start_match > md->start_subject + start_offset)
4732        {        {
4733        while (start_match <= end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4734          { NEXTCHAR(start_match); }        if (utf8)
4735            {
4736            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4737              {
4738              start_match++;
4739              while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4740                start_match++;
4741              }
4742            }
4743          else
4744    #endif
4745          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4746            start_match++;
4747    
4748        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4749        and we are now at a LF, advance the match position by one more character.        and we are now at a LF, advance the match position by one more character.
# Line 4708  for(;;) Line 4757  for(;;)
4757        }        }
4758      }      }
4759    
4760    /* Or to a non-unique first char after study */    /* Or to a non-unique first byte after study */
4761    
4762    else if (start_bits != NULL)    else if (start_bits != NULL)
4763      {      {
4764      while (start_match < end_subject)      while (start_match < end_subject)
4765        {        {
4766        register unsigned int c = *start_match;        register unsigned int c = *start_match;
4767        if ((start_bits[c/8] & (1 << (c&7))) == 0)        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4768          { NEXTCHAR(start_match); }          else break;
       else break;  
4769        }        }
4770      }      }
4771    

Legend:
Removed from v.349  
changed lines
  Added in v.371

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12