/[pcre]/code/trunk/pcre_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_exec.c

Parent Directory | Revision Log | View Patch

revision 384 by ph10, Sun Mar 8 16:27:43 2009 UTC | revision 389 by ph10, Sun Mar 15 18:24:05 2009 UTC
# Line 4716  for(;;) Line 4716  for(;;)
4716      register int *iend = iptr + resetcount;      register int *iend = iptr + resetcount;
4717      while (iptr < iend) *iptr++ = -1;      while (iptr < iend) *iptr++ = -1;
4718      }      }
4719    
4720    /* Advance to a unique first char if possible. If firstline is TRUE, the    /* If firstline is TRUE, the start of the match is constrained to the first
4721    start of the match is constrained to the first line of a multiline string.    line of a multiline string. That is, the match must be before or at the first
4722    That is, the match must be before or at the first newline. Implement this by    newline. Implement this by temporarily adjusting end_subject so that we stop
4723    temporarily adjusting end_subject so that we stop scanning at a newline. If    scanning at a newline. If the match fails at the newline, later code breaks
4724    the match fails at the newline, later code breaks this loop. */    this loop. */
4725    
4726    if (firstline)    if (firstline)
4727      {      {
# Line 4740  for(;;) Line 4740  for(;;)
4740      while (t < md->end_subject && !IS_NEWLINE(t)) t++;      while (t < md->end_subject && !IS_NEWLINE(t)) t++;
4741      end_subject = t;      end_subject = t;
4742      }      }
4743    
4744      /* There are some optimizations that avoid running the match if a known
4745      starting point is not found, or if a known later character is not present.
4746      However, there is an option that disables these, for testing and for ensuring
4747      that all callouts do actually occur. */
4748    
4749      if ((options & PCRE_NO_START_OPTIMIZE) == 0)
4750        {
4751        /* Advance to a unique first byte if there is one. */
4752    
4753    /* Now advance to a unique first byte if there is one. */      if (first_byte >= 0)
   
   if (first_byte >= 0)  
     {  
     if (first_byte_caseless)  
       while (start_match < end_subject && md->lcc[*start_match] != first_byte)  
         start_match++;  
     else  
       while (start_match < end_subject && *start_match != first_byte)  
         start_match++;  
     }  
   
   /* Or to just after a linebreak for a multiline match */  
   
   else if (startline)  
     {  
     if (start_match > md->start_subject + start_offset)  
4754        {        {
4755  #ifdef SUPPORT_UTF8        if (first_byte_caseless)
4756        if (utf8)          while (start_match < end_subject && md->lcc[*start_match] != first_byte)
4757              start_match++;
4758          else
4759            while (start_match < end_subject && *start_match != first_byte)
4760              start_match++;
4761          }
4762    
4763        /* Or to just after a linebreak for a multiline match */
4764    
4765        else if (startline)
4766          {
4767          if (start_match > md->start_subject + start_offset)
4768          {          {
4769          while (start_match < end_subject && !WAS_NEWLINE(start_match))  #ifdef SUPPORT_UTF8
4770            if (utf8)
4771            {            {
4772            start_match++;            while (start_match < end_subject && !WAS_NEWLINE(start_match))
4773            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)              {
4774              start_match++;              start_match++;
4775                while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
4776                  start_match++;
4777                }
4778            }            }
4779          }          else
       else  
4780  #endif  #endif
4781        while (start_match < end_subject && !WAS_NEWLINE(start_match))          while (start_match < end_subject && !WAS_NEWLINE(start_match))
4782          start_match++;            start_match++;
4783    
4784        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,          /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
4785        and we are now at a LF, advance the match position by one more character.          and we are now at a LF, advance the match position by one more character.
4786        */          */
4787    
4788        if (start_match[-1] == '\r' &&          if (start_match[-1] == '\r' &&
4789             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
4790             start_match < end_subject &&               start_match < end_subject &&
4791             *start_match == '\n')               *start_match == '\n')
4792          start_match++;            start_match++;
4793            }
4794        }        }
4795      }  
4796        /* Or to a non-unique first byte after study */
4797    /* Or to a non-unique first byte after study */  
4798        else if (start_bits != NULL)
   else if (start_bits != NULL)  
     {  
     while (start_match < end_subject)  
4799        {        {
4800        register unsigned int c = *start_match;        while (start_match < end_subject)
4801        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;          {
4802          else break;          register unsigned int c = *start_match;
4803            if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
4804              else break;
4805            }
4806        }        }
4807      }      }   /* Starting optimizations */
4808    
4809    /* Restore fudged end_subject */    /* Restore fudged end_subject */
4810    
4811    end_subject = save_end_subject;    end_subject = save_end_subject;
4812    
4813  #ifdef DEBUG  /* Sigh. Some compilers never learn. */  #ifdef DEBUG  /* Sigh. Some compilers never learn. */
# Line 4808  for(;;) Line 4816  for(;;)
4816    printf("\n");    printf("\n");
4817  #endif  #endif
4818    
4819    /* If req_byte is set, we know that that character must appear in the subject    /* If req_byte is set, we know that that character must appear in the
4820    for the match to succeed. If the first character is set, req_byte must be    subject for the match to succeed. If the first character is set, req_byte
4821    later in the subject; otherwise the test starts at the match point. This    must be later in the subject; otherwise the test starts at the match point.
4822    optimization can save a huge amount of backtracking in patterns with nested    This optimization can save a huge amount of backtracking in patterns with
4823    unlimited repeats that aren't going to match. Writing separate code for    nested unlimited repeats that aren't going to match. Writing separate code
4824    cased/caseless versions makes it go faster, as does using an autoincrement    for cased/caseless versions makes it go faster, as does using an
4825    and backing off on a match.    autoincrement and backing off on a match.
4826    
4827    HOWEVER: when the subject string is very, very long, searching to its end can    HOWEVER: when the subject string is very, very long, searching to its end
4828    take a long time, and give bad performance on quite ordinary patterns. This    can take a long time, and give bad performance on quite ordinary patterns.
4829    showed up when somebody was matching something like /^\d+C/ on a 32-megabyte    This showed up when somebody was matching something like /^\d+C/ on a
4830    string... so we don't do this when the string is sufficiently long.    32-megabyte string... so we don't do this when the string is sufficiently
4831      long.
4832    
4833    ALSO: this processing is disabled when partial matching is requested.    ALSO: this processing is disabled when partial matching is requested, or if
4834    */    disabling is explicitly requested. */
4835    
4836    if (req_byte >= 0 &&    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
4837          req_byte >= 0 &&
4838        end_subject - start_match < REQ_BYTE_MAX &&        end_subject - start_match < REQ_BYTE_MAX &&
4839        !md->partial)        !md->partial)
4840      {      {

Legend:
Removed from v.384
Changed lines
Added in v.389

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12