/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 142 by ph10, Fri Mar 30 15:55:18 2007 UTC revision 143 by ph10, Mon Apr 2 10:08:14 2007 UTC
# Line 1972  while (!done) Line 1972  while (!done)
1972    
1973      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1974        {        {
       int gany_fudge;  
1975        if (timeitm > 0)        if (timeitm > 0)
1976          {          {
1977          register int i;          register int i;
# Line 2212  while (!done) Line 2211  while (!done)
2211          }          }
2212    
2213        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2214        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2215        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2216        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2217        offset values to achieve this. We won't be at the end of the string -  
2218        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any".
2219          If the previous match was at the end of a line terminated by CRLF, an
2220          advance of one character just passes the \r, whereas we should prefer the
2221          longer newline sequence, as does the code in pcre_exec(). Fudge the
2222          offset value to achieve this.
2223    
2224          Otherwise, in the case of UTF-8 matching, the advance must be one
2225          character, not one byte. */
2226    
2227        else        else
2228          {          {
# Line 2224  while (!done) Line 2230  while (!done)
2230            {            {
2231            int onechar = 1;            int onechar = 1;
2232            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2233            if (use_utf8)            if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) ==
2234                      PCRE_NEWLINE_ANY &&
2235                  start_offset < len - 1 &&
2236                  bptr[start_offset] == '\r' &&
2237                  bptr[start_offset+1] == '\n')
2238                onechar++;
2239              else if (use_utf8)
2240              {              {
2241              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2242                {                {
# Line 2256  while (!done) Line 2268  while (!done)
2268        what Perl's /g option does. This turns out to be rather cunning. First        what Perl's /g option does. This turns out to be rather cunning. First
2269        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2270        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2271        character.        character. */
   
       Yet more complication arises in the case when the newline option is  
       "any" and a pattern in multiline mode has to match at the start of a  
       line. If a previous match was at the end of a line, an advance of one  
       character just passes the \r, whereas we should prefer the longer newline  
       sequence, as does the code in pcre_exec(). So we fudge it. */  
2272    
2273        g_notempty = 0;        g_notempty = 0;
       gany_fudge = 0;  
2274    
2275        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2276          {          {
2277          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2278          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
         if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&  
             (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&  
             use_offsets[0] < len - 1 &&  
             bptr[use_offsets[0]] == '\r' &&  
             bptr[use_offsets[0]+1] == '\n')  
           gany_fudge = 1;  
2279          }          }
2280    
2281        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2282    
2283        if (do_g) start_offset = use_offsets[1] + gany_fudge;        if (do_g) start_offset = use_offsets[1];
2284    
2285        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2286    
2287        else        else
2288          {          {
2289          bptr += use_offsets[1] + gany_fudge;          bptr += use_offsets[1];
2290          len -= use_offsets[1] + gany_fudge;          len -= use_offsets[1];
2291          }          }
2292        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2293    

Legend:
Removed from v.142  
changed lines
  Added in v.143

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12