/[pcre]/code/trunk/study.c
ViewVC logotype

Diff of /code/trunk/study.c

Parent Directory | Revision Log | View Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC revision 71 by nigel, Sat Feb 24 21:40:24 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997-2002 University of Cambridge             Copyright (c) 1997-2003 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 297  do Line 297  do
297        /* Character class where all the information is in a bit map: set the        /* Character class where all the information is in a bit map: set the
298        bits and either carry on or not, according to the repeat count. If it was        bits and either carry on or not, according to the repeat count. If it was
299        a negative class, and we are operating with UTF-8 characters, any byte        a negative class, and we are operating with UTF-8 characters, any byte
300        with the top-bit set is a potentially valid starter because it may start        with a value >= 0xc4 is a potentially valid starter because it starts a
301        a character with a value > 255. (This is sub-optimal in that the        character with a value > 255. */
       character may be in the range 128-255, and those characters might be  
       unwanted, but that's as far as we go for the moment.) */  
302    
303        case OP_NCLASS:        case OP_NCLASS:
304        if (utf8) memset(start_bits+16, 0xff, 16);        if (utf8)
305            {
306            start_bits[24] |= 0xf0;              /* Bits for 0xc4 - 0xc7 */
307            memset(start_bits+25, 0xff, 7);      /* Bits for 0xc8 - 0xff */
308            }
309        /* Fall through */        /* Fall through */
310    
311        case OP_CLASS:        case OP_CLASS:
312          {          {
313          tcode++;          tcode++;
314          for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];  
315            /* In UTF-8 mode, the bits in a bit map correspond to character
316            values, not to byte values. However, the bit map we are constructing is
317            for byte values. So we have to do a conversion for characters whose
318            value is > 127. In fact, there are only two possible starting bytes for
319            characters in the range 128 - 255. */
320    
321            if (utf8)
322              {
323              for (c = 0; c < 16; c++) start_bits[c] |= tcode[c];
324              for (c = 128; c < 256; c++)
325                {
326                if ((tcode[c/8] && (1 << (c&7))) != 0)
327                  {
328                  int d = (c >> 6) | 0xc0;            /* Set bit for this starter */
329                  start_bits[d/8] |= (1 << (d&7));    /* and then skip on to the */
330                  c = (c & 0xc0) + 0x40 - 1;          /* next relevant character. */
331                  }
332                }
333              }
334    
335            /* In non-UTF-8 mode, the two bit maps are completely compatible. */
336    
337            else
338              {
339              for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
340              }
341    
342            /* Advance past the bit map, and act on what follows */
343    
344          tcode += 32;          tcode += 32;
345          switch (*tcode)          switch (*tcode)
346            {            {

Legend:
Removed from v.63  
changed lines
  Added in v.71

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12