/[pcre]/code/trunk/pcre_valid_utf8.c
ViewVC logotype

Diff of /code/trunk/pcre_valid_utf8.c

Parent Directory | Revision Log | View Patch

revision 409 by ph10, Sat Mar 28 17:10:56 2009 UTC revision 569 by ph10, Sun Nov 7 16:14:50 2010 UTC
# Line 72  Arguments: Line 72  Arguments:
72    
73  Returns:       < 0    if the string is a valid UTF-8 string  Returns:       < 0    if the string is a valid UTF-8 string
74                 >= 0   otherwise; the value is the offset of the bad byte                 >= 0   otherwise; the value is the offset of the bad byte
75    
76    Bad bytes can be:
77    
78      . An isolated byte whose most significant bits are 0x80, because this
79        can only correctly appear within a UTF-8 character;
80    
81      . A byte whose most significant bits are 0xc0, but whose other bits indicate
82        that there are more than 3 additional bytes (i.e. an RFC 2279 starting
83        byte, which is no longer valid under RFC 3629);
84    
85      . A byte after a lead byte whose most significant bits are not 0x80.
86    
87    The returned offset may also be equal to the length of the string; this means
88    that one or more bytes are missing from the final UTF-8 character.
89  */  */
90    
91  int  int
# Line 93  for (p = string; length-- > 0; p++) Line 107  for (p = string; length-- > 0; p++)
107    if (c < 128) continue;    if (c < 128) continue;
108    if (c < 0xc0) return p - string;    if (c < 0xc0) return p - string;
109    ab = _pcre_utf8_table4[c & 0x3f];     /* Number of additional bytes */    ab = _pcre_utf8_table4[c & 0x3f];     /* Number of additional bytes */
110    if (length < ab || ab > 3) return p - string;    if (ab > 3) return p - string;        /* Too many for RFC 3629 */
111      if (length < ab) return p + 1 + length - string;   /* Missing bytes */
112    length -= ab;    length -= ab;
113    
114    /* Check top bits in the second byte */    /* Check top bits in the second byte */

Legend:
Removed from v.409  
changed lines
  Added in v.569

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12