/[pcre]/code/trunk/pcre_valid_utf8.c
ViewVC logotype

Diff of /code/trunk/pcre_valid_utf8.c

Parent Directory | Revision Log | View Patch

revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC | revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 103  Returns: = 0 if the string is a Line 103  Returns: = 0 if the string is a
103  */  */
104    
105  int  int
106  _pcre_valid_utf8(USPTR string, int length, int *erroroffset)  PRIV(valid_utf)(PCRE_PUCHAR string, int length, int *erroroffset)
107  {  {
108  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
109  register USPTR p;  register PCRE_PUCHAR p;
110    
111  if (length < 0)  if (length < 0)
112    {    {
113    for (p = string; *p != 0; p++);    for (p = string; *p != 0; p++);
114    length = p - string;    length = (int)(p - string);
115    }    }
116    
117  for (p = string; length-- > 0; p++)  for (p = string; length-- > 0; p++)
# Line 123  for (p = string; length-- > 0; p++) Line 123  for (p = string; length-- > 0; p++)
123    
124    if (c < 0xc0)                         /* Isolated 10xx xxxx byte */    if (c < 0xc0)                         /* Isolated 10xx xxxx byte */
125      {      {
126      *erroroffset = p - string;      *erroroffset = (int)(p - string);
127      return PCRE_UTF8_ERR20;      return PCRE_UTF8_ERR20;
128      }      }
129    
130    if (c >= 0xfe)                        /* Invalid 0xfe or 0xff bytes */    if (c >= 0xfe)                        /* Invalid 0xfe or 0xff bytes */
131      {      {
132      *erroroffset = p - string;      *erroroffset = (int)(p - string);
133      return PCRE_UTF8_ERR21;      return PCRE_UTF8_ERR21;
134      }      }
135    
136    ab = _pcre_utf8_table4[c & 0x3f];     /* Number of additional bytes */    ab = PRIV(utf8_table4)[c & 0x3f];     /* Number of additional bytes */
137    if (length < ab)    if (length < ab)
138      {      {
139      *erroroffset = p - string;          /* Missing bytes */      *erroroffset = (int)(p - string);          /* Missing bytes */
140      return ab - length;                 /* Codes ERR1 to ERR5 */      return ab - length;                 /* Codes ERR1 to ERR5 */
141      }      }
142    length -= ab;                         /* Length remaining */    length -= ab;                         /* Length remaining */
# Line 145  for (p = string; length-- > 0; p++) Line 145  for (p = string; length-- > 0; p++)
145    
146    if (((d = *(++p)) & 0xc0) != 0x80)    if (((d = *(++p)) & 0xc0) != 0x80)
147      {      {
148      *erroroffset = p - string - 1;      *erroroffset = (int)(p - string) - 1;
149      return PCRE_UTF8_ERR6;      return PCRE_UTF8_ERR6;
150      }      }
151    
# Line 160  for (p = string; length-- > 0; p++) Line 160  for (p = string; length-- > 0; p++)
160    
161      case 1: if ((c & 0x3e) == 0)      case 1: if ((c & 0x3e) == 0)
162        {        {
163        *erroroffset = p - string - 1;        *erroroffset = (int)(p - string) - 1;
164        return PCRE_UTF8_ERR15;        return PCRE_UTF8_ERR15;
165        }        }
166      break;      break;
# Line 172  for (p = string; length-- > 0; p++) Line 172  for (p = string; length-- > 0; p++)
172      case 2:      case 2:
173      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
174        {        {
175        *erroroffset = p - string - 2;        *erroroffset = (int)(p - string) - 2;
176        return PCRE_UTF8_ERR7;        return PCRE_UTF8_ERR7;
177        }        }
178      if (c == 0xe0 && (d & 0x20) == 0)      if (c == 0xe0 && (d & 0x20) == 0)
179        {        {
180        *erroroffset = p - string - 2;        *erroroffset = (int)(p - string) - 2;
181        return PCRE_UTF8_ERR16;        return PCRE_UTF8_ERR16;
182        }        }
183      if (c == 0xed && d >= 0xa0)      if (c == 0xed && d >= 0xa0)
184        {        {
185        *erroroffset = p - string - 2;        *erroroffset = (int)(p - string) - 2;
186        return PCRE_UTF8_ERR14;        return PCRE_UTF8_ERR14;
187        }        }
188      break;      break;
# Line 194  for (p = string; length-- > 0; p++) Line 194  for (p = string; length-- > 0; p++)
194      case 3:      case 3:
195      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
196        {        {
197        *erroroffset = p - string - 2;        *erroroffset = (int)(p - string) - 2;
198        return PCRE_UTF8_ERR7;        return PCRE_UTF8_ERR7;
199        }        }
200      if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */      if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
201        {        {
202        *erroroffset = p - string - 3;        *erroroffset = (int)(p - string) - 3;
203        return PCRE_UTF8_ERR8;        return PCRE_UTF8_ERR8;
204        }        }
205      if (c == 0xf0 && (d & 0x30) == 0)      if (c == 0xf0 && (d & 0x30) == 0)
206        {        {
207        *erroroffset = p - string - 3;        *erroroffset = (int)(p - string) - 3;
208        return PCRE_UTF8_ERR17;        return PCRE_UTF8_ERR17;
209        }        }
210      if (c > 0xf4 || (c == 0xf4 && d > 0x8f))      if (c > 0xf4 || (c == 0xf4 && d > 0x8f))
211        {        {
212        *erroroffset = p - string - 3;        *erroroffset = (int)(p - string) - 3;
213        return PCRE_UTF8_ERR13;        return PCRE_UTF8_ERR13;
214        }        }
215      break;      break;
# Line 225  for (p = string; length-- > 0; p++) Line 225  for (p = string; length-- > 0; p++)
225      case 4:      case 4:
226      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
227        {        {
228        *erroroffset = p - string - 2;        *erroroffset = (int)(p - string) - 2;
229        return PCRE_UTF8_ERR7;        return PCRE_UTF8_ERR7;
230        }        }
231      if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */      if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
232        {        {
233        *erroroffset = p - string - 3;        *erroroffset = (int)(p - string) - 3;
234        return PCRE_UTF8_ERR8;        return PCRE_UTF8_ERR8;
235        }        }
236      if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */      if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
237        {        {
238        *erroroffset = p - string - 4;        *erroroffset = (int)(p - string) - 4;
239        return PCRE_UTF8_ERR9;        return PCRE_UTF8_ERR9;
240        }        }
241      if (c == 0xf8 && (d & 0x38) == 0)      if (c == 0xf8 && (d & 0x38) == 0)
242        {        {
243        *erroroffset = p - string - 4;        *erroroffset = (int)(p - string) - 4;
244        return PCRE_UTF8_ERR18;        return PCRE_UTF8_ERR18;
245        }        }
246      break;      break;
# Line 251  for (p = string; length-- > 0; p++) Line 251  for (p = string; length-- > 0; p++)
251      case 5:      case 5:
252      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */      if ((*(++p) & 0xc0) != 0x80)     /* Third byte */
253        {        {
254        *erroroffset = p - string - 2;        *erroroffset = (int)(p - string) - 2;
255        return PCRE_UTF8_ERR7;        return PCRE_UTF8_ERR7;
256        }        }
257      if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */      if ((*(++p) & 0xc0) != 0x80)     /* Fourth byte */
258        {        {
259        *erroroffset = p - string - 3;        *erroroffset = (int)(p - string) - 3;
260        return PCRE_UTF8_ERR8;        return PCRE_UTF8_ERR8;
261        }        }
262      if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */      if ((*(++p) & 0xc0) != 0x80)     /* Fifth byte */
263        {        {
264        *erroroffset = p - string - 4;        *erroroffset = (int)(p - string) - 4;
265        return PCRE_UTF8_ERR9;        return PCRE_UTF8_ERR9;
266        }        }
267      if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */      if ((*(++p) & 0xc0) != 0x80)     /* Sixth byte */
268        {        {
269        *erroroffset = p - string - 5;        *erroroffset = (int)(p - string) - 5;
270        return PCRE_UTF8_ERR10;        return PCRE_UTF8_ERR10;
271        }        }
272      if (c == 0xfc && (d & 0x3c) == 0)      if (c == 0xfc && (d & 0x3c) == 0)
273        {        {
274        *erroroffset = p - string - 5;        *erroroffset = (int)(p - string) - 5;
275        return PCRE_UTF8_ERR19;        return PCRE_UTF8_ERR19;
276        }        }
277      break;      break;
# Line 283  for (p = string; length-- > 0; p++) Line 283  for (p = string; length-- > 0; p++)
283    
284    if (ab > 3)    if (ab > 3)
285      {      {
286      *erroroffset = p - string - ab;      *erroroffset = (int)(p - string) - ab;
287      return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;      return (ab == 4)? PCRE_UTF8_ERR11 : PCRE_UTF8_ERR12;
288      }      }
289    }    }
290    
291  #else  /* SUPPORT_UTF8 */  #else  /* SUPPORT_UTF */
292  (void)(string);  /* Keep picky compilers happy */  (void)(string);  /* Keep picky compilers happy */
293  (void)(length);  (void)(length);
294  #endif  #endif

Legend:
Removed from v.835  
changed lines
  Added in v.836

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12