/[pcre]/code/trunk/pcre16_utf16_utils.c
ViewVC logotype

Diff of /code/trunk/pcre16_utf16_utils.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/branches/pcre16/pcre16_utf16_utils.c revision 782 by zherczeg, Sat Dec 3 23:58:37 2011 UTC code/trunk/pcre16_utf16_utils.c revision 836 by ph10, Wed Dec 28 17:16:11 2011 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 51  strings to host byte order. */ Line 51  strings to host byte order. */
51    
52  #include "pcre_internal.h"  #include "pcre_internal.h"
53    
54    /*************************************************
55    *  Convert any UTF-16 string to host byte order  *
56    *************************************************/
57    
58    /* This function takes a UTF-16 string and converts
59    it to host byte order. The length can be explicitly set,
60    or automatically detected for zero-terminated strings.
61    BOMs can be kept or discarded during the conversion.
62    Conversion can be done in place (output == input).
63    
64    Arguments:
65      output     the output buffer, its size must be greater
66                 than or equal to that of the input string
67      input      any UTF-16 string
68      length     the number of characters in the input string
69                 can be less than zero for zero terminated strings
70      host_byte_order
71                 A non-zero value means the input is in host byte
72                 order, which can be dynamically changed by BOMs later.
73                 Initially it contains the starting byte order and returns
74                 with the last byte order so it can be used for stream
75                 processing. It can be NULL, in which case the input is
76                 initially assumed to be in host byte order.
77      keep_boms  for a non-zero value, the BOM (0xfeff) characters
78                 are copied as well
79    
80    Returns:     the number of characters placed into the output buffer,
81                 including the zero-terminator
82    */
83    
84  int  int
85  pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input, int length, int keep_boms)  pcre16_utf16_to_host_byte_order(PCRE_SCHAR16 *output, PCRE_SPTR16 input,
86      int length, int *host_byte_order, int keep_boms)
87  {  {
88  #ifdef SUPPORT_UTF16  #ifdef SUPPORT_UTF
89  /* This function converts any UTF-16 string to host byte order and optionally removes  /* This function converts any UTF-16 string to host byte order and optionally
90  any Byte Order Marks (BOMS). Returns with the remaining length. */  removes any Byte Order Marks (BOMS). Returns with the remaining length. */
91  BOOL same_bo = TRUE;  int host_bo = host_byte_order != NULL ? *host_byte_order : 1;
92  PCRE_SPTR16 end = input + length;  pcre_uchar *optr = (pcre_uchar *)output;
93    const pcre_uchar *iptr = (const pcre_uchar *)input;
94    const pcre_uchar *end;
95  /* The c variable must be unsigned. */  /* The c variable must be unsigned. */
96  register pcre_uchar c;  register pcre_uchar c;
97    
98  while (input < end)  if (length < 0)
99      length = STRLEN_UC(iptr) + 1;
100    end = iptr + length;
101    
102    while (iptr < end)
103    {    {
104    c = *input++;    c = *iptr++;
105    if (c == 0xfeff || c == 0xfffe)    if (c == 0xfeff || c == 0xfffe)
106      {      {
107      /* Detecting the byte order of the machine is unnecessary, it is      /* Detecting the byte order of the machine is unnecessary, it is
108      enough to know that the UTF-16 string has the same byte order or not. */      enough to know that the UTF-16 string has the same byte order or not. */
109      same_bo = c == 0xfeff;      host_bo = c == 0xfeff;
110      if (keep_boms != 0)      if (keep_boms != 0)
111        *output++ = 0xfeff;        *optr++ = 0xfeff;
112      else      else
113        length--;        length--;
114      }      }
115    else    else
116      *output++ = same_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */      *optr++ = host_bo ? c : ((c >> 8) | (c << 8)); /* Flip bytes if needed. */
117    }    }
118    if (host_byte_order != NULL)
119      *host_byte_order = host_bo;
120    
121  #else  #else /* SUPPORT_UTF */
122  (void)(output);  /* Keep picky compilers happy */  (void)(output);  /* Keep picky compilers happy */
123  (void)(input);  (void)(input);
124  (void)(keep_boms);  (void)(keep_boms);
125  #endif  #endif /* SUPPORT_UTF */
126  return length;  return length;
127  }  }
128    

Legend:
Removed from v.782  
changed lines
  Added in v.836

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12