13 #include "utf8_codecvt.hpp"
15 #include <boost/detail/utf8_codecvt_facet.hpp>
20 #include <boost/limits.hpp>
21 #include <boost/config.hpp>
28 #ifndef BOOST_NO_STD_WSTRING
30 BOOST_UTF8_BEGIN_NAMESPACE
// Converts UTF-8 octets starting at `from` into wide characters.
// NOTE(review): this listing is an excerpt; the remaining parameters, the
// declarations of `ucs_result` and `i`, and the closing braces are not visible
// here, so the comments below describe only the lines that are shown.
36 std::codecvt_base::result utf8_codecvt_facet::do_in(
39 const char * from_end,
40 const char * & from_next,
// Keep converting while input remains and the output range is not exhausted.
53 while (from != from_end && to != to_end) {
// Stop with `error` when invalid_leading_octet() rejects the current octet.
56 if (invalid_leading_octet(*from)){
59 return std::codecvt_base::error;
// cont_octet_count bounds the continuation-octet loop below. The table,
// indexed by that count, holds the UTF-8 length-marker bits that are
// subtracted from the lead octet to leave only its payload bits.
64 const int cont_octet_count = get_cont_octet_count(*from);
65 const wchar_t octet1_modifier_table[] = {
66 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
// The cast to unsigned char keeps the octet non-negative where char is signed.
72 (
unsigned char)(*from++) - octet1_modifier_table[cont_octet_count];
// Consume continuation octets until the expected count is reached or the
// input runs out.
79 while(i != cont_octet_count && from != from_end) {
// A rejected continuation octet also ends the conversion with `error`.
82 if (invalid_continuing_octet(*from)) {
85 return std::codecvt_base::error;
// Shift the accumulated value left by six bits ...
88 ucs_result *= (1 << 6);
// ... and add the next octet; subtracting 0x80 strips its continuation marker.
92 ucs_result += (
unsigned char)(*from++) - 0x80;
// Input ended in the middle of a character: move from_next back by i+1 octets
// (presumably the lead octet plus the i continuation octets consumed so far;
// the update of `i` is not visible here) and report `partial`.
97 if (from == from_end && i != cont_octet_count) {
99 from_next = from - (i+1);
101 return std::codecvt_base::partial;
// `ok` when all input was consumed; otherwise `partial` (the outer loop also
// stops when to == to_end).
109 if(from == from_end)
return std::codecvt_base::ok;
110 else return std::codecvt_base::partial;
// Converts wide characters in [from, from_end) into UTF-8 octets.
// NOTE(review): this listing is an excerpt; the remaining parameters, the
// declaration/updates of `i` and `shift_exponent` inside the loops, and the
// closing braces are not visible here.
113 std::codecvt_base::result utf8_codecvt_facet::do_out(
115 const wchar_t * from,
116 const wchar_t * from_end,
117 const wchar_t * & from_next,
// UTF-8 length-marker bits for the lead octet, indexed by the number of
// continuation octets that follow it.
124 const wchar_t octet1_modifier_table[] = {
125 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
128 wchar_t max_wchar = (std::numeric_limits<wchar_t>::max)();
// Keep converting while input remains and the output range is not exhausted.
129 while (from != from_end && to != to_end) {
// NOTE(review): *from is a wchar_t and max_wchar is the largest wchar_t, so
// this comparison looks like it can never be true — confirm intent.
132 if (*from > max_wchar) {
135 return std::codecvt_base::error;
// Number of continuation octets needed for this character.
138 int cont_octet_count = get_cont_octet_out_count(*from);
// Each continuation octet carries six payload bits, so the lead octet's
// payload starts 6 * cont_octet_count bits up.
141 int shift_exponent = (cont_octet_count) * 6;
// Lead octet: marker bits plus the character's bits above shift_exponent.
144 *to++ =
static_cast<char>(octet1_modifier_table[cont_octet_count] +
145 (
unsigned char)(*from / (1 << shift_exponent)));
// Continuation octets: 0x80 marker plus a six-bit slice of the character.
// Presumably shift_exponent is lowered by 6 before each one; that update is
// not visible here.
153 while (i != cont_octet_count && to != to_end) {
155 *to++ =
static_cast<char>(0x80 + ((*from / (1 << shift_exponent)) % (1 << 6)));
// Output ran out in the middle of a character: move to_next back by i+1
// octets and report `partial`.
159 if(to == to_end && i != cont_octet_count) {
161 to_next = to - (i+1);
162 return std::codecvt_base::partial;
// `ok` when all input was consumed; otherwise `partial` (the outer loop also
// stops when to == to_end).
169 if(from == from_end)
return std::codecvt_base::ok;
170 else return std::codecvt_base::partial;
// Returns the number of octets, counted from `from`, that the measuring loop
// below has stepped over (bounded by from_end and max_limit).
// NOTE(review): this listing is an excerpt; the `from` parameter, the rest of
// the signature and the end of the loop body are not visible here.
175 int utf8_codecvt_facet::do_length(
176 BOOST_CODECVT_DO_LENGTH_CONST std::mbstate_t &,
178 const char * from_end,
179 std::size_t max_limit
180 #
if BOOST_WORKAROUND(__IBMCPP__, BOOST_TESTED_AT(600))
// last_octet_count: octet length reported for the most recently inspected
// lead octet. from_next is advanced past that character only at the top of
// the next iteration, once the loop condition has shown that it fits.
195 int last_octet_count=0;
196 std::size_t char_count = 0;
197 const char* from_next = from;
// NOTE(review): when from_next + last_octet_count == from_end the body still
// reads *from_next at from_end — confirm callers guarantee that octet is
// readable, or tighten the condition.
199 while (from_next+last_octet_count <= from_end && char_count <= max_limit) {
200 from_next += last_octet_count;
201 last_octet_count = (get_octet_count(*from_next));
// std::codecvt::length is specified to return from_next - from, i.e. the
// number of octets consumed. Subtracting from_end (as before) yields zero or
// a negative value instead.
204 return static_cast<int>(from_next-from);
// Maps a lead octet to a total octet count for the character it starts,
// based on which value range the octet falls into.
// NOTE(review): this listing is an excerpt; the closing parenthesis/brace and
// the final fallback branch are not visible here.
207 unsigned int utf8_codecvt_facet::get_octet_count(
208 unsigned char lead_octet
// 0x00-0x7f: top bit clear, a single-octet character.
211 if (lead_octet <= 0x7f)
return 1;
// Otherwise the count grows with the number of leading 1 bits:
// 0xc0-0xdf -> 2, 0xe0-0xef -> 3, 0xf0-0xf7 -> 4, 0xf8-0xfb -> 5.
216 if (0xc0 <= lead_octet && lead_octet <= 0xdf)
return 2;
217 else if (0xe0 <= lead_octet && lead_octet <= 0xef)
return 3;
218 else if (0xf0 <= lead_octet && lead_octet <= 0xf7)
return 4;
219 else if (0xf8 <= lead_octet && lead_octet <= 0xfb)
return 5;
222 BOOST_UTF8_END_NAMESPACE
// Template parameterised on a std::size_t value `s`.
// NOTE(review): presumably this is get_cont_octet_out_count_impl, which is
// instantiated with sizeof(wchar_t) elsewhere in this file — the function
// name, signature and return statements are not visible in this excerpt.
225 template<std::
size_t s>
// Threshold tests on `word` against 2^16, 2^21 and 2^26; presumably each one
// selects the next larger continuation-octet count (returns not visible).
247 if (word < 0x10000) {
250 if (word < 0x200000) {
253 if (word < 0x4000000) {
261 BOOST_UTF8_BEGIN_NAMESPACE
// Forwards `word` to get_cont_octet_out_count_impl, instantiated on
// sizeof(wchar_t), and returns its result.
// NOTE(review): the parameter list and braces are not visible in this excerpt.
264 int utf8_codecvt_facet::get_cont_octet_out_count(
267 return get_cont_octet_out_count_impl<sizeof(wchar_t)>(word);
269 BOOST_UTF8_END_NAMESPACE