1 #ifndef BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP 2 #define BOOST_PROPERTY_TREE_DETAIL_JSON_PARSER_WIDE_ENCODING_HPP 3 4 #include <boost/range/iterator_range_core.hpp> 5 6 #include <cassert> 7 #include <utility> 8 9 namespace pdalboost { namespace property_tree { 10 namespace json_parser { namespace detail 11 { 12 13 struct external_wide_encoding 14 { 15 typedef wchar_t external_char; 16 is_nlpdalboost::property_tree::json_parser::detail::external_wide_encoding17 bool is_nl(wchar_t c) const { return c == L'\n'; } is_wspdalboost::property_tree::json_parser::detail::external_wide_encoding18 bool is_ws(wchar_t c) const { 19 return c == L' ' || c == L'\t' || c == L'\n' || c == L'\r'; 20 } 21 is_minuspdalboost::property_tree::json_parser::detail::external_wide_encoding22 bool is_minus(wchar_t c) const { return c == L'-'; } is_plusminuspdalboost::property_tree::json_parser::detail::external_wide_encoding23 bool is_plusminus(wchar_t c) const { return c == L'+' || c == L'-'; } is_dotpdalboost::property_tree::json_parser::detail::external_wide_encoding24 bool is_dot(wchar_t c) const { return c == L'.'; } is_eEpdalboost::property_tree::json_parser::detail::external_wide_encoding25 bool is_eE(wchar_t c) const { return c == L'e' || c == L'E'; } is_0pdalboost::property_tree::json_parser::detail::external_wide_encoding26 bool is_0(wchar_t c) const { return c == L'0'; } is_digitpdalboost::property_tree::json_parser::detail::external_wide_encoding27 bool is_digit(wchar_t c) const { return c >= L'0' && c <= L'9'; } is_digit0pdalboost::property_tree::json_parser::detail::external_wide_encoding28 bool is_digit0(wchar_t c) const { return c >= L'1' && c <= L'9'; } 29 is_quotepdalboost::property_tree::json_parser::detail::external_wide_encoding30 bool is_quote(wchar_t c) const { return c == L'"'; } is_backslashpdalboost::property_tree::json_parser::detail::external_wide_encoding31 bool is_backslash(wchar_t c) const { return c == L'\\'; } is_slashpdalboost::property_tree::json_parser::detail::external_wide_encoding32 bool is_slash(wchar_t c) const { return c == L'/'; } 33 is_commapdalboost::property_tree::json_parser::detail::external_wide_encoding34 bool is_comma(wchar_t c) const { return c == L','; } is_open_bracketpdalboost::property_tree::json_parser::detail::external_wide_encoding35 bool is_open_bracket(wchar_t c) const { return c == L'['; } is_close_bracketpdalboost::property_tree::json_parser::detail::external_wide_encoding36 bool is_close_bracket(wchar_t c) const { return c == L']'; } is_colonpdalboost::property_tree::json_parser::detail::external_wide_encoding37 bool is_colon(wchar_t c) const { return c == L':'; } is_open_bracepdalboost::property_tree::json_parser::detail::external_wide_encoding38 bool is_open_brace(wchar_t c) const { return c == L'{'; } is_close_bracepdalboost::property_tree::json_parser::detail::external_wide_encoding39 bool is_close_brace(wchar_t c) const { return c == L'}'; } 40 is_apdalboost::property_tree::json_parser::detail::external_wide_encoding41 bool is_a(wchar_t c) const { return c == L'a'; } is_bpdalboost::property_tree::json_parser::detail::external_wide_encoding42 bool is_b(wchar_t c) const { return c == L'b'; } is_epdalboost::property_tree::json_parser::detail::external_wide_encoding43 bool is_e(wchar_t c) const { return c == L'e'; } is_fpdalboost::property_tree::json_parser::detail::external_wide_encoding44 bool is_f(wchar_t c) const { return c == L'f'; } is_lpdalboost::property_tree::json_parser::detail::external_wide_encoding45 bool is_l(wchar_t c) const { return c == L'l'; } is_npdalboost::property_tree::json_parser::detail::external_wide_encoding46 bool is_n(wchar_t c) const { return c == L'n'; } is_rpdalboost::property_tree::json_parser::detail::external_wide_encoding47 bool is_r(wchar_t c) const { return c == L'r'; } is_spdalboost::property_tree::json_parser::detail::external_wide_encoding48 bool is_s(wchar_t c) const { return c == L's'; } is_tpdalboost::property_tree::json_parser::detail::external_wide_encoding49 bool is_t(wchar_t c) const { return c == L't'; } is_updalboost::property_tree::json_parser::detail::external_wide_encoding50 bool is_u(wchar_t c) const { return c == L'u'; } 51 decode_hexdigitpdalboost::property_tree::json_parser::detail::external_wide_encoding52 int decode_hexdigit(wchar_t c) { 53 if (c >= L'0' && c <= L'9') return c - L'0'; 54 if (c >= L'A' && c <= L'F') return c - L'A' + 10; 55 if (c >= L'a' && c <= L'f') return c - L'a' + 10; 56 return -1; 57 } 58 }; 59 60 template <bool B> struct is_utf16 {}; 61 62 class wide_wide_encoding : public external_wide_encoding 63 { 64 typedef is_utf16<sizeof(wchar_t) == 2> test_utf16; 65 public: 66 typedef wchar_t internal_char; 67 68 template <typename Iterator> 69 pdalboost::iterator_range<Iterator> to_internal(Iterator first,Iterator last) const70 to_internal(Iterator first, Iterator last) const { 71 return pdalboost::make_iterator_range(first, last); 72 } 73 to_internal_trivial(wchar_t c) const74 wchar_t to_internal_trivial(wchar_t c) const { 75 assert(!is_surrogate_high(c) && !is_surrogate_low(c)); 76 return c; 77 } 78 79 template <typename Iterator, typename Sentinel, 80 typename EncodingErrorFn> skip_codepoint(Iterator & cur,Sentinel end,EncodingErrorFn error_fn) const81 void skip_codepoint(Iterator& cur, Sentinel end, 82 EncodingErrorFn error_fn) const { 83 transcode_codepoint(cur, end, DoNothing(), error_fn); 84 } 85 86 template <typename Iterator, typename Sentinel, typename TranscodedFn, 87 typename EncodingErrorFn> transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn) const88 void transcode_codepoint(Iterator& cur, Sentinel end, 89 TranscodedFn transcoded_fn, EncodingErrorFn error_fn) const { 90 return transcode_codepoint(cur, end, transcoded_fn, error_fn, 91 test_utf16()); 92 } 93 94 template <typename TranscodedFn> feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn) const95 void feed_codepoint(unsigned codepoint, 96 TranscodedFn transcoded_fn) const { 97 feed_codepoint(codepoint, transcoded_fn, test_utf16()); 98 } 99 100 private: 101 struct DoNothing { operator ()pdalboost::property_tree::json_parser::detail::wide_wide_encoding::DoNothing102 void operator ()(wchar_t) const {} 103 }; 104 105 template <typename Iterator, typename Sentinel, typename TranscodedFn, 106 typename EncodingErrorFn> transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<false>) const107 void transcode_codepoint(Iterator& cur, Sentinel end, 108 TranscodedFn transcoded_fn, 109 EncodingErrorFn error_fn, 110 is_utf16<false>) const { 111 wchar_t c = *cur; 112 if (c < 0x20) { 113 error_fn(); 114 } 115 transcoded_fn(c); 116 ++cur; 117 } 118 template <typename Iterator, typename Sentinel, typename TranscodedFn, 119 typename EncodingErrorFn> transcode_codepoint(Iterator & cur,Sentinel end,TranscodedFn transcoded_fn,EncodingErrorFn error_fn,is_utf16<true>) const120 void transcode_codepoint(Iterator& cur, Sentinel end, 121 TranscodedFn transcoded_fn, 122 EncodingErrorFn error_fn, 123 is_utf16<true>) const { 124 wchar_t c = *cur; 125 if (c < 0x20) { 126 error_fn(); 127 } 128 if (is_surrogate_low(c)) { 129 error_fn(); 130 } 131 transcoded_fn(c); 132 ++cur; 133 if (is_surrogate_high(c)) { 134 c = *cur; 135 if (!is_surrogate_low(c)) { 136 error_fn(); 137 } 138 transcoded_fn(c); 139 ++cur; 140 } 141 } 142 143 template <typename TranscodedFn> feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<false>) const144 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn, 145 is_utf16<false>) const { 146 transcoded_fn(static_cast<wchar_t>(codepoint)); 147 } 148 template <typename TranscodedFn> feed_codepoint(unsigned codepoint,TranscodedFn transcoded_fn,is_utf16<true>) const149 void feed_codepoint(unsigned codepoint, TranscodedFn transcoded_fn, 150 is_utf16<true>) const { 151 if (codepoint < 0x10000) { 152 transcoded_fn(static_cast<wchar_t>(codepoint)); 153 } else { 154 codepoint -= 0x10000; 155 transcoded_fn(static_cast<wchar_t>((codepoint >> 10) | 0xd800)); 156 transcoded_fn(static_cast<wchar_t>( 157 (codepoint & 0x3ff) | 0xdc00)); 158 } 159 } 160 is_surrogate_high(unsigned codepoint)161 static bool is_surrogate_high(unsigned codepoint) { 162 return (codepoint & 0xfc00) == 0xd800; 163 } is_surrogate_low(unsigned codepoint)164 static bool is_surrogate_low(unsigned codepoint) { 165 return (codepoint & 0xfc00) == 0xdc00; 166 } 167 }; 168 169 }}}} 170 171 #endif 172