1 // Tencent is pleased to support the open source community by making RapidJSON available. 2 // 3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. 4 // 5 // Licensed under the MIT License (the "License"); you may not use this file except 6 // in compliance with the License. You may obtain a copy of the License at 7 // 8 // http://opensource.org/licenses/MIT 9 // 10 // Unless required by applicable law or agreed to in writing, software distributed 11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR 12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the 13 // specific language governing permissions and limitations under the License. 14 15 #ifndef RAPIDJSON_ENCODINGS_H_ 16 #define RAPIDJSON_ENCODINGS_H_ 17 18 #include "rapidjson.h" 19 20 #ifdef _MSC_VER 21 RAPIDJSON_DIAG_PUSH 22 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data 23 RAPIDJSON_DIAG_OFF(4702) // unreachable code 24 #elif defined(__GNUC__) 25 RAPIDJSON_DIAG_PUSH 26 RAPIDJSON_DIAG_OFF(effc++) 27 RAPIDJSON_DIAG_OFF(overflow) 28 #endif 29 30 RAPIDJSON_NAMESPACE_BEGIN 31 32 /////////////////////////////////////////////////////////////////////////////// 33 // Encoding 34 35 /*! \class rapidjson::Encoding 36 \brief Concept for encoding of Unicode characters. 37 38 \code 39 concept Encoding { 40 typename Ch; //! Type of character. A "character" is actually a code unit in unicode's definition. 41 42 enum { supportUnicode = 1 }; // or 0 if not supporting unicode 43 44 //! \brief Encode a Unicode codepoint to an output stream. 45 //! \param os Output stream. 46 //! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively. 47 template<typename OutputStream> 48 static void Encode(OutputStream& os, unsigned codepoint); 49 50 //! \brief Decode a Unicode codepoint from an input stream. 51 //! \param is Input stream. 52 //! \param codepoint Output of the unicode codepoint. 53 //! \return true if a valid codepoint can be decoded from the stream. 54 template <typename InputStream> 55 static bool Decode(InputStream& is, unsigned* codepoint); 56 57 //! \brief Validate one Unicode codepoint from an encoded stream. 58 //! \param is Input stream to obtain codepoint. 59 //! \param os Output for copying one codepoint. 60 //! \return true if it is valid. 61 //! \note This function just validating and copying the codepoint without actually decode it. 62 template <typename InputStream, typename OutputStream> 63 static bool Validate(InputStream& is, OutputStream& os); 64 65 // The following functions are deal with byte streams. 66 67 //! Take a character from input byte stream, skip BOM if exist. 68 template <typename InputByteStream> 69 static CharType TakeBOM(InputByteStream& is); 70 71 //! Take a character from input byte stream. 72 template <typename InputByteStream> 73 static Ch Take(InputByteStream& is); 74 75 //! Put BOM to output byte stream. 76 template <typename OutputByteStream> 77 static void PutBOM(OutputByteStream& os); 78 79 //! Put a character to output byte stream. 80 template <typename OutputByteStream> 81 static void Put(OutputByteStream& os, Ch c); 82 }; 83 \endcode 84 */ 85 86 /////////////////////////////////////////////////////////////////////////////// 87 // UTF8 88 89 //! UTF-8 encoding. 90 /*! http://en.wikipedia.org/wiki/UTF-8 91 http://tools.ietf.org/html/rfc3629 92 \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char. 93 \note implements Encoding concept 94 */ 95 template<typename CharType = char> 96 struct UTF8 { 97 typedef CharType Ch; 98 99 enum { supportUnicode = 1 }; 100 101 template<typename OutputStream> EncodeUTF8102 static void Encode(OutputStream& os, unsigned codepoint) { 103 if (codepoint <= 0x7F) 104 os.Put(static_cast<Ch>(codepoint & 0xFF)); 105 else if (codepoint <= 0x7FF) { 106 os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF))); 107 os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F)))); 108 } 109 else if (codepoint <= 0xFFFF) { 110 os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF))); 111 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 112 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); 113 } 114 else { 115 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 116 os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF))); 117 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F))); 118 os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F))); 119 os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F))); 120 } 121 } 122 123 template <typename InputStream> DecodeUTF8124 static bool Decode(InputStream& is, unsigned* codepoint) { 125 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu) 126 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0) 127 #define TAIL() COPY(); TRANS(0x70) 128 Ch c = is.Take(); 129 if (!(c & 0x80)) { 130 *codepoint = (unsigned char)c; 131 return true; 132 } 133 134 unsigned char type = GetRange((unsigned char)c); 135 *codepoint = (0xFF >> type) & (unsigned char)c; 136 bool result = true; 137 switch (type) { 138 case 2: TAIL(); return result; 139 case 3: TAIL(); TAIL(); return result; 140 case 4: COPY(); TRANS(0x50); TAIL(); return result; 141 case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; 142 case 6: TAIL(); TAIL(); TAIL(); return result; 143 case 10: COPY(); TRANS(0x20); TAIL(); return result; 144 case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; 145 default: return false; 146 } 147 #undef COPY 148 #undef TRANS 149 #undef TAIL 150 } 151 152 template <typename InputStream, typename OutputStream> ValidateUTF8153 static bool Validate(InputStream& is, OutputStream& os) { 154 #define COPY() os.Put(c = is.Take()) 155 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & mask) != 0) 156 #define TAIL() COPY(); TRANS(0x70) 157 Ch c; 158 COPY(); 159 if (!(c & 0x80)) 160 return true; 161 162 bool result = true; 163 switch (GetRange((unsigned char)c)) { 164 case 2: TAIL(); return result; 165 case 3: TAIL(); TAIL(); return result; 166 case 4: COPY(); TRANS(0x50); TAIL(); return result; 167 case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result; 168 case 6: TAIL(); TAIL(); TAIL(); return result; 169 case 10: COPY(); TRANS(0x20); TAIL(); return result; 170 case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result; 171 default: return false; 172 } 173 #undef COPY 174 #undef TRANS 175 #undef TAIL 176 } 177 GetRangeUTF8178 static unsigned char GetRange(unsigned char c) { 179 // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ 180 // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types. 181 static const unsigned char type[] = { 182 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 183 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 184 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 185 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 186 0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10, 187 0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40, 188 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, 189 0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20, 190 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 191 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8, 192 }; 193 return type[c]; 194 } 195 196 template <typename InputByteStream> TakeBOMUTF8197 static CharType TakeBOM(InputByteStream& is) { 198 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 199 Ch c = Take(is); 200 if ((unsigned char)c != 0xEFu) return c; 201 c = is.Take(); 202 if ((unsigned char)c != 0xBBu) return c; 203 c = is.Take(); 204 if ((unsigned char)c != 0xBFu) return c; 205 c = is.Take(); 206 return c; 207 } 208 209 template <typename InputByteStream> TakeUTF8210 static Ch Take(InputByteStream& is) { 211 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 212 return is.Take(); 213 } 214 215 template <typename OutputByteStream> PutBOMUTF8216 static void PutBOM(OutputByteStream& os) { 217 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 218 os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu); 219 } 220 221 template <typename OutputByteStream> PutUTF8222 static void Put(OutputByteStream& os, Ch c) { 223 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 224 os.Put(static_cast<typename OutputByteStream::Ch>(c)); 225 } 226 }; 227 228 /////////////////////////////////////////////////////////////////////////////// 229 // UTF16 230 231 //! UTF-16 encoding. 232 /*! http://en.wikipedia.org/wiki/UTF-16 233 http://tools.ietf.org/html/rfc2781 234 \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead. 235 \note implements Encoding concept 236 237 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. 238 For streaming, use UTF16LE and UTF16BE, which handle endianness. 239 */ 240 template<typename CharType = wchar_t> 241 struct UTF16 { 242 typedef CharType Ch; 243 RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2); 244 245 enum { supportUnicode = 1 }; 246 247 template<typename OutputStream> EncodeUTF16248 static void Encode(OutputStream& os, unsigned codepoint) { 249 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 250 if (codepoint <= 0xFFFF) { 251 RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair 252 os.Put(static_cast<typename OutputStream::Ch>(codepoint)); 253 } 254 else { 255 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 256 unsigned v = codepoint - 0x10000; 257 os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800)); 258 os.Put((v & 0x3FF) | 0xDC00); 259 } 260 } 261 262 template <typename InputStream> DecodeUTF16263 static bool Decode(InputStream& is, unsigned* codepoint) { 264 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); 265 Ch c = is.Take(); 266 if (c < 0xD800 || c > 0xDFFF) { 267 *codepoint = c; 268 return true; 269 } 270 else if (c <= 0xDBFF) { 271 *codepoint = (c & 0x3FF) << 10; 272 c = is.Take(); 273 *codepoint |= (c & 0x3FF); 274 *codepoint += 0x10000; 275 return c >= 0xDC00 && c <= 0xDFFF; 276 } 277 return false; 278 } 279 280 template <typename InputStream, typename OutputStream> ValidateUTF16281 static bool Validate(InputStream& is, OutputStream& os) { 282 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2); 283 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2); 284 Ch c; 285 os.Put(c = is.Take()); 286 if (c < 0xD800 || c > 0xDFFF) 287 return true; 288 else if (c <= 0xDBFF) { 289 os.Put(c = is.Take()); 290 return c >= 0xDC00 && c <= 0xDFFF; 291 } 292 return false; 293 } 294 }; 295 296 //! UTF-16 little endian encoding. 297 template<typename CharType = wchar_t> 298 struct UTF16LE : UTF16<CharType> { 299 template <typename InputByteStream> TakeBOMUTF16LE300 static CharType TakeBOM(InputByteStream& is) { 301 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 302 CharType c = Take(is); 303 return (unsigned short)c == 0xFEFFu ? Take(is) : c; 304 } 305 306 template <typename InputByteStream> TakeUTF16LE307 static CharType Take(InputByteStream& is) { 308 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 309 CharType c = (unsigned char)is.Take(); 310 c |= (unsigned char)is.Take() << 8; 311 return c; 312 } 313 314 template <typename OutputByteStream> PutBOMUTF16LE315 static void PutBOM(OutputByteStream& os) { 316 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 317 os.Put(0xFFu); os.Put(0xFEu); 318 } 319 320 template <typename OutputByteStream> PutUTF16LE321 static void Put(OutputByteStream& os, CharType c) { 322 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 323 os.Put(c & 0xFFu); 324 os.Put((c >> 8) & 0xFFu); 325 } 326 }; 327 328 //! UTF-16 big endian encoding. 329 template<typename CharType = wchar_t> 330 struct UTF16BE : UTF16<CharType> { 331 template <typename InputByteStream> TakeBOMUTF16BE332 static CharType TakeBOM(InputByteStream& is) { 333 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 334 CharType c = Take(is); 335 return (unsigned short)c == 0xFEFFu ? Take(is) : c; 336 } 337 338 template <typename InputByteStream> TakeUTF16BE339 static CharType Take(InputByteStream& is) { 340 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 341 CharType c = (unsigned char)is.Take() << 8; 342 c |= (unsigned char)is.Take(); 343 return c; 344 } 345 346 template <typename OutputByteStream> PutBOMUTF16BE347 static void PutBOM(OutputByteStream& os) { 348 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 349 os.Put(0xFEu); os.Put(0xFFu); 350 } 351 352 template <typename OutputByteStream> PutUTF16BE353 static void Put(OutputByteStream& os, CharType c) { 354 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 355 os.Put((c >> 8) & 0xFFu); 356 os.Put(c & 0xFFu); 357 } 358 }; 359 360 /////////////////////////////////////////////////////////////////////////////// 361 // UTF32 362 363 //! UTF-32 encoding. 364 /*! http://en.wikipedia.org/wiki/UTF-32 365 \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead. 366 \note implements Encoding concept 367 368 \note For in-memory access, no need to concern endianness. The code units and code points are represented by CPU's endianness. 369 For streaming, use UTF32LE and UTF32BE, which handle endianness. 370 */ 371 template<typename CharType = unsigned> 372 struct UTF32 { 373 typedef CharType Ch; 374 RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4); 375 376 enum { supportUnicode = 1 }; 377 378 template<typename OutputStream> EncodeUTF32379 static void Encode(OutputStream& os, unsigned codepoint) { 380 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4); 381 RAPIDJSON_ASSERT(codepoint <= 0x10FFFF); 382 os.Put(codepoint); 383 } 384 385 template <typename InputStream> DecodeUTF32386 static bool Decode(InputStream& is, unsigned* codepoint) { 387 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); 388 Ch c = is.Take(); 389 *codepoint = c; 390 return c <= 0x10FFFF; 391 } 392 393 template <typename InputStream, typename OutputStream> ValidateUTF32394 static bool Validate(InputStream& is, OutputStream& os) { 395 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4); 396 Ch c; 397 os.Put(c = is.Take()); 398 return c <= 0x10FFFF; 399 } 400 }; 401 402 //! UTF-32 little endian enocoding. 403 template<typename CharType = unsigned> 404 struct UTF32LE : UTF32<CharType> { 405 template <typename InputByteStream> TakeBOMUTF32LE406 static CharType TakeBOM(InputByteStream& is) { 407 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 408 CharType c = Take(is); 409 return (unsigned)c == 0x0000FEFFu ? Take(is) : c; 410 } 411 412 template <typename InputByteStream> TakeUTF32LE413 static CharType Take(InputByteStream& is) { 414 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 415 CharType c = (unsigned char)is.Take(); 416 c |= (unsigned char)is.Take() << 8; 417 c |= (unsigned char)is.Take() << 16; 418 c |= (unsigned char)is.Take() << 24; 419 return c; 420 } 421 422 template <typename OutputByteStream> PutBOMUTF32LE423 static void PutBOM(OutputByteStream& os) { 424 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 425 os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u); 426 } 427 428 template <typename OutputByteStream> PutUTF32LE429 static void Put(OutputByteStream& os, CharType c) { 430 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 431 os.Put(c & 0xFFu); 432 os.Put((c >> 8) & 0xFFu); 433 os.Put((c >> 16) & 0xFFu); 434 os.Put((c >> 24) & 0xFFu); 435 } 436 }; 437 438 //! UTF-32 big endian encoding. 439 template<typename CharType = unsigned> 440 struct UTF32BE : UTF32<CharType> { 441 template <typename InputByteStream> TakeBOMUTF32BE442 static CharType TakeBOM(InputByteStream& is) { 443 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 444 CharType c = Take(is); 445 return (unsigned)c == 0x0000FEFFu ? Take(is) : c; 446 } 447 448 template <typename InputByteStream> TakeUTF32BE449 static CharType Take(InputByteStream& is) { 450 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 451 CharType c = (unsigned char)is.Take() << 24; 452 c |= (unsigned char)is.Take() << 16; 453 c |= (unsigned char)is.Take() << 8; 454 c |= (unsigned char)is.Take(); 455 return c; 456 } 457 458 template <typename OutputByteStream> PutBOMUTF32BE459 static void PutBOM(OutputByteStream& os) { 460 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 461 os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu); 462 } 463 464 template <typename OutputByteStream> PutUTF32BE465 static void Put(OutputByteStream& os, CharType c) { 466 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 467 os.Put((c >> 24) & 0xFFu); 468 os.Put((c >> 16) & 0xFFu); 469 os.Put((c >> 8) & 0xFFu); 470 os.Put(c & 0xFFu); 471 } 472 }; 473 474 /////////////////////////////////////////////////////////////////////////////// 475 // ASCII 476 477 //! ASCII encoding. 478 /*! http://en.wikipedia.org/wiki/ASCII 479 \tparam CharType Code unit for storing 7-bit ASCII data. Default is char. 480 \note implements Encoding concept 481 */ 482 template<typename CharType = char> 483 struct ASCII { 484 typedef CharType Ch; 485 486 enum { supportUnicode = 0 }; 487 488 template<typename OutputStream> EncodeASCII489 static void Encode(OutputStream& os, unsigned codepoint) { 490 RAPIDJSON_ASSERT(codepoint <= 0x7F); 491 os.Put(static_cast<Ch>(codepoint & 0xFF)); 492 } 493 494 template <typename InputStream> DecodeASCII495 static bool Decode(InputStream& is, unsigned* codepoint) { 496 unsigned char c = static_cast<unsigned char>(is.Take()); 497 *codepoint = c; 498 return c <= 0X7F; 499 } 500 501 template <typename InputStream, typename OutputStream> ValidateASCII502 static bool Validate(InputStream& is, OutputStream& os) { 503 unsigned char c = is.Take(); 504 os.Put(c); 505 return c <= 0x7F; 506 } 507 508 template <typename InputByteStream> TakeBOMASCII509 static CharType TakeBOM(InputByteStream& is) { 510 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 511 Ch c = Take(is); 512 return c; 513 } 514 515 template <typename InputByteStream> TakeASCII516 static Ch Take(InputByteStream& is) { 517 RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1); 518 return is.Take(); 519 } 520 521 template <typename OutputByteStream> PutBOMASCII522 static void PutBOM(OutputByteStream& os) { 523 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 524 (void)os; 525 } 526 527 template <typename OutputByteStream> PutASCII528 static void Put(OutputByteStream& os, Ch c) { 529 RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1); 530 os.Put(static_cast<typename OutputByteStream::Ch>(c)); 531 } 532 }; 533 534 /////////////////////////////////////////////////////////////////////////////// 535 // AutoUTF 536 537 //! Runtime-specified UTF encoding type of a stream. 538 enum UTFType { 539 kUTF8 = 0, //!< UTF-8. 540 kUTF16LE = 1, //!< UTF-16 little endian. 541 kUTF16BE = 2, //!< UTF-16 big endian. 542 kUTF32LE = 3, //!< UTF-32 little endian. 543 kUTF32BE = 4 //!< UTF-32 big endian. 544 }; 545 546 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type. 547 /*! \note This class can be used with AutoUTFInputtStream and AutoUTFOutputStream, which provides GetType(). 548 */ 549 template<typename CharType> 550 struct AutoUTF { 551 typedef CharType Ch; 552 553 enum { supportUnicode = 1 }; 554 555 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x 556 557 template<typename OutputStream> EncodeAutoUTF558 RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) { 559 typedef void (*EncodeFunc)(OutputStream&, unsigned); 560 static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) }; 561 (*f[os.GetType()])(os, codepoint); 562 } 563 564 template <typename InputStream> DecodeAutoUTF565 RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) { 566 typedef bool (*DecodeFunc)(InputStream&, unsigned*); 567 static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) }; 568 return (*f[is.GetType()])(is, codepoint); 569 } 570 571 template <typename InputStream, typename OutputStream> ValidateAutoUTF572 RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { 573 typedef bool (*ValidateFunc)(InputStream&, OutputStream&); 574 static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) }; 575 return (*f[is.GetType()])(is, os); 576 } 577 578 #undef RAPIDJSON_ENCODINGS_FUNC 579 }; 580 581 /////////////////////////////////////////////////////////////////////////////// 582 // Transcoder 583 584 //! Encoding conversion. 585 template<typename SourceEncoding, typename TargetEncoding> 586 struct Transcoder { 587 //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream. 588 template<typename InputStream, typename OutputStream> TranscodeTranscoder589 RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { 590 unsigned codepoint; 591 if (!SourceEncoding::Decode(is, &codepoint)) 592 return false; 593 TargetEncoding::Encode(os, codepoint); 594 return true; 595 } 596 597 //! Validate one Unicode codepoint from an encoded stream. 598 template<typename InputStream, typename OutputStream> ValidateTranscoder599 RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { 600 return Transcode(is, os); // Since source/target encoding is different, must transcode. 601 } 602 }; 603 604 //! Specialization of Transcoder with same source and target encoding. 605 template<typename Encoding> 606 struct Transcoder<Encoding, Encoding> { 607 template<typename InputStream, typename OutputStream> 608 RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) { 609 os.Put(is.Take()); // Just copy one code unit. This semantic is different from primary template class. 610 return true; 611 } 612 613 template<typename InputStream, typename OutputStream> 614 RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) { 615 return Encoding::Validate(is, os); // source/target encoding are the same 616 } 617 }; 618 619 RAPIDJSON_NAMESPACE_END 620 621 #if defined(__GNUC__) || defined(_MSV_VER) 622 RAPIDJSON_DIAG_POP 623 #endif 624 625 #endif // RAPIDJSON_ENCODINGS_H_ 626