1 //////////////////////////////////////////////////////////// 2 // 3 // SFML - Simple and Fast Multimedia Library 4 // Copyright (C) 2007-2018 Laurent Gomila (laurent@sfml-dev.org) 5 // 6 // This software is provided 'as-is', without any express or implied warranty. 7 // In no event will the authors be held liable for any damages arising from the use of this software. 8 // 9 // Permission is granted to anyone to use this software for any purpose, 10 // including commercial applications, and to alter it and redistribute it freely, 11 // subject to the following restrictions: 12 // 13 // 1. The origin of this software must not be misrepresented; 14 // you must not claim that you wrote the original software. 15 // If you use this software in a product, an acknowledgment 16 // in the product documentation would be appreciated but is not required. 17 // 18 // 2. Altered source versions must be plainly marked as such, 19 // and must not be misrepresented as being the original software. 20 // 21 // 3. This notice may not be removed or altered from any source distribution. 22 // 23 //////////////////////////////////////////////////////////// 24 25 #ifndef SFML_UTF_HPP 26 #define SFML_UTF_HPP 27 28 //////////////////////////////////////////////////////////// 29 // Headers 30 //////////////////////////////////////////////////////////// 31 #include <SFML/Config.hpp> 32 #include <algorithm> 33 #include <locale> 34 #include <string> 35 #include <cstdlib> 36 37 38 namespace sf 39 { 40 template <unsigned int N> 41 class Utf; 42 43 //////////////////////////////////////////////////////////// 44 /// \brief Specialization of the Utf template for UTF-8 45 /// 46 //////////////////////////////////////////////////////////// 47 template <> 48 class Utf<8> 49 { 50 public: 51 52 //////////////////////////////////////////////////////////// 53 /// \brief Decode a single UTF-8 character 54 /// 55 /// Decoding a character means finding its unique 32-bits 56 /// code (called the codepoint) in the Unicode standard. 57 /// 58 /// \param begin Iterator pointing to the beginning of the input sequence 59 /// \param end Iterator pointing to the end of the input sequence 60 /// \param output Codepoint of the decoded UTF-8 character 61 /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid 62 /// 63 /// \return Iterator pointing to one past the last read element of the input sequence 64 /// 65 //////////////////////////////////////////////////////////// 66 template <typename In> 67 static In decode(In begin, In end, Uint32& output, Uint32 replacement = 0); 68 69 //////////////////////////////////////////////////////////// 70 /// \brief Encode a single UTF-8 character 71 /// 72 /// Encoding a character means converting a unique 32-bits 73 /// code (called the codepoint) in the target encoding, UTF-8. 74 /// 75 /// \param input Codepoint to encode as UTF-8 76 /// \param output Iterator pointing to the beginning of the output sequence 77 /// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them) 78 /// 79 /// \return Iterator to the end of the output sequence which has been written 80 /// 81 //////////////////////////////////////////////////////////// 82 template <typename Out> 83 static Out encode(Uint32 input, Out output, Uint8 replacement = 0); 84 85 //////////////////////////////////////////////////////////// 86 /// \brief Advance to the next UTF-8 character 87 /// 88 /// This function is necessary for multi-elements encodings, as 89 /// a single character may use more than 1 storage element. 90 /// 91 /// \param begin Iterator pointing to the beginning of the input sequence 92 /// \param end Iterator pointing to the end of the input sequence 93 /// 94 /// \return Iterator pointing to one past the last read element of the input sequence 95 /// 96 //////////////////////////////////////////////////////////// 97 template <typename In> 98 static In next(In begin, In end); 99 100 //////////////////////////////////////////////////////////// 101 /// \brief Count the number of characters of a UTF-8 sequence 102 /// 103 /// This function is necessary for multi-elements encodings, as 104 /// a single character may use more than 1 storage element, thus the 105 /// total size can be different from (begin - end). 106 /// 107 /// \param begin Iterator pointing to the beginning of the input sequence 108 /// \param end Iterator pointing to the end of the input sequence 109 /// 110 /// \return Iterator pointing to one past the last read element of the input sequence 111 /// 112 //////////////////////////////////////////////////////////// 113 template <typename In> 114 static std::size_t count(In begin, In end); 115 116 //////////////////////////////////////////////////////////// 117 /// \brief Convert an ANSI characters range to UTF-8 118 /// 119 /// The current global locale will be used by default, unless you 120 /// pass a custom one in the \a locale parameter. 121 /// 122 /// \param begin Iterator pointing to the beginning of the input sequence 123 /// \param end Iterator pointing to the end of the input sequence 124 /// \param output Iterator pointing to the beginning of the output sequence 125 /// \param locale Locale to use for conversion 126 /// 127 /// \return Iterator to the end of the output sequence which has been written 128 /// 129 //////////////////////////////////////////////////////////// 130 template <typename In, typename Out> 131 static Out fromAnsi(In begin, In end, Out output, const std::locale& locale = std::locale()); 132 133 //////////////////////////////////////////////////////////// 134 /// \brief Convert a wide characters range to UTF-8 135 /// 136 /// \param begin Iterator pointing to the beginning of the input sequence 137 /// \param end Iterator pointing to the end of the input sequence 138 /// \param output Iterator pointing to the beginning of the output sequence 139 /// 140 /// \return Iterator to the end of the output sequence which has been written 141 /// 142 //////////////////////////////////////////////////////////// 143 template <typename In, typename Out> 144 static Out fromWide(In begin, In end, Out output); 145 146 //////////////////////////////////////////////////////////// 147 /// \brief Convert a latin-1 (ISO-5589-1) characters range to UTF-8 148 /// 149 /// \param begin Iterator pointing to the beginning of the input sequence 150 /// \param end Iterator pointing to the end of the input sequence 151 /// \param output Iterator pointing to the beginning of the output sequence 152 /// 153 /// \return Iterator to the end of the output sequence which has been written 154 /// 155 //////////////////////////////////////////////////////////// 156 template <typename In, typename Out> 157 static Out fromLatin1(In begin, In end, Out output); 158 159 //////////////////////////////////////////////////////////// 160 /// \brief Convert an UTF-8 characters range to ANSI characters 161 /// 162 /// The current global locale will be used by default, unless you 163 /// pass a custom one in the \a locale parameter. 164 /// 165 /// \param begin Iterator pointing to the beginning of the input sequence 166 /// \param end Iterator pointing to the end of the input sequence 167 /// \param output Iterator pointing to the beginning of the output sequence 168 /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) 169 /// \param locale Locale to use for conversion 170 /// 171 /// \return Iterator to the end of the output sequence which has been written 172 /// 173 //////////////////////////////////////////////////////////// 174 template <typename In, typename Out> 175 static Out toAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = std::locale()); 176 177 //////////////////////////////////////////////////////////// 178 /// \brief Convert an UTF-8 characters range to wide characters 179 /// 180 /// \param begin Iterator pointing to the beginning of the input sequence 181 /// \param end Iterator pointing to the end of the input sequence 182 /// \param output Iterator pointing to the beginning of the output sequence 183 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) 184 /// 185 /// \return Iterator to the end of the output sequence which has been written 186 /// 187 //////////////////////////////////////////////////////////// 188 template <typename In, typename Out> 189 static Out toWide(In begin, In end, Out output, wchar_t replacement = 0); 190 191 //////////////////////////////////////////////////////////// 192 /// \brief Convert an UTF-8 characters range to latin-1 (ISO-5589-1) characters 193 /// 194 /// \param begin Iterator pointing to the beginning of the input sequence 195 /// \param end Iterator pointing to the end of the input sequence 196 /// \param output Iterator pointing to the beginning of the output sequence 197 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) 198 /// 199 /// \return Iterator to the end of the output sequence which has been written 200 /// 201 //////////////////////////////////////////////////////////// 202 template <typename In, typename Out> 203 static Out toLatin1(In begin, In end, Out output, char replacement = 0); 204 205 //////////////////////////////////////////////////////////// 206 /// \brief Convert a UTF-8 characters range to UTF-8 207 /// 208 /// This functions does nothing more than a direct copy; 209 /// it is defined only to provide the same interface as other 210 /// specializations of the sf::Utf<> template, and allow 211 /// generic code to be written on top of it. 212 /// 213 /// \param begin Iterator pointing to the beginning of the input sequence 214 /// \param end Iterator pointing to the end of the input sequence 215 /// \param output Iterator pointing to the beginning of the output sequence 216 /// 217 /// \return Iterator to the end of the output sequence which has been written 218 /// 219 //////////////////////////////////////////////////////////// 220 template <typename In, typename Out> 221 static Out toUtf8(In begin, In end, Out output); 222 223 //////////////////////////////////////////////////////////// 224 /// \brief Convert a UTF-8 characters range to UTF-16 225 /// 226 /// \param begin Iterator pointing to the beginning of the input sequence 227 /// \param end Iterator pointing to the end of the input sequence 228 /// \param output Iterator pointing to the beginning of the output sequence 229 /// 230 /// \return Iterator to the end of the output sequence which has been written 231 /// 232 //////////////////////////////////////////////////////////// 233 template <typename In, typename Out> 234 static Out toUtf16(In begin, In end, Out output); 235 236 //////////////////////////////////////////////////////////// 237 /// \brief Convert a UTF-8 characters range to UTF-32 238 /// 239 /// \param begin Iterator pointing to the beginning of the input sequence 240 /// \param end Iterator pointing to the end of the input sequence 241 /// \param output Iterator pointing to the beginning of the output sequence 242 /// 243 /// \return Iterator to the end of the output sequence which has been written 244 /// 245 //////////////////////////////////////////////////////////// 246 template <typename In, typename Out> 247 static Out toUtf32(In begin, In end, Out output); 248 }; 249 250 //////////////////////////////////////////////////////////// 251 /// \brief Specialization of the Utf template for UTF-16 252 /// 253 //////////////////////////////////////////////////////////// 254 template <> 255 class Utf<16> 256 { 257 public: 258 259 //////////////////////////////////////////////////////////// 260 /// \brief Decode a single UTF-16 character 261 /// 262 /// Decoding a character means finding its unique 32-bits 263 /// code (called the codepoint) in the Unicode standard. 264 /// 265 /// \param begin Iterator pointing to the beginning of the input sequence 266 /// \param end Iterator pointing to the end of the input sequence 267 /// \param output Codepoint of the decoded UTF-16 character 268 /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid 269 /// 270 /// \return Iterator pointing to one past the last read element of the input sequence 271 /// 272 //////////////////////////////////////////////////////////// 273 template <typename In> 274 static In decode(In begin, In end, Uint32& output, Uint32 replacement = 0); 275 276 //////////////////////////////////////////////////////////// 277 /// \brief Encode a single UTF-16 character 278 /// 279 /// Encoding a character means converting a unique 32-bits 280 /// code (called the codepoint) in the target encoding, UTF-16. 281 /// 282 /// \param input Codepoint to encode as UTF-16 283 /// \param output Iterator pointing to the beginning of the output sequence 284 /// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them) 285 /// 286 /// \return Iterator to the end of the output sequence which has been written 287 /// 288 //////////////////////////////////////////////////////////// 289 template <typename Out> 290 static Out encode(Uint32 input, Out output, Uint16 replacement = 0); 291 292 //////////////////////////////////////////////////////////// 293 /// \brief Advance to the next UTF-16 character 294 /// 295 /// This function is necessary for multi-elements encodings, as 296 /// a single character may use more than 1 storage element. 297 /// 298 /// \param begin Iterator pointing to the beginning of the input sequence 299 /// \param end Iterator pointing to the end of the input sequence 300 /// 301 /// \return Iterator pointing to one past the last read element of the input sequence 302 /// 303 //////////////////////////////////////////////////////////// 304 template <typename In> 305 static In next(In begin, In end); 306 307 //////////////////////////////////////////////////////////// 308 /// \brief Count the number of characters of a UTF-16 sequence 309 /// 310 /// This function is necessary for multi-elements encodings, as 311 /// a single character may use more than 1 storage element, thus the 312 /// total size can be different from (begin - end). 313 /// 314 /// \param begin Iterator pointing to the beginning of the input sequence 315 /// \param end Iterator pointing to the end of the input sequence 316 /// 317 /// \return Iterator pointing to one past the last read element of the input sequence 318 /// 319 //////////////////////////////////////////////////////////// 320 template <typename In> 321 static std::size_t count(In begin, In end); 322 323 //////////////////////////////////////////////////////////// 324 /// \brief Convert an ANSI characters range to UTF-16 325 /// 326 /// The current global locale will be used by default, unless you 327 /// pass a custom one in the \a locale parameter. 328 /// 329 /// \param begin Iterator pointing to the beginning of the input sequence 330 /// \param end Iterator pointing to the end of the input sequence 331 /// \param output Iterator pointing to the beginning of the output sequence 332 /// \param locale Locale to use for conversion 333 /// 334 /// \return Iterator to the end of the output sequence which has been written 335 /// 336 //////////////////////////////////////////////////////////// 337 template <typename In, typename Out> 338 static Out fromAnsi(In begin, In end, Out output, const std::locale& locale = std::locale()); 339 340 //////////////////////////////////////////////////////////// 341 /// \brief Convert a wide characters range to UTF-16 342 /// 343 /// \param begin Iterator pointing to the beginning of the input sequence 344 /// \param end Iterator pointing to the end of the input sequence 345 /// \param output Iterator pointing to the beginning of the output sequence 346 /// 347 /// \return Iterator to the end of the output sequence which has been written 348 /// 349 //////////////////////////////////////////////////////////// 350 template <typename In, typename Out> 351 static Out fromWide(In begin, In end, Out output); 352 353 //////////////////////////////////////////////////////////// 354 /// \brief Convert a latin-1 (ISO-5589-1) characters range to UTF-16 355 /// 356 /// \param begin Iterator pointing to the beginning of the input sequence 357 /// \param end Iterator pointing to the end of the input sequence 358 /// \param output Iterator pointing to the beginning of the output sequence 359 /// 360 /// \return Iterator to the end of the output sequence which has been written 361 /// 362 //////////////////////////////////////////////////////////// 363 template <typename In, typename Out> 364 static Out fromLatin1(In begin, In end, Out output); 365 366 //////////////////////////////////////////////////////////// 367 /// \brief Convert an UTF-16 characters range to ANSI characters 368 /// 369 /// The current global locale will be used by default, unless you 370 /// pass a custom one in the \a locale parameter. 371 /// 372 /// \param begin Iterator pointing to the beginning of the input sequence 373 /// \param end Iterator pointing to the end of the input sequence 374 /// \param output Iterator pointing to the beginning of the output sequence 375 /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) 376 /// \param locale Locale to use for conversion 377 /// 378 /// \return Iterator to the end of the output sequence which has been written 379 /// 380 //////////////////////////////////////////////////////////// 381 template <typename In, typename Out> 382 static Out toAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = std::locale()); 383 384 //////////////////////////////////////////////////////////// 385 /// \brief Convert an UTF-16 characters range to wide characters 386 /// 387 /// \param begin Iterator pointing to the beginning of the input sequence 388 /// \param end Iterator pointing to the end of the input sequence 389 /// \param output Iterator pointing to the beginning of the output sequence 390 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) 391 /// 392 /// \return Iterator to the end of the output sequence which has been written 393 /// 394 //////////////////////////////////////////////////////////// 395 template <typename In, typename Out> 396 static Out toWide(In begin, In end, Out output, wchar_t replacement = 0); 397 398 //////////////////////////////////////////////////////////// 399 /// \brief Convert an UTF-16 characters range to latin-1 (ISO-5589-1) characters 400 /// 401 /// \param begin Iterator pointing to the beginning of the input sequence 402 /// \param end Iterator pointing to the end of the input sequence 403 /// \param output Iterator pointing to the beginning of the output sequence 404 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) 405 /// 406 /// \return Iterator to the end of the output sequence which has been written 407 /// 408 //////////////////////////////////////////////////////////// 409 template <typename In, typename Out> 410 static Out toLatin1(In begin, In end, Out output, char replacement = 0); 411 412 //////////////////////////////////////////////////////////// 413 /// \brief Convert a UTF-16 characters range to UTF-8 414 /// 415 /// \param begin Iterator pointing to the beginning of the input sequence 416 /// \param end Iterator pointing to the end of the input sequence 417 /// \param output Iterator pointing to the beginning of the output sequence 418 /// 419 /// \return Iterator to the end of the output sequence which has been written 420 /// 421 //////////////////////////////////////////////////////////// 422 template <typename In, typename Out> 423 static Out toUtf8(In begin, In end, Out output); 424 425 //////////////////////////////////////////////////////////// 426 /// \brief Convert a UTF-16 characters range to UTF-16 427 /// 428 /// This functions does nothing more than a direct copy; 429 /// it is defined only to provide the same interface as other 430 /// specializations of the sf::Utf<> template, and allow 431 /// generic code to be written on top of it. 432 /// 433 /// \param begin Iterator pointing to the beginning of the input sequence 434 /// \param end Iterator pointing to the end of the input sequence 435 /// \param output Iterator pointing to the beginning of the output sequence 436 /// 437 /// \return Iterator to the end of the output sequence which has been written 438 /// 439 //////////////////////////////////////////////////////////// 440 template <typename In, typename Out> 441 static Out toUtf16(In begin, In end, Out output); 442 443 //////////////////////////////////////////////////////////// 444 /// \brief Convert a UTF-16 characters range to UTF-32 445 /// 446 /// \param begin Iterator pointing to the beginning of the input sequence 447 /// \param end Iterator pointing to the end of the input sequence 448 /// \param output Iterator pointing to the beginning of the output sequence 449 /// 450 /// \return Iterator to the end of the output sequence which has been written 451 /// 452 //////////////////////////////////////////////////////////// 453 template <typename In, typename Out> 454 static Out toUtf32(In begin, In end, Out output); 455 }; 456 457 //////////////////////////////////////////////////////////// 458 /// \brief Specialization of the Utf template for UTF-32 459 /// 460 //////////////////////////////////////////////////////////// 461 template <> 462 class Utf<32> 463 { 464 public: 465 466 //////////////////////////////////////////////////////////// 467 /// \brief Decode a single UTF-32 character 468 /// 469 /// Decoding a character means finding its unique 32-bits 470 /// code (called the codepoint) in the Unicode standard. 471 /// For UTF-32, the character value is the same as the codepoint. 472 /// 473 /// \param begin Iterator pointing to the beginning of the input sequence 474 /// \param end Iterator pointing to the end of the input sequence 475 /// \param output Codepoint of the decoded UTF-32 character 476 /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid 477 /// 478 /// \return Iterator pointing to one past the last read element of the input sequence 479 /// 480 //////////////////////////////////////////////////////////// 481 template <typename In> 482 static In decode(In begin, In end, Uint32& output, Uint32 replacement = 0); 483 484 //////////////////////////////////////////////////////////// 485 /// \brief Encode a single UTF-32 character 486 /// 487 /// Encoding a character means converting a unique 32-bits 488 /// code (called the codepoint) in the target encoding, UTF-32. 489 /// For UTF-32, the codepoint is the same as the character value. 490 /// 491 /// \param input Codepoint to encode as UTF-32 492 /// \param output Iterator pointing to the beginning of the output sequence 493 /// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them) 494 /// 495 /// \return Iterator to the end of the output sequence which has been written 496 /// 497 //////////////////////////////////////////////////////////// 498 template <typename Out> 499 static Out encode(Uint32 input, Out output, Uint32 replacement = 0); 500 501 //////////////////////////////////////////////////////////// 502 /// \brief Advance to the next UTF-32 character 503 /// 504 /// This function is trivial for UTF-32, which can store 505 /// every character in a single storage element. 506 /// 507 /// \param begin Iterator pointing to the beginning of the input sequence 508 /// \param end Iterator pointing to the end of the input sequence 509 /// 510 /// \return Iterator pointing to one past the last read element of the input sequence 511 /// 512 //////////////////////////////////////////////////////////// 513 template <typename In> 514 static In next(In begin, In end); 515 516 //////////////////////////////////////////////////////////// 517 /// \brief Count the number of characters of a UTF-32 sequence 518 /// 519 /// This function is trivial for UTF-32, which can store 520 /// every character in a single storage element. 521 /// 522 /// \param begin Iterator pointing to the beginning of the input sequence 523 /// \param end Iterator pointing to the end of the input sequence 524 /// 525 /// \return Iterator pointing to one past the last read element of the input sequence 526 /// 527 //////////////////////////////////////////////////////////// 528 template <typename In> 529 static std::size_t count(In begin, In end); 530 531 //////////////////////////////////////////////////////////// 532 /// \brief Convert an ANSI characters range to UTF-32 533 /// 534 /// The current global locale will be used by default, unless you 535 /// pass a custom one in the \a locale parameter. 536 /// 537 /// \param begin Iterator pointing to the beginning of the input sequence 538 /// \param end Iterator pointing to the end of the input sequence 539 /// \param output Iterator pointing to the beginning of the output sequence 540 /// \param locale Locale to use for conversion 541 /// 542 /// \return Iterator to the end of the output sequence which has been written 543 /// 544 //////////////////////////////////////////////////////////// 545 template <typename In, typename Out> 546 static Out fromAnsi(In begin, In end, Out output, const std::locale& locale = std::locale()); 547 548 //////////////////////////////////////////////////////////// 549 /// \brief Convert a wide characters range to UTF-32 550 /// 551 /// \param begin Iterator pointing to the beginning of the input sequence 552 /// \param end Iterator pointing to the end of the input sequence 553 /// \param output Iterator pointing to the beginning of the output sequence 554 /// 555 /// \return Iterator to the end of the output sequence which has been written 556 /// 557 //////////////////////////////////////////////////////////// 558 template <typename In, typename Out> 559 static Out fromWide(In begin, In end, Out output); 560 561 //////////////////////////////////////////////////////////// 562 /// \brief Convert a latin-1 (ISO-5589-1) characters range to UTF-32 563 /// 564 /// \param begin Iterator pointing to the beginning of the input sequence 565 /// \param end Iterator pointing to the end of the input sequence 566 /// \param output Iterator pointing to the beginning of the output sequence 567 /// 568 /// \return Iterator to the end of the output sequence which has been written 569 /// 570 //////////////////////////////////////////////////////////// 571 template <typename In, typename Out> 572 static Out fromLatin1(In begin, In end, Out output); 573 574 //////////////////////////////////////////////////////////// 575 /// \brief Convert an UTF-32 characters range to ANSI characters 576 /// 577 /// The current global locale will be used by default, unless you 578 /// pass a custom one in the \a locale parameter. 579 /// 580 /// \param begin Iterator pointing to the beginning of the input sequence 581 /// \param end Iterator pointing to the end of the input sequence 582 /// \param output Iterator pointing to the beginning of the output sequence 583 /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them) 584 /// \param locale Locale to use for conversion 585 /// 586 /// \return Iterator to the end of the output sequence which has been written 587 /// 588 //////////////////////////////////////////////////////////// 589 template <typename In, typename Out> 590 static Out toAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = std::locale()); 591 592 //////////////////////////////////////////////////////////// 593 /// \brief Convert an UTF-32 characters range to wide characters 594 /// 595 /// \param begin Iterator pointing to the beginning of the input sequence 596 /// \param end Iterator pointing to the end of the input sequence 597 /// \param output Iterator pointing to the beginning of the output sequence 598 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) 599 /// 600 /// \return Iterator to the end of the output sequence which has been written 601 /// 602 //////////////////////////////////////////////////////////// 603 template <typename In, typename Out> 604 static Out toWide(In begin, In end, Out output, wchar_t replacement = 0); 605 606 //////////////////////////////////////////////////////////// 607 /// \brief Convert an UTF-16 characters range to latin-1 (ISO-5589-1) characters 608 /// 609 /// \param begin Iterator pointing to the beginning of the input sequence 610 /// \param end Iterator pointing to the end of the input sequence 611 /// \param output Iterator pointing to the beginning of the output sequence 612 /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them) 613 /// 614 /// \return Iterator to the end of the output sequence which has been written 615 /// 616 //////////////////////////////////////////////////////////// 617 template <typename In, typename Out> 618 static Out toLatin1(In begin, In end, Out output, char replacement = 0); 619 620 //////////////////////////////////////////////////////////// 621 /// \brief Convert a UTF-32 characters range to UTF-8 622 /// 623 /// \param begin Iterator pointing to the beginning of the input sequence 624 /// \param end Iterator pointing to the end of the input sequence 625 /// \param output Iterator pointing to the beginning of the output sequence 626 /// 627 /// \return Iterator to the end of the output sequence which has been written 628 /// 629 //////////////////////////////////////////////////////////// 630 template <typename In, typename Out> 631 static Out toUtf8(In begin, In end, Out output); 632 633 //////////////////////////////////////////////////////////// 634 /// \brief Convert a UTF-32 characters range to UTF-16 635 /// 636 /// \param begin Iterator pointing to the beginning of the input sequence 637 /// \param end Iterator pointing to the end of the input sequence 638 /// \param output Iterator pointing to the beginning of the output sequence 639 /// 640 /// \return Iterator to the end of the output sequence which has been written 641 /// 642 //////////////////////////////////////////////////////////// 643 template <typename In, typename Out> 644 static Out toUtf16(In begin, In end, Out output); 645 646 //////////////////////////////////////////////////////////// 647 /// \brief Convert a UTF-32 characters range to UTF-32 648 /// 649 /// This functions does nothing more than a direct copy; 650 /// it is defined only to provide the same interface as other 651 /// specializations of the sf::Utf<> template, and allow 652 /// generic code to be written on top of it. 653 /// 654 /// \param begin Iterator pointing to the beginning of the input sequence 655 /// \param end Iterator pointing to the end of the input sequence 656 /// \param output Iterator pointing to the beginning of the output sequence 657 /// 658 /// \return Iterator to the end of the output sequence which has been written 659 /// 660 //////////////////////////////////////////////////////////// 661 template <typename In, typename Out> 662 static Out toUtf32(In begin, In end, Out output); 663 664 //////////////////////////////////////////////////////////// 665 /// \brief Decode a single ANSI character to UTF-32 666 /// 667 /// This function does not exist in other specializations 668 /// of sf::Utf<>, it is defined for convenience (it is used by 669 /// several other conversion functions). 670 /// 671 /// \param input Input ANSI character 672 /// \param locale Locale to use for conversion 673 /// 674 /// \return Converted character 675 /// 676 //////////////////////////////////////////////////////////// 677 template <typename In> 678 static Uint32 decodeAnsi(In input, const std::locale& locale = std::locale()); 679 680 //////////////////////////////////////////////////////////// 681 /// \brief Decode a single wide character to UTF-32 682 /// 683 /// This function does not exist in other specializations 684 /// of sf::Utf<>, it is defined for convenience (it is used by 685 /// several other conversion functions). 686 /// 687 /// \param input Input wide character 688 /// 689 /// \return Converted character 690 /// 691 //////////////////////////////////////////////////////////// 692 template <typename In> 693 static Uint32 decodeWide(In input); 694 695 //////////////////////////////////////////////////////////// 696 /// \brief Encode a single UTF-32 character to ANSI 697 /// 698 /// This function does not exist in other specializations 699 /// of sf::Utf<>, it is defined for convenience (it is used by 700 /// several other conversion functions). 701 /// 702 /// \param codepoint Iterator pointing to the beginning of the input sequence 703 /// \param output Iterator pointing to the beginning of the output sequence 704 /// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it) 705 /// \param locale Locale to use for conversion 706 /// 707 /// \return Iterator to the end of the output sequence which has been written 708 /// 709 //////////////////////////////////////////////////////////// 710 template <typename Out> 711 static Out encodeAnsi(Uint32 codepoint, Out output, char replacement = 0, const std::locale& locale = std::locale()); 712 713 //////////////////////////////////////////////////////////// 714 /// \brief Encode a single UTF-32 character to wide 715 /// 716 /// This function does not exist in other specializations 717 /// of sf::Utf<>, it is defined for convenience (it is used by 718 /// several other conversion functions). 719 /// 720 /// \param codepoint Iterator pointing to the beginning of the input sequence 721 /// \param output Iterator pointing to the beginning of the output sequence 722 /// \param replacement Replacement if the input character is not convertible to wide (use 0 to skip it) 723 /// 724 /// \return Iterator to the end of the output sequence which has been written 725 /// 726 //////////////////////////////////////////////////////////// 727 template <typename Out> 728 static Out encodeWide(Uint32 codepoint, Out output, wchar_t replacement = 0); 729 }; 730 731 #include <SFML/System/Utf.inl> 732 733 // Make typedefs to get rid of the template syntax 734 typedef Utf<8> Utf8; 735 typedef Utf<16> Utf16; 736 typedef Utf<32> Utf32; 737 738 } // namespace sf 739 740 741 #endif // SFML_UTF_HPP 742 743 744 //////////////////////////////////////////////////////////// 745 /// \class sf::Utf 746 /// \ingroup system 747 /// 748 /// Utility class providing generic functions for UTF conversions. 749 /// 750 /// sf::Utf is a low-level, generic interface for counting, iterating, 751 /// encoding and decoding Unicode characters and strings. It is able 752 /// to handle ANSI, wide, latin-1, UTF-8, UTF-16 and UTF-32 encodings. 753 /// 754 /// sf::Utf<X> functions are all static, these classes are not meant to 755 /// be instantiated. All the functions are template, so that you 756 /// can use any character / string type for a given encoding. 757 /// 758 /// It has 3 specializations: 759 /// \li sf::Utf<8> (typedef'd to sf::Utf8) 760 /// \li sf::Utf<16> (typedef'd to sf::Utf16) 761 /// \li sf::Utf<32> (typedef'd to sf::Utf32) 762 /// 763 //////////////////////////////////////////////////////////// 764