1 ////////////////////////////////////////////////////////////
2 //
3 // SFML - Simple and Fast Multimedia Library
4 // Copyright (C) 2007-2018 Laurent Gomila (laurent@sfml-dev.org)
5 //
6 // This software is provided 'as-is', without any express or implied warranty.
7 // In no event will the authors be held liable for any damages arising from the use of this software.
8 //
9 // Permission is granted to anyone to use this software for any purpose,
10 // including commercial applications, and to alter it and redistribute it freely,
11 // subject to the following restrictions:
12 //
13 // 1. The origin of this software must not be misrepresented;
14 //    you must not claim that you wrote the original software.
15 //    If you use this software in a product, an acknowledgment
16 //    in the product documentation would be appreciated but is not required.
17 //
18 // 2. Altered source versions must be plainly marked as such,
19 //    and must not be misrepresented as being the original software.
20 //
21 // 3. This notice may not be removed or altered from any source distribution.
22 //
23 ////////////////////////////////////////////////////////////
24 
25 #ifndef SFML_UTF_HPP
26 #define SFML_UTF_HPP
27 
28 ////////////////////////////////////////////////////////////
29 // Headers
30 ////////////////////////////////////////////////////////////
31 #include <SFML/Config.hpp>
32 #include <algorithm>
33 #include <locale>
34 #include <string>
35 #include <cstdlib>
36 
37 
38 namespace sf
39 {
40 template <unsigned int N> // N selects the UTF encoding; specializations for N = 8, 16 and 32 are declared below
41 class Utf; // forward declaration of the primary template
42 
43 ////////////////////////////////////////////////////////////
44 /// \brief Specialization of the Utf template for UTF-8
45 ///
46 ////////////////////////////////////////////////////////////
47 template <>
48 class Utf<8>
49 {
50 public:
51 
52     ////////////////////////////////////////////////////////////
53     /// \brief Decode a single UTF-8 character
54     ///
55     /// Decoding a character means finding its unique 32-bit
56     /// code (called the codepoint) in the Unicode standard.
57     ///
58     /// \param begin       Iterator pointing to the beginning of the input sequence
59     /// \param end         Iterator pointing to the end of the input sequence
60     /// \param output      Codepoint of the decoded UTF-8 character
61     /// \param replacement Replacement character to use in case the UTF-8 sequence is invalid
62     ///
63     /// \return Iterator pointing to one past the last read element of the input sequence
64     ///
65     ////////////////////////////////////////////////////////////
66     template <typename In>
67     static In decode(In begin, In end, Uint32& output, Uint32 replacement = 0);
68 
69     ////////////////////////////////////////////////////////////
70     /// \brief Encode a single UTF-8 character
71     ///
72     /// Encoding a character means converting a unique 32-bit
73     /// code (called the codepoint) to the target encoding, UTF-8.
74     ///
75     /// \param input       Codepoint to encode as UTF-8
76     /// \param output      Iterator pointing to the beginning of the output sequence
77     /// \param replacement Replacement for characters not convertible to UTF-8 (use 0 to skip them)
78     ///
79     /// \return Iterator to the end of the output sequence which has been written
80     ///
81     ////////////////////////////////////////////////////////////
82     template <typename Out>
83     static Out encode(Uint32 input, Out output, Uint8 replacement = 0);
84 
85     ////////////////////////////////////////////////////////////
86     /// \brief Advance to the next UTF-8 character
87     ///
88     /// This function is necessary for multi-element encodings, as
89     /// a single character may use more than 1 storage element.
90     ///
91     /// \param begin Iterator pointing to the beginning of the input sequence
92     /// \param end   Iterator pointing to the end of the input sequence
93     ///
94     /// \return Iterator pointing to one past the last read element of the input sequence
95     ///
96     ////////////////////////////////////////////////////////////
97     template <typename In>
98     static In next(In begin, In end);
99 
100     ////////////////////////////////////////////////////////////
101     /// \brief Count the number of characters of a UTF-8 sequence
102     ///
103     /// This function is necessary for multi-element encodings, as
104     /// a single character may use more than 1 storage element, thus the
105     /// character count can be different from (end - begin).
106     ///
107     /// \param begin Iterator pointing to the beginning of the input sequence
108     /// \param end   Iterator pointing to the end of the input sequence
109     ///
110     /// \return Number of UTF-8 characters in the input sequence
111     ///
112     ////////////////////////////////////////////////////////////
113     template <typename In>
114     static std::size_t count(In begin, In end);
115 
116     ////////////////////////////////////////////////////////////
117     /// \brief Convert an ANSI characters range to UTF-8
118     ///
119     /// The current global locale will be used by default, unless you
120     /// pass a custom one in the \a locale parameter.
121     ///
122     /// \param begin  Iterator pointing to the beginning of the input sequence
123     /// \param end    Iterator pointing to the end of the input sequence
124     /// \param output Iterator pointing to the beginning of the output sequence
125     /// \param locale Locale to use for conversion
126     ///
127     /// \return Iterator to the end of the output sequence which has been written
128     ///
129     ////////////////////////////////////////////////////////////
130     template <typename In, typename Out>
131     static Out fromAnsi(In begin, In end, Out output, const std::locale& locale = std::locale());
132 
133     ////////////////////////////////////////////////////////////
134     /// \brief Convert a wide characters range to UTF-8
135     ///
136     /// \param begin  Iterator pointing to the beginning of the input sequence
137     /// \param end    Iterator pointing to the end of the input sequence
138     /// \param output Iterator pointing to the beginning of the output sequence
139     ///
140     /// \return Iterator to the end of the output sequence which has been written
141     ///
142     ////////////////////////////////////////////////////////////
143     template <typename In, typename Out>
144     static Out fromWide(In begin, In end, Out output);
145 
146     ////////////////////////////////////////////////////////////
147     /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-8
148     ///
149     /// \param begin  Iterator pointing to the beginning of the input sequence
150     /// \param end    Iterator pointing to the end of the input sequence
151     /// \param output Iterator pointing to the beginning of the output sequence
152     ///
153     /// \return Iterator to the end of the output sequence which has been written
154     ///
155     ////////////////////////////////////////////////////////////
156     template <typename In, typename Out>
157     static Out fromLatin1(In begin, In end, Out output);
158 
159     ////////////////////////////////////////////////////////////
160     /// \brief Convert a UTF-8 characters range to ANSI characters
161     ///
162     /// The current global locale will be used by default, unless you
163     /// pass a custom one in the \a locale parameter.
164     ///
165     /// \param begin       Iterator pointing to the beginning of the input sequence
166     /// \param end         Iterator pointing to the end of the input sequence
167     /// \param output      Iterator pointing to the beginning of the output sequence
168     /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
169     /// \param locale      Locale to use for conversion
170     ///
171     /// \return Iterator to the end of the output sequence which has been written
172     ///
173     ////////////////////////////////////////////////////////////
174     template <typename In, typename Out>
175     static Out toAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = std::locale());
176 
177     ////////////////////////////////////////////////////////////
178     /// \brief Convert a UTF-8 characters range to wide characters
179     ///
180     /// \param begin       Iterator pointing to the beginning of the input sequence
181     /// \param end         Iterator pointing to the end of the input sequence
182     /// \param output      Iterator pointing to the beginning of the output sequence
183     /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
184     ///
185     /// \return Iterator to the end of the output sequence which has been written
186     ///
187     ////////////////////////////////////////////////////////////
188     template <typename In, typename Out>
189     static Out toWide(In begin, In end, Out output, wchar_t replacement = 0);
190 
191     ////////////////////////////////////////////////////////////
192     /// \brief Convert a UTF-8 characters range to latin-1 (ISO-8859-1) characters
193     ///
194     /// \param begin       Iterator pointing to the beginning of the input sequence
195     /// \param end         Iterator pointing to the end of the input sequence
196     /// \param output      Iterator pointing to the beginning of the output sequence
197     /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them)
198     ///
199     /// \return Iterator to the end of the output sequence which has been written
200     ///
201     ////////////////////////////////////////////////////////////
202     template <typename In, typename Out>
203     static Out toLatin1(In begin, In end, Out output, char replacement = 0);
204 
205     ////////////////////////////////////////////////////////////
206     /// \brief Convert a UTF-8 characters range to UTF-8
207     ///
208     /// This function does nothing more than a direct copy;
209     /// it is defined only to provide the same interface as other
210     /// specializations of the sf::Utf<> template, and allow
211     /// generic code to be written on top of it.
212     ///
213     /// \param begin  Iterator pointing to the beginning of the input sequence
214     /// \param end    Iterator pointing to the end of the input sequence
215     /// \param output Iterator pointing to the beginning of the output sequence
216     ///
217     /// \return Iterator to the end of the output sequence which has been written
218     ///
219     ////////////////////////////////////////////////////////////
220     template <typename In, typename Out>
221     static Out toUtf8(In begin, In end, Out output);
222 
223     ////////////////////////////////////////////////////////////
224     /// \brief Convert a UTF-8 characters range to UTF-16
225     ///
226     /// \param begin  Iterator pointing to the beginning of the input sequence
227     /// \param end    Iterator pointing to the end of the input sequence
228     /// \param output Iterator pointing to the beginning of the output sequence
229     ///
230     /// \return Iterator to the end of the output sequence which has been written
231     ///
232     ////////////////////////////////////////////////////////////
233     template <typename In, typename Out>
234     static Out toUtf16(In begin, In end, Out output);
235 
236     ////////////////////////////////////////////////////////////
237     /// \brief Convert a UTF-8 characters range to UTF-32
238     ///
239     /// \param begin  Iterator pointing to the beginning of the input sequence
240     /// \param end    Iterator pointing to the end of the input sequence
241     /// \param output Iterator pointing to the beginning of the output sequence
242     ///
243     /// \return Iterator to the end of the output sequence which has been written
244     ///
245     ////////////////////////////////////////////////////////////
246     template <typename In, typename Out>
247     static Out toUtf32(In begin, In end, Out output);
248 };
249 
250 ////////////////////////////////////////////////////////////
251 /// \brief Specialization of the Utf template for UTF-16
252 ///
253 ////////////////////////////////////////////////////////////
254 template <>
255 class Utf<16>
256 {
257 public:
258 
259     ////////////////////////////////////////////////////////////
260     /// \brief Decode a single UTF-16 character
261     ///
262     /// Decoding a character means finding its unique 32-bit
263     /// code (called the codepoint) in the Unicode standard.
264     ///
265     /// \param begin       Iterator pointing to the beginning of the input sequence
266     /// \param end         Iterator pointing to the end of the input sequence
267     /// \param output      Codepoint of the decoded UTF-16 character
268     /// \param replacement Replacement character to use in case the UTF-16 sequence is invalid
269     ///
270     /// \return Iterator pointing to one past the last read element of the input sequence
271     ///
272     ////////////////////////////////////////////////////////////
273     template <typename In>
274     static In decode(In begin, In end, Uint32& output, Uint32 replacement = 0);
275 
276     ////////////////////////////////////////////////////////////
277     /// \brief Encode a single UTF-16 character
278     ///
279     /// Encoding a character means converting a unique 32-bit
280     /// code (called the codepoint) to the target encoding, UTF-16.
281     ///
282     /// \param input       Codepoint to encode as UTF-16
283     /// \param output      Iterator pointing to the beginning of the output sequence
284     /// \param replacement Replacement for characters not convertible to UTF-16 (use 0 to skip them)
285     ///
286     /// \return Iterator to the end of the output sequence which has been written
287     ///
288     ////////////////////////////////////////////////////////////
289     template <typename Out>
290     static Out encode(Uint32 input, Out output, Uint16 replacement = 0);
291 
292     ////////////////////////////////////////////////////////////
293     /// \brief Advance to the next UTF-16 character
294     ///
295     /// This function is necessary for multi-element encodings, as
296     /// a single character may use more than 1 storage element.
297     ///
298     /// \param begin Iterator pointing to the beginning of the input sequence
299     /// \param end   Iterator pointing to the end of the input sequence
300     ///
301     /// \return Iterator pointing to one past the last read element of the input sequence
302     ///
303     ////////////////////////////////////////////////////////////
304     template <typename In>
305     static In next(In begin, In end);
306 
307     ////////////////////////////////////////////////////////////
308     /// \brief Count the number of characters of a UTF-16 sequence
309     ///
310     /// This function is necessary for multi-element encodings, as
311     /// a single character may use more than 1 storage element, thus the
312     /// character count can be different from (end - begin).
313     ///
314     /// \param begin Iterator pointing to the beginning of the input sequence
315     /// \param end   Iterator pointing to the end of the input sequence
316     ///
317     /// \return Number of UTF-16 characters in the input sequence
318     ///
319     ////////////////////////////////////////////////////////////
320     template <typename In>
321     static std::size_t count(In begin, In end);
322 
323     ////////////////////////////////////////////////////////////
324     /// \brief Convert an ANSI characters range to UTF-16
325     ///
326     /// The current global locale will be used by default, unless you
327     /// pass a custom one in the \a locale parameter.
328     ///
329     /// \param begin  Iterator pointing to the beginning of the input sequence
330     /// \param end    Iterator pointing to the end of the input sequence
331     /// \param output Iterator pointing to the beginning of the output sequence
332     /// \param locale Locale to use for conversion
333     ///
334     /// \return Iterator to the end of the output sequence which has been written
335     ///
336     ////////////////////////////////////////////////////////////
337     template <typename In, typename Out>
338     static Out fromAnsi(In begin, In end, Out output, const std::locale& locale = std::locale());
339 
340     ////////////////////////////////////////////////////////////
341     /// \brief Convert a wide characters range to UTF-16
342     ///
343     /// \param begin  Iterator pointing to the beginning of the input sequence
344     /// \param end    Iterator pointing to the end of the input sequence
345     /// \param output Iterator pointing to the beginning of the output sequence
346     ///
347     /// \return Iterator to the end of the output sequence which has been written
348     ///
349     ////////////////////////////////////////////////////////////
350     template <typename In, typename Out>
351     static Out fromWide(In begin, In end, Out output);
352 
353     ////////////////////////////////////////////////////////////
354     /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-16
355     ///
356     /// \param begin  Iterator pointing to the beginning of the input sequence
357     /// \param end    Iterator pointing to the end of the input sequence
358     /// \param output Iterator pointing to the beginning of the output sequence
359     ///
360     /// \return Iterator to the end of the output sequence which has been written
361     ///
362     ////////////////////////////////////////////////////////////
363     template <typename In, typename Out>
364     static Out fromLatin1(In begin, In end, Out output);
365 
366     ////////////////////////////////////////////////////////////
367     /// \brief Convert a UTF-16 characters range to ANSI characters
368     ///
369     /// The current global locale will be used by default, unless you
370     /// pass a custom one in the \a locale parameter.
371     ///
372     /// \param begin       Iterator pointing to the beginning of the input sequence
373     /// \param end         Iterator pointing to the end of the input sequence
374     /// \param output      Iterator pointing to the beginning of the output sequence
375     /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
376     /// \param locale      Locale to use for conversion
377     ///
378     /// \return Iterator to the end of the output sequence which has been written
379     ///
380     ////////////////////////////////////////////////////////////
381     template <typename In, typename Out>
382     static Out toAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = std::locale());
383 
384     ////////////////////////////////////////////////////////////
385     /// \brief Convert a UTF-16 characters range to wide characters
386     ///
387     /// \param begin       Iterator pointing to the beginning of the input sequence
388     /// \param end         Iterator pointing to the end of the input sequence
389     /// \param output      Iterator pointing to the beginning of the output sequence
390     /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
391     ///
392     /// \return Iterator to the end of the output sequence which has been written
393     ///
394     ////////////////////////////////////////////////////////////
395     template <typename In, typename Out>
396     static Out toWide(In begin, In end, Out output, wchar_t replacement = 0);
397 
398     ////////////////////////////////////////////////////////////
399     /// \brief Convert a UTF-16 characters range to latin-1 (ISO-8859-1) characters
400     ///
401     /// \param begin       Iterator pointing to the beginning of the input sequence
402     /// \param end         Iterator pointing to the end of the input sequence
403     /// \param output      Iterator pointing to the beginning of the output sequence
404     /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them)
405     ///
406     /// \return Iterator to the end of the output sequence which has been written
407     ///
408     ////////////////////////////////////////////////////////////
409     template <typename In, typename Out>
410     static Out toLatin1(In begin, In end, Out output, char replacement = 0);
411 
412     ////////////////////////////////////////////////////////////
413     /// \brief Convert a UTF-16 characters range to UTF-8
414     ///
415     /// \param begin  Iterator pointing to the beginning of the input sequence
416     /// \param end    Iterator pointing to the end of the input sequence
417     /// \param output Iterator pointing to the beginning of the output sequence
418     ///
419     /// \return Iterator to the end of the output sequence which has been written
420     ///
421     ////////////////////////////////////////////////////////////
422     template <typename In, typename Out>
423     static Out toUtf8(In begin, In end, Out output);
424 
425     ////////////////////////////////////////////////////////////
426     /// \brief Convert a UTF-16 characters range to UTF-16
427     ///
428     /// This function does nothing more than a direct copy;
429     /// it is defined only to provide the same interface as other
430     /// specializations of the sf::Utf<> template, and allow
431     /// generic code to be written on top of it.
432     ///
433     /// \param begin  Iterator pointing to the beginning of the input sequence
434     /// \param end    Iterator pointing to the end of the input sequence
435     /// \param output Iterator pointing to the beginning of the output sequence
436     ///
437     /// \return Iterator to the end of the output sequence which has been written
438     ///
439     ////////////////////////////////////////////////////////////
440     template <typename In, typename Out>
441     static Out toUtf16(In begin, In end, Out output);
442 
443     ////////////////////////////////////////////////////////////
444     /// \brief Convert a UTF-16 characters range to UTF-32
445     ///
446     /// \param begin  Iterator pointing to the beginning of the input sequence
447     /// \param end    Iterator pointing to the end of the input sequence
448     /// \param output Iterator pointing to the beginning of the output sequence
449     ///
450     /// \return Iterator to the end of the output sequence which has been written
451     ///
452     ////////////////////////////////////////////////////////////
453     template <typename In, typename Out>
454     static Out toUtf32(In begin, In end, Out output);
455 };
456 
457 ////////////////////////////////////////////////////////////
458 /// \brief Specialization of the Utf template for UTF-32
459 ///
460 ////////////////////////////////////////////////////////////
461 template <>
462 class Utf<32>
463 {
464 public:
465 
466     ////////////////////////////////////////////////////////////
467     /// \brief Decode a single UTF-32 character
468     ///
469     /// Decoding a character means finding its unique 32-bits
470     /// code (called the codepoint) in the Unicode standard.
471     /// For UTF-32, the character value is the same as the codepoint.
472     ///
473     /// \param begin       Iterator pointing to the beginning of the input sequence
474     /// \param end         Iterator pointing to the end of the input sequence
475     /// \param output      Codepoint of the decoded UTF-32 character
476     /// \param replacement Replacement character to use in case the UTF-32 sequence is invalid
477     ///
478     /// \return Iterator pointing to one past the last read element of the input sequence
479     ///
480     ////////////////////////////////////////////////////////////
481     template <typename In>
482     static In decode(In begin, In end, Uint32& output, Uint32 replacement = 0);
483 
484     ////////////////////////////////////////////////////////////
485     /// \brief Encode a single UTF-32 character
486     ///
487     /// Encoding a character means converting a unique 32-bits
488     /// code (called the codepoint) in the target encoding, UTF-32.
489     /// For UTF-32, the codepoint is the same as the character value.
490     ///
491     /// \param input       Codepoint to encode as UTF-32
492     /// \param output      Iterator pointing to the beginning of the output sequence
493     /// \param replacement Replacement for characters not convertible to UTF-32 (use 0 to skip them)
494     ///
495     /// \return Iterator to the end of the output sequence which has been written
496     ///
497     ////////////////////////////////////////////////////////////
498     template <typename Out>
499     static Out encode(Uint32 input, Out output, Uint32 replacement = 0);
500 
501     ////////////////////////////////////////////////////////////
502     /// \brief Advance to the next UTF-32 character
503     ///
504     /// This function is trivial for UTF-32, which can store
505     /// every character in a single storage element.
506     ///
507     /// \param begin Iterator pointing to the beginning of the input sequence
508     /// \param end   Iterator pointing to the end of the input sequence
509     ///
510     /// \return Iterator pointing to one past the last read element of the input sequence
511     ///
512     ////////////////////////////////////////////////////////////
513     template <typename In>
514     static In next(In begin, In end);
515 
516     ////////////////////////////////////////////////////////////
517     /// \brief Count the number of characters of a UTF-32 sequence
518     ///
519     /// This function is trivial for UTF-32, which can store
520     /// every character in a single storage element.
521     ///
522     /// \param begin Iterator pointing to the beginning of the input sequence
523     /// \param end   Iterator pointing to the end of the input sequence
524     ///
525     /// \return Number of UTF-32 characters in the input sequence
526     ///
527     ////////////////////////////////////////////////////////////
528     template <typename In>
529     static std::size_t count(In begin, In end);
530 
531     ////////////////////////////////////////////////////////////
532     /// \brief Convert an ANSI characters range to UTF-32
533     ///
534     /// The current global locale will be used by default, unless you
535     /// pass a custom one in the \a locale parameter.
536     ///
537     /// \param begin  Iterator pointing to the beginning of the input sequence
538     /// \param end    Iterator pointing to the end of the input sequence
539     /// \param output Iterator pointing to the beginning of the output sequence
540     /// \param locale Locale to use for conversion
541     ///
542     /// \return Iterator to the end of the output sequence which has been written
543     ///
544     ////////////////////////////////////////////////////////////
545     template <typename In, typename Out>
546     static Out fromAnsi(In begin, In end, Out output, const std::locale& locale = std::locale());
547 
548     ////////////////////////////////////////////////////////////
549     /// \brief Convert a wide characters range to UTF-32
550     ///
551     /// \param begin  Iterator pointing to the beginning of the input sequence
552     /// \param end    Iterator pointing to the end of the input sequence
553     /// \param output Iterator pointing to the beginning of the output sequence
554     ///
555     /// \return Iterator to the end of the output sequence which has been written
556     ///
557     ////////////////////////////////////////////////////////////
558     template <typename In, typename Out>
559     static Out fromWide(In begin, In end, Out output);
560 
561     ////////////////////////////////////////////////////////////
562     /// \brief Convert a latin-1 (ISO-8859-1) characters range to UTF-32
563     ///
564     /// \param begin  Iterator pointing to the beginning of the input sequence
565     /// \param end    Iterator pointing to the end of the input sequence
566     /// \param output Iterator pointing to the beginning of the output sequence
567     ///
568     /// \return Iterator to the end of the output sequence which has been written
569     ///
570     ////////////////////////////////////////////////////////////
571     template <typename In, typename Out>
572     static Out fromLatin1(In begin, In end, Out output);
573 
574     ////////////////////////////////////////////////////////////
575     /// \brief Convert an UTF-32 characters range to ANSI characters
576     ///
577     /// The current global locale will be used by default, unless you
578     /// pass a custom one in the \a locale parameter.
579     ///
580     /// \param begin       Iterator pointing to the beginning of the input sequence
581     /// \param end         Iterator pointing to the end of the input sequence
582     /// \param output      Iterator pointing to the beginning of the output sequence
583     /// \param replacement Replacement for characters not convertible to ANSI (use 0 to skip them)
584     /// \param locale      Locale to use for conversion
585     ///
586     /// \return Iterator to the end of the output sequence which has been written
587     ///
588     ////////////////////////////////////////////////////////////
589     template <typename In, typename Out>
590     static Out toAnsi(In begin, In end, Out output, char replacement = 0, const std::locale& locale = std::locale());
591 
592     ////////////////////////////////////////////////////////////
593     /// \brief Convert an UTF-32 characters range to wide characters
594     ///
595     /// \param begin       Iterator pointing to the beginning of the input sequence
596     /// \param end         Iterator pointing to the end of the input sequence
597     /// \param output      Iterator pointing to the beginning of the output sequence
598     /// \param replacement Replacement for characters not convertible to wide (use 0 to skip them)
599     ///
600     /// \return Iterator to the end of the output sequence which has been written
601     ///
602     ////////////////////////////////////////////////////////////
603     template <typename In, typename Out>
604     static Out toWide(In begin, In end, Out output, wchar_t replacement = 0);
605 
606     ////////////////////////////////////////////////////////////
607     /// \brief Convert a UTF-32 characters range to latin-1 (ISO-8859-1) characters
608     ///
609     /// \param begin       Iterator pointing to the beginning of the input sequence
610     /// \param end         Iterator pointing to the end of the input sequence
611     /// \param output      Iterator pointing to the beginning of the output sequence
612     /// \param replacement Replacement for characters not convertible to latin-1 (use 0 to skip them)
613     ///
614     /// \return Iterator to the end of the output sequence which has been written
615     ///
616     ////////////////////////////////////////////////////////////
617     template <typename In, typename Out>
618     static Out toLatin1(In begin, In end, Out output, char replacement = 0);
619 
620     ////////////////////////////////////////////////////////////
621     /// \brief Convert a range of UTF-32 characters to UTF-8
622     ///
623     /// \param begin  Iterator pointing to the beginning of the input sequence
624     /// \param end    Iterator pointing to the end of the input sequence
625     /// \param output Iterator pointing to the beginning of the output sequence
626     ///
627     /// \return Iterator to the end of the output sequence which has been written
628     ///
629     ////////////////////////////////////////////////////////////
630     template <typename In, typename Out>
631     static Out toUtf8(In begin, In end, Out output);
632 
633     ////////////////////////////////////////////////////////////
634     /// \brief Convert a range of UTF-32 characters to UTF-16
635     ///
636     /// \param begin  Iterator pointing to the beginning of the input sequence
637     /// \param end    Iterator pointing to the end of the input sequence
638     /// \param output Iterator pointing to the beginning of the output sequence
639     ///
640     /// \return Iterator to the end of the output sequence which has been written
641     ///
642     ////////////////////////////////////////////////////////////
643     template <typename In, typename Out>
644     static Out toUtf16(In begin, In end, Out output);
645 
646     ////////////////////////////////////////////////////////////
647     /// \brief Convert a range of UTF-32 characters to UTF-32
648     ///
649     /// This function does nothing more than a direct copy;
650     /// it is defined only to provide the same interface as other
651     /// specializations of the sf::Utf<> template, and allow
652     /// generic code to be written on top of it.
653     ///
654     /// \param begin  Iterator pointing to the beginning of the input sequence
655     /// \param end    Iterator pointing to the end of the input sequence
656     /// \param output Iterator pointing to the beginning of the output sequence
657     ///
658     /// \return Iterator to the end of the output sequence which has been written
659     ///
660     ////////////////////////////////////////////////////////////
661     template <typename In, typename Out>
662     static Out toUtf32(In begin, In end, Out output);
663 
664     ////////////////////////////////////////////////////////////
665     /// \brief Decode a single ANSI character to UTF-32
666     ///
667     /// This function does not exist in other specializations
668     /// of sf::Utf<>; it is defined for convenience (it is used by
669     /// several other conversion functions).
670     ///
671     /// \param input  Input ANSI character
672     /// \param locale Locale to use for conversion
673     ///
674     /// \return Converted character
675     ///
676     ////////////////////////////////////////////////////////////
677     template <typename In>
678     static Uint32 decodeAnsi(In input, const std::locale& locale = std::locale());
679 
680     ////////////////////////////////////////////////////////////
681     /// \brief Decode a single wide character to UTF-32
682     ///
683     /// This function does not exist in other specializations
684     /// of sf::Utf<>; it is defined for convenience (it is used by
685     /// several other conversion functions).
686     ///
687     /// \param input Input wide character
688     ///
689     /// \return Converted character
690     ///
691     ////////////////////////////////////////////////////////////
692     template <typename In>
693     static Uint32 decodeWide(In input);
694 
695     ////////////////////////////////////////////////////////////
696     /// \brief Encode a single UTF-32 character to ANSI
697     ///
698     /// This function does not exist in other specializations
699     /// of sf::Utf<>; it is defined for convenience (it is used by
700     /// several other conversion functions).
701     ///
702     /// \param codepoint   UTF-32 codepoint to encode as ANSI
703     /// \param output      Iterator pointing to the beginning of the output sequence
704     /// \param replacement Replacement if the input character is not convertible to ANSI (use 0 to skip it)
705     /// \param locale      Locale to use for conversion
706     ///
707     /// \return Iterator to the end of the output sequence which has been written
708     ///
709     ////////////////////////////////////////////////////////////
710     template <typename Out>
711     static Out encodeAnsi(Uint32 codepoint, Out output, char replacement = 0, const std::locale& locale = std::locale());
712 
713     ////////////////////////////////////////////////////////////
714     /// \brief Encode a single UTF-32 character to wide
715     ///
716     /// This function does not exist in other specializations
717     /// of sf::Utf<>; it is defined for convenience (it is used by
718     /// several other conversion functions).
719     ///
720     /// \param codepoint   UTF-32 codepoint to encode as a wide character
721     /// \param output      Iterator pointing to the beginning of the output sequence
722     /// \param replacement Replacement if the input character is not convertible to wide (use 0 to skip it)
723     ///
724     /// \return Iterator to the end of the output sequence which has been written
725     ///
726     ////////////////////////////////////////////////////////////
727     template <typename Out>
728     static Out encodeWide(Uint32 codepoint, Out output, wchar_t replacement = 0);
729 };
730 
731 #include <SFML/System/Utf.inl>
732 
733 // Convenience typedefs (sf::Utf8, sf::Utf16, sf::Utf32) to get rid of the template syntax
734 typedef Utf<8>  Utf8;
735 typedef Utf<16> Utf16;
736 typedef Utf<32> Utf32;
737 
738 } // namespace sf
739 
740 
741 #endif // SFML_UTF_HPP
742 
743 
744 ////////////////////////////////////////////////////////////
745 /// \class sf::Utf
746 /// \ingroup system
747 ///
748 /// Utility class providing generic functions for UTF conversions.
749 ///
750 /// sf::Utf is a low-level, generic interface for counting, iterating,
751 /// encoding and decoding Unicode characters and strings. It is able
752 /// to handle ANSI, wide, latin-1, UTF-8, UTF-16 and UTF-32 encodings.
753 ///
754 /// sf::Utf<X> functions are all static; these classes are not meant to
755 /// be instantiated. All the functions are templates, so that you
756 /// can use any character / string type for a given encoding.
757 ///
758 /// It has 3 specializations:
759 /// \li sf::Utf<8> (typedef'd to sf::Utf8)
760 /// \li sf::Utf<16> (typedef'd to sf::Utf16)
761 /// \li sf::Utf<32> (typedef'd to sf::Utf32)
762 ///
763 ////////////////////////////////////////////////////////////
764