1 //
2 // UTF8Encoding.h
3 //
4 // Library: Foundation
5 // Package: Text
6 // Module:  UTF8Encoding
7 //
8 // Definition of the UTF8Encoding class.
9 //
10 // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
11 // and Contributors.
12 //
13 // SPDX-License-Identifier:	BSL-1.0
14 //
15 
16 
17 #ifndef Foundation_UTF8Encoding_INCLUDED
18 #define Foundation_UTF8Encoding_INCLUDED
19 
20 
21 #include "Poco/Foundation.h"
22 #include "Poco/TextEncoding.h"
23 
24 
25 namespace Poco {
26 
27 
28 class Foundation_API UTF8Encoding: public TextEncoding
29 	/// UTF-8 text encoding, as defined in RFC 2279.
30 {
31 public:
32 	UTF8Encoding();
33 	~UTF8Encoding();
34 	const char* canonicalName() const;
35 	bool isA(const std::string& encodingName) const;
36 	const CharacterMap& characterMap() const;
37 	int convert(const unsigned char* bytes) const;
38 	int convert(int ch, unsigned char* bytes, int length) const;
39 	int queryConvert(const unsigned char* bytes, int length) const;
40 	int sequenceLength(const unsigned char* bytes, int length) const;
41 
42 	static bool isLegal(const unsigned char *bytes, int length);
43 		/// Utility routine to tell whether a sequence of bytes is legal UTF-8.
44 		/// This must be called with the length pre-determined by the first byte.
45 		/// The sequence is illegal right away if there aren't enough bytes
46 		/// available. If presented with a length > 4, this function returns false.
47 		/// The Unicode definition of UTF-8 goes up to 4-byte sequences.
48 		///
49 		/// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
50 		/// Copyright 2001-2004 Unicode, Inc.
51 
52 private:
53 	static const char* _names[];
54 	static const CharacterMap _charMap;
55 };
56 
57 
58 } // namespace Poco
59 
60 
61 #endif // Foundation_UTF8Encoding_INCLUDED
62