1 // 2 // UTF8Encoding.h 3 // 4 // Library: Foundation 5 // Package: Text 6 // Module: UTF8Encoding 7 // 8 // Definition of the UTF8Encoding class. 9 // 10 // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH. 11 // and Contributors. 12 // 13 // SPDX-License-Identifier: BSL-1.0 14 // 15 16 17 #ifndef Foundation_UTF8Encoding_INCLUDED 18 #define Foundation_UTF8Encoding_INCLUDED 19 20 21 #include "Poco/Foundation.h" 22 #include "Poco/TextEncoding.h" 23 24 25 namespace Poco { 26 27 28 class Foundation_API UTF8Encoding: public TextEncoding 29 /// UTF-8 text encoding, as defined in RFC 2279. 30 { 31 public: 32 UTF8Encoding(); 33 ~UTF8Encoding(); 34 const char* canonicalName() const; 35 bool isA(const std::string& encodingName) const; 36 const CharacterMap& characterMap() const; 37 int convert(const unsigned char* bytes) const; 38 int convert(int ch, unsigned char* bytes, int length) const; 39 int queryConvert(const unsigned char* bytes, int length) const; 40 int sequenceLength(const unsigned char* bytes, int length) const; 41 42 static bool isLegal(const unsigned char *bytes, int length); 43 /// Utility routine to tell whether a sequence of bytes is legal UTF-8. 44 /// This must be called with the length pre-determined by the first byte. 45 /// The sequence is illegal right away if there aren't enough bytes 46 /// available. If presented with a length > 4, this function returns false. 47 /// The Unicode definition of UTF-8 goes up to 4-byte sequences. 48 /// 49 /// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c 50 /// Copyright 2001-2004 Unicode, Inc. 51 52 private: 53 static const char* _names[]; 54 static const CharacterMap _charMap; 55 }; 56 57 58 } // namespace Poco 59 60 61 #endif // Foundation_UTF8Encoding_INCLUDED 62