1 #ifndef COMPONENTS_TOUTF8_H
2 #define COMPONENTS_TOUTF8_H
3 
4 #include <string>
5 #include <cstring>
6 #include <vector>
7 
8 namespace ToUTF8
9 {
10     // These are all the currently supported code pages
11     enum FromType
12     {
13         WINDOWS_1250,      // Central ane Eastern European languages
14         WINDOWS_1251,      // Cyrillic languages
15         WINDOWS_1252,       // Used by English version of Morrowind (and
16             // probably others)
17         CP437           // Used for fonts (*.fnt) if data files encoding is 1252. Otherwise, uses the same encoding as the data files.
18     };
19 
20     FromType calculateEncoding(const std::string& encodingName);
21     std::string encodingUsingMessage(const std::string& encodingName);
22 
23     // class
24 
25     class Utf8Encoder
26     {
27         public:
28             Utf8Encoder(FromType sourceEncoding);
29 
30             // Convert to UTF8 from the previously given code page.
31             std::string getUtf8(const char *input, size_t size);
getUtf8(const std::string & str)32             inline std::string getUtf8(const std::string &str)
33             {
34                 return getUtf8(str.c_str(), str.size());
35             }
36 
37             std::string getLegacyEnc(const char *input, size_t size);
getLegacyEnc(const std::string & str)38             inline std::string getLegacyEnc(const std::string &str)
39             {
40                 return getLegacyEnc(str.c_str(), str.size());
41             }
42 
43         private:
44             void resize(size_t size);
45             size_t getLength(const char* input, bool &ascii);
46             void copyFromArray(unsigned char chp, char* &out);
47             size_t getLength2(const char* input, bool &ascii);
48             void copyFromArray2(const char*& chp, char* &out);
49 
50             std::vector<char> mOutput;
51             signed char* translationArray;
52     };
53 }
54 
55 #endif
56