1 /* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
2    file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
3 #include "kwsysPrivate.h"
4 
5 #if defined(_MSC_VER)
6 #  pragma warning(disable : 4786)
7 #endif
8 
9 #include KWSYS_HEADER(Encoding.hxx)
10 #include KWSYS_HEADER(Encoding.h)
11 
12 #include <algorithm>
13 #include <clocale>
14 #include <cstdlib>
15 #include <cstring>
16 #include <iostream>
17 
18 // Work-around CMake dependency scanning limitation.  This must
19 // duplicate the above list of headers.
20 #if 0
21 #  include "Encoding.h.in"
22 #  include "Encoding.hxx.in"
23 #endif
24 
// "Hello world" greetings in several languages, stored as raw UTF-8 bytes.
// Every row is a NUL-terminated byte string held in a fixed 32-byte slot.
// A row whose first byte is 0 terminates the table, so consumers iterate
// until they reach it.
static const unsigned char helloWorldStrings[][32] = {
  // English
  { 'H', 'e', 'l', 'l', 'o', ' ',
    'W', 'o', 'r', 'l', 'd',
    0 },
  // Japanese (one three-byte sequence per line)
  { 0xE3, 0x81, 0x93,
    0xE3, 0x82, 0x93,
    0xE3, 0x81, 0xAB,
    0xE3, 0x81, 0xA1,
    0xE3, 0x81, 0xAF,
    0xE4, 0xB8, 0x96,
    0xE7, 0x95, 0x8C,
    0 },
  // Arabic (two words of two-byte sequences separated by a space)
  { 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7,
    0x20,
    0xD8, 0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x85,
    0 },
  // Yiddish (two words of two-byte sequences separated by a space)
  { 0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90,
    0x20,
    0xD7, 0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98,
    0 },
  // Russian (two words of two-byte sequences separated by a space)
  { 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82,
    0x20,
    0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80,
    0 },
  // Latin
  { 0x4D, 0x75, 0x6E, 0x64, 0x75, 0x73,
    0x20,
    0x73, 0x61, 0x6C, 0x76, 0x65,
    0 },
  // Swahili
  { 0x68, 0x75, 0x6A, 0x61, 0x6D, 0x62, 0x6F,
    0x20,
    0x44, 0x75, 0x6E, 0x69, 0x61,
    0 },
  // Icelandic (0xC3 0xB3 is the only multi-byte sequence)
  { 0x48, 0x61, 0x6C, 0x6C, 0xC3, 0xB3,
    0x20,
    0x68, 0x65, 0x69, 0x6D, 0x75, 0x72,
    0 },
  // Sentinel row: marks the end of the table.
  { 0 }
};
51 
testHelloWorldEncoding()52 static int testHelloWorldEncoding()
53 {
54   int ret = 0;
55   for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
56     std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
57     std::cout << str << std::endl;
58     std::wstring wstr = kwsys::Encoding::ToWide(str);
59     std::string str2 = kwsys::Encoding::ToNarrow(wstr);
60     wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
61     char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
62     if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()) != 0)) {
63       std::cout << "converted string was different: " << str2 << std::endl;
64       std::cout << "converted string was different: " << c_str2 << std::endl;
65       ret++;
66     }
67     free(c_wstr);
68     free(c_str2);
69   }
70   return ret;
71 }
72 
testRobustEncoding()73 static int testRobustEncoding()
74 {
75   // test that the conversion functions handle invalid
76   // unicode correctly/gracefully
77 
78   // we manipulate the format flags of stdout, remember
79   // the original state here to restore before return
80   std::ios::fmtflags const& flags = std::cout.flags();
81 
82   int ret = 0;
83   char cstr[] = { (char)-1, 0 };
84   // this conversion could fail
85   std::wstring wstr = kwsys::Encoding::ToWide(cstr);
86 
87   wstr = kwsys::Encoding::ToWide(nullptr);
88   if (!wstr.empty()) {
89     const wchar_t* wcstr = wstr.c_str();
90     std::cout << "ToWide(NULL) returned";
91     for (size_t i = 0; i < wstr.size(); i++) {
92       std::cout << " " << std::hex << (int)wcstr[i];
93     }
94     std::cout << std::endl;
95     ret++;
96   }
97   wstr = kwsys::Encoding::ToWide("");
98   if (!wstr.empty()) {
99     const wchar_t* wcstr = wstr.c_str();
100     std::cout << "ToWide(\"\") returned";
101     for (size_t i = 0; i < wstr.size(); i++) {
102       std::cout << " " << std::hex << (int)wcstr[i];
103     }
104     std::cout << std::endl;
105     ret++;
106   }
107 
108 #ifdef _WIN32
109   // 16 bit wchar_t - we make an invalid surrogate pair
110   wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
111   // this conversion could fail
112   std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
113 #endif
114 
115   std::string str = kwsys::Encoding::ToNarrow(nullptr);
116   if (!str.empty()) {
117     std::cout << "ToNarrow(NULL) returned " << str << std::endl;
118     ret++;
119   }
120 
121   str = kwsys::Encoding::ToNarrow(L"");
122   if (!wstr.empty()) {
123     std::cout << "ToNarrow(\"\") returned " << str << std::endl;
124     ret++;
125   }
126 
127   std::cout.flags(flags);
128   return ret;
129 }
130 
testWithNulls()131 static int testWithNulls()
132 {
133   int ret = 0;
134   std::vector<std::string> strings;
135   strings.push_back(std::string("ab") + '\0' + 'c');
136   strings.push_back(std::string("d") + '\0' + '\0' + 'e');
137   strings.push_back(std::string() + '\0' + 'f');
138   strings.push_back(std::string() + '\0' + '\0' + "gh");
139   strings.push_back(std::string("ij") + '\0');
140   strings.push_back(std::string("k") + '\0' + '\0');
141   strings.push_back(std::string("\0\0\0\0", 4) + "lmn" +
142                     std::string("\0\0\0\0", 4));
143   for (auto& string : strings) {
144     std::wstring wstr = kwsys::Encoding::ToWide(string);
145     std::string str = kwsys::Encoding::ToNarrow(wstr);
146     std::string s(string);
147     std::replace(s.begin(), s.end(), '\0', ' ');
148     std::cout << "'" << s << "' (" << string.size() << ")" << std::endl;
149     if (str != string) {
150       std::replace(str.begin(), str.end(), '\0', ' ');
151       std::cout << "string with null was different: '" << str << "' ("
152                 << str.size() << ")" << std::endl;
153       ret++;
154     }
155   }
156   return ret;
157 }
158 
testCommandLineArguments()159 static int testCommandLineArguments()
160 {
161   int status = 0;
162 
163   char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
164 
165   kwsys::Encoding::CommandLineArguments args(2, argv);
166   kwsys::Encoding::CommandLineArguments arg2 =
167     kwsys::Encoding::CommandLineArguments(args);
168 
169   char const* const* u8_argv = args.argv();
170   for (int i = 0; i < args.argc(); i++) {
171     char const* u8_arg = u8_argv[i];
172     if (strcmp(argv[i], u8_arg) != 0) {
173       std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
174                 << std::endl;
175       status++;
176     }
177   }
178 
179   kwsys::Encoding::CommandLineArguments args3 =
180     kwsys::Encoding::CommandLineArguments::Main(2, argv);
181 
182   return status;
183 }
184 
// On Windows, feeds a fixed set of paths to Encoding::ToWindowsExtendedPath
// and compares each result with the expected wide string.  On every other
// platform there is nothing to check.
//
// Returns the number of paths that converted incorrectly (always 0 when
// _WIN32 is not defined).
static int testToWindowsExtendedPath()
{
#ifdef _WIN32
  struct Case
  {
    const char* input;       // path handed to ToWindowsExtendedPath
    const wchar_t* expected; // result the conversion must produce
  };
  static const Case cases[] = {
    { "L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
      L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    { "L:/Local Mojo/Hex Power Pack/Iffy Voodoo",
      L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    { "\\\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
      L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    { "//Foo/Local Mojo/Hex Power Pack/Iffy Voodoo",
      L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    { "//", L"//" },
    { "\\\\.\\", L"\\\\.\\" },
    { "\\\\.\\X", L"\\\\.\\X" },
    { "\\\\.\\X:", L"\\\\?\\X:" },
    { "\\\\.\\X:\\", L"\\\\?\\X:\\" },
    { "NUL", L"\\\\.\\NUL" },
  };

  int ret = 0;
  for (const Case& entry : cases) {
    if (kwsys::Encoding::ToWindowsExtendedPath(entry.input) !=
        entry.expected) {
      // Report the offending input wrapped in double quotes.
      std::cout << "Problem with ToWindowsExtendedPath "
                << "\"" << entry.input << "\"" << std::endl;
      ++ret;
    }
  }
  return ret;
#else
  return 0;
#endif
}
266 
testEncoding(int,char * [])267 int testEncoding(int, char* [])
268 {
269   const char* loc = setlocale(LC_ALL, "");
270   if (loc) {
271     std::cout << "Locale: " << loc << std::endl;
272   } else {
273     std::cout << "Locale: None" << std::endl;
274   }
275 
276   int ret = 0;
277 
278   ret |= testHelloWorldEncoding();
279   ret |= testRobustEncoding();
280   ret |= testCommandLineArguments();
281   ret |= testWithNulls();
282   ret |= testToWindowsExtendedPath();
283 
284   return ret;
285 }
286