1 /* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
2    file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
3 #include "kwsysPrivate.h"
4 
5 #if defined(_MSC_VER)
6 #  pragma warning(disable : 4786)
7 #endif
8 
9 #include KWSYS_HEADER(Encoding.hxx)
10 #include KWSYS_HEADER(Encoding.h)
11 
12 #include <algorithm>
13 #include <iostream>
14 #include <locale.h>
15 #include <stdlib.h>
16 #include <string.h>
17 
18 // Work-around CMake dependency scanning limitation.  This must
19 // duplicate the above list of headers.
20 #if 0
21 #  include "Encoding.h.in"
22 #  include "Encoding.hxx.in"
23 #endif
24 
// "Hello World" greetings in several languages, stored as NUL-terminated
// UTF-8 byte strings.  Multi-byte sequences are grouped one code point per
// cluster.  A row whose first byte is 0 marks the end of the table.
static const unsigned char helloWorldStrings[][32] = {
  // English
  { 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', 0 },
  // Japanese
  { 0xE3, 0x81, 0x93, /**/ 0xE3, 0x82, 0x93, /**/ 0xE3, 0x81, 0xAB,
    0xE3, 0x81, 0xA1, /**/ 0xE3, 0x81, 0xAF, /**/ 0xE4, 0xB8, 0x96,
    0xE7, 0x95, 0x8C, /**/ 0 },
  // Arabic
  { 0xD9, 0x85, /**/ 0xD8, 0xB1, /**/ 0xD8, 0xAD, /**/ 0xD8, 0xA8,
    0xD8, 0xA7, /**/ 0x20,
    0xD8, 0xA7, /**/ 0xD9, 0x84, /**/ 0xD8, 0xB9, /**/ 0xD8, 0xA7,
    0xD9, 0x84, /**/ 0xD9, 0x85, /**/ 0 },
  // Yiddish
  { 0xD7, 0x94, /**/ 0xD7, 0xA2, /**/ 0xD7, 0x9C, /**/ 0xD7, 0x90,
    0x20,
    0xD7, 0x95, /**/ 0xD7, 0x95, /**/ 0xD7, 0xA2, /**/ 0xD7, 0x9C,
    0xD7, 0x98, /**/ 0 },
  // Russian
  { 0xD0, 0xBF, /**/ 0xD1, 0x80, /**/ 0xD0, 0xB8, /**/ 0xD0, 0xB2,
    0xD0, 0xB5, /**/ 0xD1, 0x82, /**/ 0x20,
    0xD0, 0xBC, /**/ 0xD0, 0xB8, /**/ 0xD1, 0x80, /**/ 0 },
  // Latin
  { 'M', 'u', 'n', 'd', 'u', 's', ' ', 's', 'a', 'l', 'v', 'e', 0 },
  // Swahili
  { 'h', 'u', 'j', 'a', 'm', 'b', 'o', ' ', 'D', 'u', 'n', 'i', 'a', 0 },
  // Icelandic (the o-acute is the two-byte sequence C3 B3)
  { 'H', 'a', 'l', 'l', 0xC3, 0xB3, ' ', 'h', 'e', 'i', 'm', 'u', 'r', 0 },
  // End-of-table sentinel
  { 0 }
};
51 
testHelloWorldEncoding()52 static int testHelloWorldEncoding()
53 {
54   int ret = 0;
55   for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
56     std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
57     std::cout << str << std::endl;
58     std::wstring wstr = kwsys::Encoding::ToWide(str);
59     std::string str2 = kwsys::Encoding::ToNarrow(wstr);
60     wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
61     char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
62     if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()))) {
63       std::cout << "converted string was different: " << str2 << std::endl;
64       std::cout << "converted string was different: " << c_str2 << std::endl;
65       ret++;
66     }
67     free(c_wstr);
68     free(c_str2);
69   }
70   return ret;
71 }
72 
testRobustEncoding()73 static int testRobustEncoding()
74 {
75   // test that the conversion functions handle invalid
76   // unicode correctly/gracefully
77 
78   // we manipulate the format flags of stdout, remember
79   // the original state here to restore before return
80   std::ios::fmtflags const& flags = std::cout.flags();
81 
82   int ret = 0;
83   char cstr[] = { (char)-1, 0 };
84   // this conversion could fail
85   std::wstring wstr = kwsys::Encoding::ToWide(cstr);
86 
87   wstr = kwsys::Encoding::ToWide(KWSYS_NULLPTR);
88   if (wstr != L"") {
89     const wchar_t* wcstr = wstr.c_str();
90     std::cout << "ToWide(NULL) returned";
91     for (size_t i = 0; i < wstr.size(); i++) {
92       std::cout << " " << std::hex << (int)wcstr[i];
93     }
94     std::cout << std::endl;
95     ret++;
96   }
97   wstr = kwsys::Encoding::ToWide("");
98   if (wstr != L"") {
99     const wchar_t* wcstr = wstr.c_str();
100     std::cout << "ToWide(\"\") returned";
101     for (size_t i = 0; i < wstr.size(); i++) {
102       std::cout << " " << std::hex << (int)wcstr[i];
103     }
104     std::cout << std::endl;
105     ret++;
106   }
107 
108 #ifdef _WIN32
109   // 16 bit wchar_t - we make an invalid surrogate pair
110   wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
111   // this conversion could fail
112   std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
113 #endif
114 
115   std::string str = kwsys::Encoding::ToNarrow(KWSYS_NULLPTR);
116   if (str != "") {
117     std::cout << "ToNarrow(NULL) returned " << str << std::endl;
118     ret++;
119   }
120 
121   str = kwsys::Encoding::ToNarrow(L"");
122   if (wstr != L"") {
123     std::cout << "ToNarrow(\"\") returned " << str << std::endl;
124     ret++;
125   }
126 
127   std::cout.flags(flags);
128   return ret;
129 }
130 
testWithNulls()131 static int testWithNulls()
132 {
133   int ret = 0;
134   std::vector<std::string> strings;
135   strings.push_back(std::string("ab") + '\0' + 'c');
136   strings.push_back(std::string("d") + '\0' + '\0' + 'e');
137   strings.push_back(std::string() + '\0' + 'f');
138   strings.push_back(std::string() + '\0' + '\0' + "gh");
139   strings.push_back(std::string("ij") + '\0');
140   strings.push_back(std::string("k") + '\0' + '\0');
141   strings.push_back(std::string("\0\0\0\0", 4) + "lmn" +
142                     std::string("\0\0\0\0", 4));
143   for (std::vector<std::string>::iterator it = strings.begin();
144        it != strings.end(); ++it) {
145     std::wstring wstr = kwsys::Encoding::ToWide(*it);
146     std::string str = kwsys::Encoding::ToNarrow(wstr);
147     std::string s(*it);
148     std::replace(s.begin(), s.end(), '\0', ' ');
149     std::cout << "'" << s << "' (" << it->size() << ")" << std::endl;
150     if (str != *it) {
151       std::replace(str.begin(), str.end(), '\0', ' ');
152       std::cout << "string with null was different: '" << str << "' ("
153                 << str.size() << ")" << std::endl;
154       ret++;
155     }
156   }
157   return ret;
158 }
159 
testCommandLineArguments()160 static int testCommandLineArguments()
161 {
162   int status = 0;
163 
164   char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
165 
166   kwsys::Encoding::CommandLineArguments args(2, argv);
167   kwsys::Encoding::CommandLineArguments arg2 =
168     kwsys::Encoding::CommandLineArguments(args);
169 
170   char const* const* u8_argv = args.argv();
171   for (int i = 0; i < args.argc(); i++) {
172     char const* u8_arg = u8_argv[i];
173     if (strcmp(argv[i], u8_arg) != 0) {
174       std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
175                 << std::endl;
176       status++;
177     }
178   }
179 
180   kwsys::Encoding::CommandLineArguments args3 =
181     kwsys::Encoding::CommandLineArguments::Main(2, argv);
182 
183   return status;
184 }
185 
// On Windows, check ToWindowsExtendedPath against a table of inputs and
// their expected results.  Returns the number of mismatches.  On other
// platforms there is nothing to test and 0 is returned.
static int testToWindowsExtendedPath()
{
#ifdef _WIN32
  struct PathCase
  {
    const char* input;       // path handed to ToWindowsExtendedPath
    const wchar_t* expected; // result it must produce
  };
  static const PathCase cases[] = {
    // Drive-letter paths, with either slash style
    { "L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
      L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    { "L:/Local Mojo/Hex Power Pack/Iffy Voodoo",
      L"\\\\?\\L:\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    // Network paths, with either slash style
    { "\\\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo",
      L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    { "//Foo/Local Mojo/Hex Power Pack/Iffy Voodoo",
      L"\\\\?\\UNC\\Foo\\Local Mojo\\Hex Power Pack\\Iffy Voodoo" },
    // Inputs expected to come back unchanged
    { "//", L"//" },
    { "\\\\.\\", L"\\\\.\\" },
    { "\\\\.\\X", L"\\\\.\\X" },
    // Device-prefixed drive paths
    { "\\\\.\\X:", L"\\\\?\\X:" },
    { "\\\\.\\X:\\", L"\\\\?\\X:\\" },
    // Reserved device name
    { "NUL", L"\\\\.\\NUL" }
  };

  int failures = 0;
  for (size_t n = 0; n < sizeof(cases) / sizeof(cases[0]); ++n) {
    if (kwsys::Encoding::ToWindowsExtendedPath(cases[n].input) ==
        cases[n].expected) {
      continue;
    }
    std::cout << "Problem with ToWindowsExtendedPath "
              << "\"" << cases[n].input << "\"" << std::endl;
    ++failures;
  }
  return failures;
#else
  return 0;
#endif
}
267 
testEncoding(int,char * [])268 int testEncoding(int, char* [])
269 {
270   const char* loc = setlocale(LC_ALL, "");
271   if (loc) {
272     std::cout << "Locale: " << loc << std::endl;
273   } else {
274     std::cout << "Locale: None" << std::endl;
275   }
276 
277   int ret = 0;
278 
279   ret |= testHelloWorldEncoding();
280   ret |= testRobustEncoding();
281   ret |= testCommandLineArguments();
282   ret |= testWithNulls();
283   ret |= testToWindowsExtendedPath();
284 
285   return ret;
286 }
287