1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD$");
27 
28 #include <locale.h>
29 
30 DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
31 {
32   	struct archive *a;
33   	struct archive_entry *entry;
34 	char buff[4096];
35 	size_t used;
36 
37 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
38 		skipping("en_US.UTF-8 locale not available on this system.");
39 		return;
40 	}
41 
42 	/*
43 	 * Verify that UTF-8 filenames are correctly translated into CP866
44 	 * and stored with hdrcharset=CP866 option.
45 	 */
46 	a = archive_write_new();
47 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
48 	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
49 		skipping("This system cannot convert character-set"
50 		    " from UTF-8 to CP866.");
51 		archive_write_free(a);
52 		return;
53 	}
54 	assertEqualInt(ARCHIVE_OK,
55 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
56 
57 	entry = archive_entry_new2(a);
58 	/* Set a UTF-8 filename. */
59 	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
60 	archive_entry_set_filetype(entry, AE_IFREG);
61 	archive_entry_set_size(entry, 0);
62 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
63 	archive_entry_free(entry);
64 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
65 
66 	/* Above three characters in UTF-8 should translate to the following
67 	 * three characters in CP866. */
68 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
69 }
70 
71 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
72 {
73   	struct archive *a;
74   	struct archive_entry *entry;
75 	char buff[4096];
76 	size_t used;
77 
78 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
79 		skipping("KOI8-R locale not available on this system.");
80 		return;
81 	}
82 
83 	/*
84 	 * Verify that KOI8-R filenames are correctly translated into UTF-8
85 	 * and stored with hdrcharset=UTF-8 option.
86 	 */
87 	a = archive_write_new();
88 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
89 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
90 		skipping("This system cannot convert character-set"
91 		    " from KOI8-R to UTF-8.");
92 		archive_write_free(a);
93 		return;
94 	}
95 	assertEqualInt(ARCHIVE_OK,
96 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
97 
98 	entry = archive_entry_new2(a);
99 	/* Set a KOI8-R filename. */
100 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
101 	archive_entry_set_filetype(entry, AE_IFREG);
102 	archive_entry_set_size(entry, 0);
103 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
104 	archive_entry_free(entry);
105 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
106 
107 	/* Above three characters in KOI8-R should translate to the following
108 	 * three characters (two bytes each) in UTF-8. */
109 	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
110 }
111 
112 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
113 {
114   	struct archive *a;
115   	struct archive_entry *entry;
116 	char buff[4096];
117 	size_t used;
118 
119 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
120 		skipping("KOI8-R locale not available on this system.");
121 		return;
122 	}
123 
124 	/*
125 	 * Verify that KOI8-R filenames are correctly translated into CP866
126 	 * and stored with hdrcharset=CP866 option.
127 	 */
128 	a = archive_write_new();
129 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
130 	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
131 		skipping("This system cannot convert character-set"
132 		    " from KOI8-R to CP866.");
133 		archive_write_free(a);
134 		return;
135 	}
136 	assertEqualInt(ARCHIVE_OK,
137 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
138 
139 	entry = archive_entry_new2(a);
140 	/* Set a KOI8-R filename. */
141 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
142 	archive_entry_set_filetype(entry, AE_IFREG);
143 	archive_entry_set_size(entry, 0);
144 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
145 	archive_entry_free(entry);
146 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
147 
148 	/* Above three characters in KOI8-R should translate to the following
149 	 * three characters in CP866. */
150 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
151 }
152 
153 DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
154 {
155   	struct archive *a;
156   	struct archive_entry *entry;
157 	char buff[4096];
158 	size_t used;
159 
160 	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
161 	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
162 		skipping("KOI8-R locale not available on this system.");
163 		return;
164 	}
165 
166 	/*
167 	 * Verify that CP1251 filenames are correctly translated into UTF-8
168 	 * and stored with hdrcharset=UTF-8 option.
169 	 */
170 	a = archive_write_new();
171 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
172 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
173 		skipping("This system cannot convert character-set"
174 		    " from KOI8-R to UTF-8.");
175 		archive_write_free(a);
176 		return;
177 	}
178 	assertEqualInt(ARCHIVE_OK,
179 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
180 
181 	entry = archive_entry_new2(a);
182 	/* Set a KOI8-R filename. */
183 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
184 	archive_entry_set_filetype(entry, AE_IFREG);
185 	archive_entry_set_size(entry, 0);
186 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
187 	archive_entry_free(entry);
188 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
189 
190 	/* Above three characters in CP1251 should translate to the following
191 	 * three characters (two bytes each) in UTF-8. */
192 	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
193 }
194 
195 /*
196  * Do not translate CP1251 into CP866 if non Windows platform.
197  */
198 DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
199 {
200   	struct archive *a;
201   	struct archive_entry *entry;
202 	char buff[4096];
203 	size_t used;
204 
205 	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
206 		skipping("KOI8-R locale not available on this system.");
207 		return;
208 	}
209 
210 	/*
211 	 * Verify that CP1251 filenames are not translated into any
212 	 * other character-set, in particular, CP866.
213 	 */
214 	a = archive_write_new();
215 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
216 	assertEqualInt(ARCHIVE_OK,
217 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
218 
219 	entry = archive_entry_new2(a);
220 	/* Set a KOI8-R filename. */
221 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
222 	archive_entry_set_filetype(entry, AE_IFREG);
223 	archive_entry_set_size(entry, 0);
224 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
225 	archive_entry_free(entry);
226 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
227 
228 	/* Above three characters in CP1251 should not translate to
229 	 * any other character-set. */
230 	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
231 }
232 
233 /*
234  * Other archiver applications on Windows translate CP1251 filenames
235  * into CP866 filenames and store it in the ustar file.
236  * Test above behavior works well.
237  */
238 DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
239 {
240   	struct archive *a;
241   	struct archive_entry *entry;
242 	char buff[4096];
243 	size_t used;
244 
245 	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
246 		skipping("Russian_Russia locale not available on this system.");
247 		return;
248 	}
249 
250 	/*
251 	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
252 	 * to CP866.
253 	 */
254 	a = archive_write_new();
255 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
256 	assertEqualInt(ARCHIVE_OK,
257 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
258 
259 	entry = archive_entry_new2(a);
260 	/* Set a CP1251 filename. */
261 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
262 	archive_entry_set_filetype(entry, AE_IFREG);
263 	archive_entry_set_size(entry, 0);
264 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
265 	archive_entry_free(entry);
266 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
267 
268 	/* Above three characters in CP1251 should translate to the following
269 	 * three characters in CP866. */
270 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
271 }
272 
273 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
274 {
275   	struct archive *a;
276   	struct archive_entry *entry;
277 	char buff[4096];
278 	size_t used;
279 
280 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
281 		skipping("eucJP locale not available on this system.");
282 		return;
283 	}
284 
285 	/*
286 	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
287 	 */
288 	a = archive_write_new();
289 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
290 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
291 		skipping("This system cannot convert character-set"
292 		    " from eucJP to UTF-8.");
293 		archive_write_free(a);
294 		return;
295 	}
296 	assertEqualInt(ARCHIVE_OK,
297 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
298 
299 	entry = archive_entry_new2(a);
300 	/* Set an EUC-JP filename. */
301 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
302 	/* Check the Unicode version. */
303 	archive_entry_set_filetype(entry, AE_IFREG);
304 	archive_entry_set_size(entry, 0);
305 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
306 	archive_entry_free(entry);
307 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
308 
309 	/* Check UTF-8 version. */
310 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
311 }
312 
313 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
314 {
315   	struct archive *a;
316   	struct archive_entry *entry;
317 	char buff[4096];
318 	size_t used;
319 
320 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
321 		skipping("eucJP locale not available on this system.");
322 		return;
323 	}
324 
325 	/*
326 	 * Verify that EUC-JP filenames are correctly translated to CP932.
327 	 */
328 	a = archive_write_new();
329 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
330 	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
331 		skipping("This system cannot convert character-set"
332 		    " from eucJP to CP932.");
333 		archive_write_free(a);
334 		return;
335 	}
336 	assertEqualInt(ARCHIVE_OK,
337 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
338 
339 	entry = archive_entry_new2(a);
340 	/* Set an EUC-JP filename. */
341 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
342 	/* Check the Unicode version. */
343 	archive_entry_set_filetype(entry, AE_IFREG);
344 	archive_entry_set_size(entry, 0);
345 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
346 	archive_entry_free(entry);
347 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
348 
349 	/* Check CP932 version. */
350 	assertEqualMem(buff, "\x95\x5C.txt", 6);
351 }
352 
353 DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
354 {
355   	struct archive *a;
356   	struct archive_entry *entry;
357 	char buff[4096];
358 	size_t used;
359 
360 	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
361 	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
362 		skipping("CP932/SJIS locale not available on this system.");
363 		return;
364 	}
365 
366 	/*
367 	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
368 	 */
369 	a = archive_write_new();
370 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
371 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
372 		skipping("This system cannot convert character-set"
373 		    " from CP932/SJIS to UTF-8.");
374 		archive_write_free(a);
375 		return;
376 	}
377 	assertEqualInt(ARCHIVE_OK,
378 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
379 
380 	entry = archive_entry_new2(a);
381 	/* Set a CP932/SJIS filename. */
382 	archive_entry_set_pathname(entry, "\x95\x5C.txt");
383 	/* Check the Unicode version. */
384 	archive_entry_set_filetype(entry, AE_IFREG);
385 	archive_entry_set_size(entry, 0);
386 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
387 	archive_entry_free(entry);
388 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
389 
390 	/* Check UTF-8 version. */
391 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
392 }
393 
394