1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 
27 #include <locale.h>
28 
DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)29 DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
30 {
31   	struct archive *a;
32   	struct archive_entry *entry;
33 	char buff[4096];
34 	size_t used;
35 
36 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
37 		skipping("en_US.UTF-8 locale not available on this system.");
38 		return;
39 	}
40 
41 	/*
42 	 * Verify that UTF-8 filenames are correctly translated into CP866
43 	 * and stored with hdrcharset=CP866 option.
44 	 */
45 	a = archive_write_new();
46 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
47 	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
48 		skipping("This system cannot convert character-set"
49 		    " from UTF-8 to CP866.");
50 		archive_write_free(a);
51 		return;
52 	}
53 	assertEqualInt(ARCHIVE_OK,
54 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
55 
56 	entry = archive_entry_new2(a);
57 	/* Set a UTF-8 filename. */
58 	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
59 	archive_entry_set_filetype(entry, AE_IFREG);
60 	archive_entry_set_size(entry, 0);
61 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
62 	archive_entry_free(entry);
63 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
64 
65 	/* Above three characters in UTF-8 should translate to the following
66 	 * three characters in CP866. */
67 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
68 }
69 
DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)70 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
71 {
72   	struct archive *a;
73   	struct archive_entry *entry;
74 	char buff[4096];
75 	size_t used;
76 
77 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
78 		skipping("KOI8-R locale not available on this system.");
79 		return;
80 	}
81 
82 	/*
83 	 * Verify that KOI8-R filenames are correctly translated into UTF-8
84 	 * and stored with hdrcharset=UTF-8 option.
85 	 */
86 	a = archive_write_new();
87 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
88 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
89 		skipping("This system cannot convert character-set"
90 		    " from KOI8-R to UTF-8.");
91 		archive_write_free(a);
92 		return;
93 	}
94 	assertEqualInt(ARCHIVE_OK,
95 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
96 
97 	entry = archive_entry_new2(a);
98 	/* Set a KOI8-R filename. */
99 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
100 	archive_entry_set_filetype(entry, AE_IFREG);
101 	archive_entry_set_size(entry, 0);
102 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
103 	archive_entry_free(entry);
104 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
105 
106 	/* Above three characters in KOI8-R should translate to the following
107 	 * three characters (two bytes each) in UTF-8. */
108 	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
109 }
110 
DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)111 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
112 {
113   	struct archive *a;
114   	struct archive_entry *entry;
115 	char buff[4096];
116 	size_t used;
117 
118 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
119 		skipping("KOI8-R locale not available on this system.");
120 		return;
121 	}
122 
123 	/*
124 	 * Verify that KOI8-R filenames are correctly translated into CP866
125 	 * and stored with hdrcharset=CP866 option.
126 	 */
127 	a = archive_write_new();
128 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
129 	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
130 		skipping("This system cannot convert character-set"
131 		    " from KOI8-R to CP866.");
132 		archive_write_free(a);
133 		return;
134 	}
135 	assertEqualInt(ARCHIVE_OK,
136 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
137 
138 	entry = archive_entry_new2(a);
139 	/* Set a KOI8-R filename. */
140 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
141 	archive_entry_set_filetype(entry, AE_IFREG);
142 	archive_entry_set_size(entry, 0);
143 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
144 	archive_entry_free(entry);
145 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
146 
147 	/* Above three characters in KOI8-R should translate to the following
148 	 * three characters in CP866. */
149 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
150 }
151 
DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)152 DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
153 {
154   	struct archive *a;
155   	struct archive_entry *entry;
156 	char buff[4096];
157 	size_t used;
158 
159 	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
160 	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
161 		skipping("KOI8-R locale not available on this system.");
162 		return;
163 	}
164 
165 	/*
166 	 * Verify that CP1251 filenames are correctly translated into UTF-8
167 	 * and stored with hdrcharset=UTF-8 option.
168 	 */
169 	a = archive_write_new();
170 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
171 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
172 		skipping("This system cannot convert character-set"
173 		    " from KOI8-R to UTF-8.");
174 		archive_write_free(a);
175 		return;
176 	}
177 	assertEqualInt(ARCHIVE_OK,
178 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
179 
180 	entry = archive_entry_new2(a);
181 	/* Set a KOI8-R filename. */
182 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
183 	archive_entry_set_filetype(entry, AE_IFREG);
184 	archive_entry_set_size(entry, 0);
185 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
186 	archive_entry_free(entry);
187 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
188 
189 	/* Above three characters in CP1251 should translate to the following
190 	 * three characters (two bytes each) in UTF-8. */
191 	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
192 }
193 
194 /*
195  * Do not translate CP1251 into CP866 if non Windows platform.
196  */
DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)197 DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
198 {
199   	struct archive *a;
200   	struct archive_entry *entry;
201 	char buff[4096];
202 	size_t used;
203 
204 	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
205 		skipping("KOI8-R locale not available on this system.");
206 		return;
207 	}
208 
209 	/*
210 	 * Verify that CP1251 filenames are not translated into any
211 	 * other character-set, in particular, CP866.
212 	 */
213 	a = archive_write_new();
214 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
215 	assertEqualInt(ARCHIVE_OK,
216 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
217 
218 	entry = archive_entry_new2(a);
219 	/* Set a KOI8-R filename. */
220 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
221 	archive_entry_set_filetype(entry, AE_IFREG);
222 	archive_entry_set_size(entry, 0);
223 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
224 	archive_entry_free(entry);
225 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
226 
227 	/* Above three characters in CP1251 should not translate to
228 	 * any other character-set. */
229 	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
230 }
231 
232 /*
233  * Other archiver applications on Windows translate CP1251 filenames
234  * into CP866 filenames and store it in the ustar file.
235  * Test above behavior works well.
236  */
DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)237 DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
238 {
239   	struct archive *a;
240   	struct archive_entry *entry;
241 	char buff[4096];
242 	size_t used;
243 
244 	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
245 		skipping("Russian_Russia locale not available on this system.");
246 		return;
247 	}
248 
249 	/*
250 	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
251 	 * to CP866.
252 	 */
253 	a = archive_write_new();
254 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
255 	assertEqualInt(ARCHIVE_OK,
256 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
257 
258 	entry = archive_entry_new2(a);
259 	/* Set a CP1251 filename. */
260 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
261 	archive_entry_set_filetype(entry, AE_IFREG);
262 	archive_entry_set_size(entry, 0);
263 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
264 	archive_entry_free(entry);
265 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
266 
267 	/* Above three characters in CP1251 should translate to the following
268 	 * three characters in CP866. */
269 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
270 }
271 
DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)272 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
273 {
274   	struct archive *a;
275   	struct archive_entry *entry;
276 	char buff[4096];
277 	size_t used;
278 
279 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
280 		skipping("eucJP locale not available on this system.");
281 		return;
282 	}
283 
284 	/*
285 	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
286 	 */
287 	a = archive_write_new();
288 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
289 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
290 		skipping("This system cannot convert character-set"
291 		    " from eucJP to UTF-8.");
292 		archive_write_free(a);
293 		return;
294 	}
295 	assertEqualInt(ARCHIVE_OK,
296 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
297 
298 	entry = archive_entry_new2(a);
299 	/* Set an EUC-JP filename. */
300 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
301 	/* Check the Unicode version. */
302 	archive_entry_set_filetype(entry, AE_IFREG);
303 	archive_entry_set_size(entry, 0);
304 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
305 	archive_entry_free(entry);
306 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
307 
308 	/* Check UTF-8 version. */
309 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
310 }
311 
DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)312 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
313 {
314   	struct archive *a;
315   	struct archive_entry *entry;
316 	char buff[4096];
317 	size_t used;
318 
319 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
320 		skipping("eucJP locale not available on this system.");
321 		return;
322 	}
323 
324 	/*
325 	 * Verify that EUC-JP filenames are correctly translated to CP932.
326 	 */
327 	a = archive_write_new();
328 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
329 	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
330 		skipping("This system cannot convert character-set"
331 		    " from eucJP to CP932.");
332 		archive_write_free(a);
333 		return;
334 	}
335 	assertEqualInt(ARCHIVE_OK,
336 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
337 
338 	entry = archive_entry_new2(a);
339 	/* Set an EUC-JP filename. */
340 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
341 	/* Check the Unicode version. */
342 	archive_entry_set_filetype(entry, AE_IFREG);
343 	archive_entry_set_size(entry, 0);
344 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
345 	archive_entry_free(entry);
346 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
347 
348 	/* Check CP932 version. */
349 	assertEqualMem(buff, "\x95\x5C.txt", 6);
350 }
351 
DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)352 DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
353 {
354   	struct archive *a;
355   	struct archive_entry *entry;
356 	char buff[4096];
357 	size_t used;
358 
359 	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
360 	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
361 		skipping("CP932/SJIS locale not available on this system.");
362 		return;
363 	}
364 
365 	/*
366 	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
367 	 */
368 	a = archive_write_new();
369 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
370 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
371 		skipping("This system cannot convert character-set"
372 		    " from CP932/SJIS to UTF-8.");
373 		archive_write_free(a);
374 		return;
375 	}
376 	assertEqualInt(ARCHIVE_OK,
377 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
378 
379 	entry = archive_entry_new2(a);
380 	/* Set a CP932/SJIS filename. */
381 	archive_entry_set_pathname(entry, "\x95\x5C.txt");
382 	/* Check the Unicode version. */
383 	archive_entry_set_filetype(entry, AE_IFREG);
384 	archive_entry_set_size(entry, 0);
385 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
386 	archive_entry_free(entry);
387 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
388 
389 	/* Check UTF-8 version. */
390 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
391 }
392 
393