1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 #include <locale.h>
27 
DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866)28 DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866)
29 {
30   	struct archive *a;
31   	struct archive_entry *entry;
32 	char buff[4096];
33 	size_t used;
34 
35 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
36 		skipping("en_US.UTF-8 locale not available on this system.");
37 		return;
38 	}
39 
40 	/*
41 	 * Verify that UTF-8 filenames are correctly translated into CP866
42 	 * and stored with hdrcharset=CP866 option.
43 	 */
44 	a = archive_write_new();
45 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
46 	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
47 		skipping("This system cannot convert character-set"
48 		    " from UTF-8 to CP866.");
49 		archive_write_free(a);
50 		return;
51 	}
52 	assertEqualInt(ARCHIVE_OK,
53 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
54 
55 	entry = archive_entry_new2(a);
56 	/* Set a UTF-8 filename. */
57 	archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
58 	archive_entry_set_filetype(entry, AE_IFREG);
59 	archive_entry_set_size(entry, 0);
60 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
61 	archive_entry_free(entry);
62 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
63 
64 	/* Above three characters in UTF-8 should translate to the following
65 	 * three characters in CP866. */
66 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
67 }
68 
DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8)69 DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8)
70 {
71   	struct archive *a;
72   	struct archive_entry *entry;
73 	char buff[4096];
74 	size_t used;
75 
76 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
77 		skipping("KOI8-R locale not available on this system.");
78 		return;
79 	}
80 
81 	/*
82 	 * Verify that KOI8-R filenames are correctly translated into UTF-8
83 	 * and stored with hdrcharset=UTF-8 option.
84 	 */
85 	a = archive_write_new();
86 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
87 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
88 		skipping("This system cannot convert character-set"
89 		    " from KOI8-R to UTF-8.");
90 		archive_write_free(a);
91 		return;
92 	}
93 	assertEqualInt(ARCHIVE_OK,
94 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
95 
96 	entry = archive_entry_new2(a);
97 	/* Set a KOI8-R filename. */
98 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
99 	archive_entry_set_filetype(entry, AE_IFREG);
100 	archive_entry_set_size(entry, 0);
101 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
102 	archive_entry_free(entry);
103 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
104 
105 	/* Above three characters in KOI8-R should translate to the following
106 	 * three characters (two bytes each) in UTF-8. */
107 	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
108 }
109 
DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866)110 DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866)
111 {
112   	struct archive *a;
113   	struct archive_entry *entry;
114 	char buff[4096];
115 	size_t used;
116 
117 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
118 		skipping("KOI8-R locale not available on this system.");
119 		return;
120 	}
121 
122 	/*
123 	 * Verify that KOI8-R filenames are correctly translated into CP866
124 	 * and stored with hdrcharset=CP866 option.
125 	 */
126 	a = archive_write_new();
127 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
128 	if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
129 		skipping("This system cannot convert character-set"
130 		    " from KOI8-R to CP866.");
131 		archive_write_free(a);
132 		return;
133 	}
134 	assertEqualInt(ARCHIVE_OK,
135 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
136 
137 	entry = archive_entry_new2(a);
138 	/* Set a KOI8-R filename. */
139 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
140 	archive_entry_set_filetype(entry, AE_IFREG);
141 	archive_entry_set_size(entry, 0);
142 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
143 	archive_entry_free(entry);
144 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
145 
146 	/* Above three characters in KOI8-R should translate to the following
147 	 * three characters in CP866. */
148 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
149 }
150 
DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8)151 DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8)
152 {
153   	struct archive *a;
154   	struct archive_entry *entry;
155 	char buff[4096];
156 	size_t used;
157 
158 	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
159 	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
160 		skipping("KOI8-R locale not available on this system.");
161 		return;
162 	}
163 
164 	/*
165 	 * Verify that CP1251 filenames are correctly translated into UTF-8
166 	 * and stored with hdrcharset=UTF-8 option.
167 	 */
168 	a = archive_write_new();
169 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
170 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
171 		skipping("This system cannot convert character-set"
172 		    " from KOI8-R to UTF-8.");
173 		archive_write_free(a);
174 		return;
175 	}
176 	assertEqualInt(ARCHIVE_OK,
177 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
178 
179 	entry = archive_entry_new2(a);
180 	/* Set a KOI8-R filename. */
181 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
182 	archive_entry_set_filetype(entry, AE_IFREG);
183 	archive_entry_set_size(entry, 0);
184 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
185 	archive_entry_free(entry);
186 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
187 
188 	/* Above three characters in CP1251 should translate to the following
189 	 * three characters (two bytes each) in UTF-8. */
190 	assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
191 }
192 
/*
 * Do not translate CP1251 into CP866 on non-Windows platforms.
 */
DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251)196 DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251)
197 {
198   	struct archive *a;
199   	struct archive_entry *entry;
200 	char buff[4096];
201 	size_t used;
202 
203 	if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
204 		skipping("KOI8-R locale not available on this system.");
205 		return;
206 	}
207 
208 	/*
209 	 * Verify that CP1251 filenames are not translated into any
210 	 * other character-set, in particular, CP866.
211 	 */
212 	a = archive_write_new();
213 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
214 	assertEqualInt(ARCHIVE_OK,
215 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
216 
217 	entry = archive_entry_new2(a);
218 	/* Set a KOI8-R filename. */
219 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
220 	archive_entry_set_filetype(entry, AE_IFREG);
221 	archive_entry_set_size(entry, 0);
222 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
223 	archive_entry_free(entry);
224 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
225 
226 	/* Above three characters in CP1251 should not translate to
227 	 * any other character-set. */
228 	assertEqualMem(buff, "\xEF\xF0\xE8", 3);
229 }
230 
/*
 * Other archiver applications on Windows translate CP1251 filenames
 * into CP866 filenames and store them in the gnutar file.
 * Test that the above behavior works correctly.
 */
DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia)236 DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia)
237 {
238   	struct archive *a;
239   	struct archive_entry *entry;
240 	char buff[4096];
241 	size_t used;
242 
243 	if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
244 		skipping("Russian_Russia locale not available on this system.");
245 		return;
246 	}
247 
248 	/*
249 	 * Verify that Russian_Russia(CP1251) filenames are correctly translated
250 	 * to CP866.
251 	 */
252 	a = archive_write_new();
253 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
254 	assertEqualInt(ARCHIVE_OK,
255 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
256 
257 	entry = archive_entry_new2(a);
258 	/* Set a CP1251 filename. */
259 	archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
260 	archive_entry_set_filetype(entry, AE_IFREG);
261 	archive_entry_set_size(entry, 0);
262 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
263 	archive_entry_free(entry);
264 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
265 
266 	/* Above three characters in CP1251 should translate to the following
267 	 * three characters in CP866. */
268 	assertEqualMem(buff, "\xAF\xE0\xA8", 3);
269 }
270 
DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8)271 DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8)
272 {
273   	struct archive *a;
274   	struct archive_entry *entry;
275 	char buff[4096];
276 	size_t used;
277 
278 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
279 		skipping("eucJP locale not available on this system.");
280 		return;
281 	}
282 
283 	/*
284 	 * Verify that EUC-JP filenames are correctly translated to UTF-8.
285 	 */
286 	a = archive_write_new();
287 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
288 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
289 		skipping("This system cannot convert character-set"
290 		    " from eucJP to UTF-8.");
291 		archive_write_free(a);
292 		return;
293 	}
294 	assertEqualInt(ARCHIVE_OK,
295 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
296 
297 	entry = archive_entry_new2(a);
298 	/* Set an EUC-JP filename. */
299 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
300 	/* Check the Unicode version. */
301 	archive_entry_set_filetype(entry, AE_IFREG);
302 	archive_entry_set_size(entry, 0);
303 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
304 	archive_entry_free(entry);
305 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
306 
307 	/* Check UTF-8 version. */
308 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
309 }
310 
DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932)311 DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932)
312 {
313   	struct archive *a;
314   	struct archive_entry *entry;
315 	char buff[4096];
316 	size_t used;
317 
318 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
319 		skipping("eucJP locale not available on this system.");
320 		return;
321 	}
322 
323 	/*
324 	 * Verify that EUC-JP filenames are correctly translated to CP932.
325 	 */
326 	a = archive_write_new();
327 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
328 	if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
329 		skipping("This system cannot convert character-set"
330 		    " from eucJP to CP932.");
331 		archive_write_free(a);
332 		return;
333 	}
334 	assertEqualInt(ARCHIVE_OK,
335 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
336 
337 	entry = archive_entry_new2(a);
338 	/* Set an EUC-JP filename. */
339 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
340 	/* Check the Unicode version. */
341 	archive_entry_set_filetype(entry, AE_IFREG);
342 	archive_entry_set_size(entry, 0);
343 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
344 	archive_entry_free(entry);
345 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
346 
347 	/* Check CP932 version. */
348 	assertEqualMem(buff, "\x95\x5C.txt", 6);
349 }
350 
DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)351 DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)
352 {
353   	struct archive *a;
354   	struct archive_entry *entry;
355 	char buff[4096];
356 	size_t used;
357 
358 	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
359 	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
360 		skipping("CP932/SJIS locale not available on this system.");
361 		return;
362 	}
363 
364 	/*
365 	 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
366 	 */
367 	a = archive_write_new();
368 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
369 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
370 		skipping("This system cannot convert character-set"
371 		    " from CP932/SJIS to UTF-8.");
372 		archive_write_free(a);
373 		return;
374 	}
375 	assertEqualInt(ARCHIVE_OK,
376 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
377 
378 	entry = archive_entry_new2(a);
379 	/* Set an CP932/SJIS filename. */
380 	archive_entry_set_pathname(entry, "\x95\x5C.txt");
381 	/* Check the Unicode version. */
382 	archive_entry_set_filetype(entry, AE_IFREG);
383 	archive_entry_set_size(entry, 0);
384 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
385 	archive_entry_free(entry);
386 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
387 
388 	/* Check UTF-8 version. */
389 	assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
390 }
391 
392