1 /*-
2  * Copyright (c) 2003-2007 Tim Kientzle
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 
27 #include <locale.h>
28 
29 /*
30  * Pax interchange is supposed to encode filenames into
31  * UTF-8.  Of course, that's not always possible.  This
32  * test is intended to verify that filenames always get
33  * stored and restored correctly, regardless of the encodings.
34  */
35 
36 /*
37  * Read a manually-created archive that has filenames that are
38  * stored in binary instead of UTF-8 and verify that we get
39  * the right filename returned and that we get a warning only
40  * if the header isn't marked as binary.
41  */
42 static void
test_pax_filename_encoding_1(void)43 test_pax_filename_encoding_1(void)
44 {
45 	static const char testname[] = "test_pax_filename_encoding.tar";
46 	/*
47 	 * \314\214 is a valid 2-byte UTF-8 sequence.
48 	 * \374 is invalid in UTF-8.
49 	 */
50 	char filename[] = "abc\314\214mno\374xyz";
51 	struct archive *a;
52 	struct archive_entry *entry;
53 
54 	/*
55 	 * Read an archive that has non-UTF8 pax filenames in it.
56 	 */
57 	extract_reference_file(testname);
58 	a = archive_read_new();
59 	assertEqualInt(ARCHIVE_OK, archive_read_support_format_tar(a));
60 	assertEqualInt(ARCHIVE_OK, archive_read_support_filter_all(a));
61 	assertEqualInt(ARCHIVE_OK,
62 	    archive_read_open_filename(a, testname, 10240));
63 	/*
64 	 * First entry in this test archive has an invalid UTF-8 sequence
65 	 * in it, but the header is not marked as hdrcharset=BINARY, so that
66 	 * requires a warning.
67 	 */
68 	failure("Invalid UTF8 in a pax archive pathname should cause a warning");
69 	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
70 	assertEqualString(filename, archive_entry_pathname(entry));
71 	/*
72 	 * Second entry is identical except that it does have
73 	 * hdrcharset=BINARY, so no warning should be generated.
74 	 */
75 	failure("A pathname with hdrcharset=BINARY can have invalid UTF8\n"
76 	    " characters in it without generating a warning");
77 	assertEqualInt(ARCHIVE_OK, archive_read_next_header(a, &entry));
78 	assertEqualString(filename, archive_entry_pathname(entry));
79 	archive_read_free(a);
80 }
81 
82 /*
83  * Set the locale and write a pathname containing invalid characters.
84  * This should work; the underlying implementation should automatically
85  * fall back to storing the pathname in binary.
86  */
87 static void
test_pax_filename_encoding_2(void)88 test_pax_filename_encoding_2(void)
89 {
90 	char filename[] = "abc\314\214mno\374xyz";
91 	struct archive *a;
92 	struct archive_entry *entry;
93 	char buff[65536];
94 	char longname[] = "abc\314\214mno\374xyz"
95 	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
96 	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
97 	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
98 	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
99 	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
100 	    "/abc\314\214mno\374xyz/abcdefghijklmnopqrstuvwxyz"
101 	    ;
102 	size_t used;
103 
104 	/*
105 	 * We need a starting locale which has invalid sequences.
106 	 * en_US.UTF-8 seems to be commonly supported.
107 	 */
108 	/* If it doesn't exist, just warn and return. */
109 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
110 		skipping("invalid encoding tests require a suitable locale;"
111 		    " en_US.UTF-8 not available on this system");
112 		return;
113 	}
114 
115 	assert((a = archive_write_new()) != NULL);
116 	assertEqualIntA(a, 0, archive_write_set_format_pax(a));
117 	assertEqualIntA(a, 0, archive_write_add_filter_none(a));
118 	assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
119 	assertEqualInt(0,
120 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
121 
122 	assert((entry = archive_entry_new()) != NULL);
123 	/* Set pathname, gname, uname, hardlink to nonconvertible values. */
124 	archive_entry_copy_pathname(entry, filename);
125 	archive_entry_copy_gname(entry, filename);
126 	archive_entry_copy_uname(entry, filename);
127 	archive_entry_copy_hardlink(entry, filename);
128 	archive_entry_set_filetype(entry, AE_IFREG);
129 	failure("This should generate a warning for nonconvertible names.");
130 	assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
131 	archive_entry_free(entry);
132 
133 	assert((entry = archive_entry_new()) != NULL);
134 	/* Set path, gname, uname, and symlink to nonconvertible values. */
135 	archive_entry_copy_pathname(entry, filename);
136 	archive_entry_copy_gname(entry, filename);
137 	archive_entry_copy_uname(entry, filename);
138 	archive_entry_copy_symlink(entry, filename);
139 	archive_entry_set_filetype(entry, AE_IFLNK);
140 	failure("This should generate a warning for nonconvertible names.");
141 	assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
142 	archive_entry_free(entry);
143 
144 	assert((entry = archive_entry_new()) != NULL);
145 	/* Set pathname to a very long nonconvertible value. */
146 	archive_entry_copy_pathname(entry, longname);
147 	archive_entry_set_filetype(entry, AE_IFREG);
148 	failure("This should generate a warning for nonconvertible names.");
149 	assertEqualInt(ARCHIVE_WARN, archive_write_header(a, entry));
150 	archive_entry_free(entry);
151 
152 	assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a));
153 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
154 
155 	/*
156 	 * Now read the entries back.
157 	 */
158 
159 	assert((a = archive_read_new()) != NULL);
160 	assertEqualInt(0, archive_read_support_format_tar(a));
161 	assertEqualInt(0, archive_read_open_memory(a, buff, used));
162 
163 	assertEqualInt(0, archive_read_next_header(a, &entry));
164 	assertEqualString(filename, archive_entry_pathname(entry));
165 	assertEqualString(filename, archive_entry_gname(entry));
166 	assertEqualString(filename, archive_entry_uname(entry));
167 	assertEqualString(filename, archive_entry_hardlink(entry));
168 
169 	assertEqualInt(0, archive_read_next_header(a, &entry));
170 	assertEqualString(filename, archive_entry_pathname(entry));
171 	assertEqualString(filename, archive_entry_gname(entry));
172 	assertEqualString(filename, archive_entry_uname(entry));
173 	assertEqualString(filename, archive_entry_symlink(entry));
174 
175 	assertEqualInt(0, archive_read_next_header(a, &entry));
176 	assertEqualString(longname, archive_entry_pathname(entry));
177 
178 	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
179 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
180 }
181 
#if 0 /* Disabled until Tim has had a chance to review it. */

/*
 * Create an entry starting from a wide-character Unicode pathname,
 * read it back into "C" locale, which doesn't support the name.
 * TODO: Figure out the "right" behavior here.
 */
static void
test_pax_filename_encoding_3(void)
{
	wchar_t badname[] = L"xxxAyyyBzzz";
	const char badname_utf8[] = "xxx\xE1\x88\xB4yyy\xE5\x99\xB8zzz";
	struct archive *a;
	struct archive_entry *entry;
	char buff[65536];
	size_t used;

	/* Replace the 'A' and 'B' placeholders with non-ASCII characters. */
	badname[3] = 0x1234;
	badname[7] = 0x5678;

	/* If it doesn't exist, just warn and return. */
	if (NULL == setlocale(LC_ALL, "C")) {
		skipping("Can't set \"C\" locale, so can't exercise "
		    "certain character-conversion failures");
		return;
	}

	/*
	 * If the "C" locale can convert 0x1234 after all, no conversion
	 * failure can be provoked here; warn and return.
	 */
	if (wctomb(buff, 0x1234) > 0) {
		skipping("Cannot test conversion failures because \"C\" "
		    "locale on this system has no invalid characters.");
		return;
	}

	/* Skip test if archive_entry_update_pathname_utf8() is broken. */
	/* In particular, this is currently broken on Win32 because
	 * setlocale() does not set the default encoding for CP_ACP. */
	entry = archive_entry_new();
	if (archive_entry_update_pathname_utf8(entry, badname_utf8)) {
		archive_entry_free(entry);
		skipping("Cannot test conversion failures.");
		return;
	}
	archive_entry_free(entry);

	assert((a = archive_write_new()) != NULL);
	assertEqualIntA(a, 0, archive_write_set_format_pax(a));
	assertEqualIntA(a, 0, archive_write_add_filter_none(a));
	assertEqualIntA(a, 0, archive_write_set_bytes_per_block(a, 0));
	assertEqualInt(0,
	    archive_write_open_memory(a, buff, sizeof(buff), &used));

	assert((entry = archive_entry_new()) != NULL);
	/* Set pathname to non-convertible wide value. */
	archive_entry_copy_pathname_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set gname to non-convertible wide value. */
	archive_entry_copy_gname_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set uname to non-convertible wide value. */
	archive_entry_copy_uname_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set hardlink to non-convertible wide value. */
	archive_entry_copy_hardlink_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFREG);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assert((entry = archive_entry_new()) != NULL);
	archive_entry_copy_pathname_w(entry, L"abc");
	/* Set symlink to non-convertible wide value. */
	archive_entry_copy_symlink_w(entry, badname);
	archive_entry_set_filetype(entry, AE_IFLNK);
	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
	archive_entry_free(entry);

	assertEqualIntA(a, ARCHIVE_OK, archive_write_close(a));
	assertEqualInt(ARCHIVE_OK, archive_write_free(a));

	/*
	 * Now read the entries back.
	 */

	assert((a = archive_read_new()) != NULL);
	assertEqualInt(0, archive_read_support_format_tar(a));
	assertEqualInt(0, archive_read_open_memory(a, buff, used));

	failure("A non-convertible pathname should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_pathname_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_pathname(entry));

	failure("A non-convertible gname should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_gname_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_gname(entry));

	failure("A non-convertible uname should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_uname_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_uname(entry));

	failure("A non-convertible hardlink should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_hardlink_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_hardlink(entry));

	failure("A non-convertible symlink should cause a warning.");
	assertEqualInt(ARCHIVE_WARN, archive_read_next_header(a, &entry));
	assertEqualWString(badname, archive_entry_symlink_w(entry));
	assertEqualWString(NULL, archive_entry_hardlink_w(entry));
	failure("If native locale can't convert, we should get UTF-8 back.");
	assertEqualString(badname_utf8, archive_entry_symlink(entry));

	assertEqualInt(ARCHIVE_EOF, archive_read_next_header(a, &entry));

	assertEqualIntA(a, ARCHIVE_OK, archive_read_close(a));
	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
}
#else
/*
 * Placeholder used while the real test above is disabled; it keeps the
 * call in test_pax_filename_encoding() valid and does nothing.
 */
static void
test_pax_filename_encoding_3(void)
{
}
#endif
333 
334 /*
335  * Verify that KOI8-R filenames are correctly translated to Unicode and UTF-8.
336  */
DEFINE_TEST(test_pax_filename_encoding_KOI8R)337 DEFINE_TEST(test_pax_filename_encoding_KOI8R)
338 {
339   	struct archive *a;
340   	struct archive_entry *entry;
341 	char buff[4096];
342 	size_t used;
343 
344 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
345 		skipping("KOI8-R locale not available on this system.");
346 		return;
347 	}
348 
349 	/* Check if the platform completely supports the string conversion. */
350 	a = archive_write_new();
351 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
352 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
353 		skipping("This system cannot convert character-set"
354 		    " from KOI8-R to UTF-8.");
355 		archive_write_free(a);
356 		return;
357 	}
358 	archive_write_free(a);
359 
360 	/* Re-create a write archive object since filenames should be written
361 	 * in UTF-8 by default. */
362 	a = archive_write_new();
363 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
364 	assertEqualInt(ARCHIVE_OK,
365 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
366 
367 	entry = archive_entry_new2(a);
368 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
369 	archive_entry_set_filetype(entry, AE_IFREG);
370 	archive_entry_set_size(entry, 0);
371 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
372 	archive_entry_free(entry);
373 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
374 
375 	/* Above three characters in KOI8-R should translate to the following
376 	 * three characters (two bytes each) in UTF-8. */
377 	assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15);
378 }
379 
380 /*
381  * Verify that CP1251 filenames are correctly translated to Unicode and UTF-8.
382  */
DEFINE_TEST(test_pax_filename_encoding_CP1251)383 DEFINE_TEST(test_pax_filename_encoding_CP1251)
384 {
385   	struct archive *a;
386   	struct archive_entry *entry;
387 	char buff[4096];
388 	size_t used;
389 
390 	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
391 	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
392 		skipping("KOI8-R locale not available on this system.");
393 		return;
394 	}
395 
396 	/* Check if the platform completely supports the string conversion. */
397 	a = archive_write_new();
398 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
399 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
400 		skipping("This system cannot convert character-set"
401 		    " from KOI8-R to UTF-8.");
402 		archive_write_free(a);
403 		return;
404 	}
405 	archive_write_free(a);
406 
407 	/* Re-create a write archive object since filenames should be written
408 	 * in UTF-8 by default. */
409 	a = archive_write_new();
410 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
411 	assertEqualInt(ARCHIVE_OK,
412 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
413 
414 	entry = archive_entry_new2(a);
415 	archive_entry_set_pathname(entry, "\xef\xf0\xe8");
416 	archive_entry_set_filetype(entry, AE_IFREG);
417 	archive_entry_set_size(entry, 0);
418 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
419 	archive_entry_free(entry);
420 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
421 
422 	/* Above three characters in KOI8-R should translate to the following
423 	 * three characters (two bytes each) in UTF-8. */
424 	assertEqualMem(buff + 512, "15 path=\xD0\xBF\xD1\x80\xD0\xB8\x0A", 15);
425 }
426 
427 /*
428  * Verify that EUC-JP filenames are correctly translated to Unicode and UTF-8.
429  */
DEFINE_TEST(test_pax_filename_encoding_EUCJP)430 DEFINE_TEST(test_pax_filename_encoding_EUCJP)
431 {
432   	struct archive *a;
433   	struct archive_entry *entry;
434 	char buff[4096];
435 	size_t used;
436 
437 	if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
438 		skipping("eucJP locale not available on this system.");
439 		return;
440 	}
441 
442 	/* Check if the platform completely supports the string conversion. */
443 	a = archive_write_new();
444 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
445 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
446 		skipping("This system cannot convert character-set"
447 		    " from eucJP to UTF-8.");
448 		archive_write_free(a);
449 		return;
450 	}
451 	archive_write_free(a);
452 
453 	/* Re-create a write archive object since filenames should be written
454 	 * in UTF-8 by default. */
455 	a = archive_write_new();
456 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
457 	assertEqualInt(ARCHIVE_OK,
458 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
459 
460 	entry = archive_entry_new2(a);
461 	archive_entry_set_pathname(entry, "\xC9\xBD.txt");
462 	/* Check the Unicode version. */
463 	archive_entry_set_filetype(entry, AE_IFREG);
464 	archive_entry_set_size(entry, 0);
465 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
466 	archive_entry_free(entry);
467 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
468 
469 	/* Check UTF-8 version. */
470 	assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16);
471 
472 }
473 
474 /*
475  * Verify that CP932/SJIS filenames are correctly translated to Unicode and UTF-8.
476  */
DEFINE_TEST(test_pax_filename_encoding_CP932)477 DEFINE_TEST(test_pax_filename_encoding_CP932)
478 {
479   	struct archive *a;
480   	struct archive_entry *entry;
481 	char buff[4096];
482 	size_t used;
483 
484 	if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
485 	    NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
486 		skipping("eucJP locale not available on this system.");
487 		return;
488 	}
489 
490 	/* Check if the platform completely supports the string conversion. */
491 	a = archive_write_new();
492 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
493 	if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
494 		skipping("This system cannot convert character-set"
495 		    " from CP932/SJIS to UTF-8.");
496 		archive_write_free(a);
497 		return;
498 	}
499 	archive_write_free(a);
500 
501 	/* Re-create a write archive object since filenames should be written
502 	 * in UTF-8 by default. */
503 	a = archive_write_new();
504 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
505 	assertEqualInt(ARCHIVE_OK,
506 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
507 
508 	entry = archive_entry_new2(a);
509 	archive_entry_set_pathname(entry, "\x95\x5C.txt");
510 	/* Check the Unicode version. */
511 	archive_entry_set_filetype(entry, AE_IFREG);
512 	archive_entry_set_size(entry, 0);
513 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
514 	archive_entry_free(entry);
515 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
516 
517 	/* Check UTF-8 version. */
518 	assertEqualMem(buff + 512, "16 path=\xE8\xA1\xA8.txt\x0A", 16);
519 
520 }
521 
522 /*
523  * Verify that KOI8-R filenames are not translated to Unicode and UTF-8
524  * when using hdrcharset=BINARY option.
525  */
DEFINE_TEST(test_pax_filename_encoding_KOI8R_BINARY)526 DEFINE_TEST(test_pax_filename_encoding_KOI8R_BINARY)
527 {
528   	struct archive *a;
529   	struct archive_entry *entry;
530 	char buff[4096];
531 	size_t used;
532 
533 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
534 		skipping("KOI8-R locale not available on this system.");
535 		return;
536 	}
537 
538 	a = archive_write_new();
539 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
540 	/* BINARY mode should be accepted. */
541 	assertEqualInt(ARCHIVE_OK,
542 	    archive_write_set_options(a, "hdrcharset=BINARY"));
543 	assertEqualInt(ARCHIVE_OK,
544 	    archive_write_open_memory(a, buff, sizeof(buff), &used));
545 
546 	entry = archive_entry_new2(a);
547 	archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
548 	archive_entry_set_filetype(entry, AE_IFREG);
549 	archive_entry_set_size(entry, 0);
550 	assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
551 	archive_entry_free(entry);
552 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
553 
554 	/* "hdrcharset=BINARY" pax attribute should be written. */
555 	assertEqualMem(buff + 512, "21 hdrcharset=BINARY\x0A", 21);
556 	/* Above three characters in KOI8-R should not translate to any
557 	 * character-set. */
558 	assertEqualMem(buff + 512+21, "12 path=\xD0\xD2\xC9\x0A", 12);
559 }
560 
561 /*
562  * Pax format writer only accepts both BINARY and UTF-8.
563  * If other character-set name is specified, you will get ARCHIVE_FAILED.
564  */
DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251)565 DEFINE_TEST(test_pax_filename_encoding_KOI8R_CP1251)
566 {
567   	struct archive *a;
568 
569 	if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
570 		skipping("KOI8-R locale not available on this system.");
571 		return;
572 	}
573 
574 	a = archive_write_new();
575 	assertEqualInt(ARCHIVE_OK, archive_write_set_format_pax(a));
576 	/* pax format writer only accepts both BINARY and UTF-8. */
577 	assertEqualInt(ARCHIVE_FAILED,
578 	    archive_write_set_options(a, "hdrcharset=CP1251"));
579 	assertEqualInt(ARCHIVE_OK, archive_write_free(a));
580 }
581 
582 
DEFINE_TEST(test_pax_filename_encoding)583 DEFINE_TEST(test_pax_filename_encoding)
584 {
585 	test_pax_filename_encoding_1();
586 	test_pax_filename_encoding_2();
587 	test_pax_filename_encoding_3();
588 }
589