1 /*-
2  * Copyright (c) 2011 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 #include "test.h"
26 __FBSDID("$FreeBSD");
27 
28 #include <locale.h>
29 
30 /*
31  * The sample tar file was made in LANG=KOI8-R and it contains two
32  * files the charset of which are different.
33  * - the filename of first file is stored in BINARY mode.
34  * - the filename of second file is stored in UTF-8.
35  *
 * Whether or not the hdrcharset option is specified, we will correctly
 * read the filename of second file, which is stored in UTF-8 by default.
38  */
39 
40 static void
41 test_read_format_tar_filename_KOI8R_CP866(const char *refname)
42 {
43 	struct archive *a;
44 	struct archive_entry *ae;
45 
46 	/*
47  	* Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option.
48  	* We should correctly read two filenames.
49 	*/
50 	if (NULL == setlocale(LC_ALL, "Russian_Russia.866") &&
51 	    NULL == setlocale(LC_ALL, "ru_RU.CP866")) {
52 		skipping("ru_RU.CP866 locale not available on this system.");
53 		return;
54 	}
55 
56 	/* Test if the platform can convert from UTF-8. */
57 	assert((a = archive_read_new()) != NULL);
58 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
59 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
60 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
61 		skipping("This system cannot convert character-set"
62 		    " from UTF-8 to CP866.");
63 		return;
64 	}
65 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
66 
67 	assert((a = archive_read_new()) != NULL);
68 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
69 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
70 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
71 		skipping("This system cannot convert character-set"
72 		    " from KOI8-R to CP866.");
73 		goto next_test;
74 	}
75 	assertEqualIntA(a, ARCHIVE_OK,
76 	    archive_read_open_filename(a, refname, 10240));
77 
78 	/* Verify regular first file. */
79 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
80 	assertEqualString("\x8f\x90\x88\x82\x85\x92",
81 	    archive_entry_pathname(ae));
82 	assertEqualInt(6, archive_entry_size(ae));
83 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
84 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
85 
86 	/* Verify regular second file. */
87 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
88 	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
89 	    archive_entry_pathname(ae));
90 	assertEqualInt(6, archive_entry_size(ae));
91 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
92 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
93 
94 
95 	/* End of archive. */
96 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
97 
98 	/* Verify archive format. */
99 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
100 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
101 	    archive_format(a));
102 
103 	/* Close the archive. */
104 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
105 next_test:
106 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
107 
108 
109 	/*
110 	 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option.
111 	 * The filename we can properly read is only second file.
112 	 */
113 
114 	assert((a = archive_read_new()) != NULL);
115 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
116 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
117 	assertEqualIntA(a, ARCHIVE_OK,
118 	    archive_read_open_filename(a, refname, 10240));
119 
120 	/*
121 	 * Verify regular first file.
122 	 * The filename is not translated to CP866 because hdrcharset
123 	 * attribute is BINARY and there is not way to know its charset.
124 	 */
125 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
126 	/* A filename is in KOI8-R. */
127 	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
128 	    archive_entry_pathname(ae));
129 	assertEqualInt(6, archive_entry_size(ae));
130 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
131 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
132 
133 	/*
134 	 * Verify regular second file.
135 	 * The filename is translated from UTF-8 to CP866
136 	 */
137 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
138 	assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2",
139 	    archive_entry_pathname(ae));
140 	assertEqualInt(6, archive_entry_size(ae));
141 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
142 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
143 
144 
145 	/* End of archive. */
146 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
147 
148 	/* Verify archive format. */
149 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
150 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
151 	    archive_format(a));
152 
153 	/* Close the archive. */
154 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
155 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
156 }
157 
158 static void
159 test_read_format_tar_filename_KOI8R_UTF8(const char *refname)
160 {
161 	struct archive *a;
162 	struct archive_entry *ae;
163 
164 	/*
165 	 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option.
166 	 * We should correctly read two filenames.
167 	 */
168 	if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
169 		skipping("en_US.UTF-8 locale not available on this system.");
170 		return;
171 	}
172 
173 	assert((a = archive_read_new()) != NULL);
174 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
175 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
176 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
177 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
178 		skipping("This system cannot convert character-set"
179 		    " from KOI8-R to UTF-8.");
180 		return;
181 	}
182 	assertEqualIntA(a, ARCHIVE_OK,
183 	    archive_read_open_filename(a, refname, 10240));
184 
185 	/* Verify regular file. */
186 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
187 	assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2",
188 	    archive_entry_pathname(ae));
189 	assertEqualInt(6, archive_entry_size(ae));
190 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
191 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
192 
193 	/* Verify regular file. */
194 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
195 	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
196 	    archive_entry_pathname(ae));
197 	assertEqualInt(6, archive_entry_size(ae));
198 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
199 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
200 
201 	/* Verify encryption status */
202 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
203 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
204 
205 	/* End of archive. */
206 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
207 
208 	/* Verify archive format. */
209 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
210 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
211 	    archive_format(a));
212 
213 	/* Verify encryption status */
214 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
215 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
216 
217 	/* Close the archive. */
218 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
219 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
220 
221 	/*
222 	 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option.
223 	 * The filename we can properly read is only second file.
224 	 */
225 
226 	assert((a = archive_read_new()) != NULL);
227 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
228 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
229 	assertEqualIntA(a, ARCHIVE_OK,
230 	    archive_read_open_filename(a, refname, 10240));
231 
232 	/*
233 	 * Verify regular first file.
234 	 * The filename is not translated to UTF-8 because hdrcharset
235 	 * attribute is BINARY and there is not way to know its charset.
236 	 */
237 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
238 	/* A filename is in KOI8-R. */
239 	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
240 	    archive_entry_pathname(ae));
241 	assertEqualInt(6, archive_entry_size(ae));
242 
243 	/* Verify encryption status */
244 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
245 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
246 
247 	/*
248 	 * Verify regular second file.
249 	 */
250 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
251 	assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82",
252 	    archive_entry_pathname(ae));
253 	assertEqualInt(6, archive_entry_size(ae));
254 
255 
256 	/* End of archive. */
257 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
258 
259 	/* Verify archive format. */
260 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
261 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
262 	    archive_format(a));
263 
264 	/* Close the archive. */
265 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
266 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
267 }
268 
269 static void
270 test_read_format_tar_filename_KOI8R_CP1251(const char *refname)
271 {
272 	struct archive *a;
273 	struct archive_entry *ae;
274 
275 	/*
276  	* Read filename in CP1251 with "hdrcharset=KOI8-R" option.
277  	* We should correctly read two filenames.
278 	*/
279 	if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
280 	    NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
281 		skipping("CP1251 locale not available on this system.");
282 		return;
283 	}
284 
285 	/* Test if the platform can convert from UTF-8. */
286 	assert((a = archive_read_new()) != NULL);
287 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a));
288 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) {
289 		assertEqualInt(ARCHIVE_OK, archive_read_free(a));
290 		skipping("This system cannot convert character-set"
291 		    " from UTF-8 to CP1251.");
292 		return;
293 	}
294 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
295 
296 	assert((a = archive_read_new()) != NULL);
297 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
298 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
299 	if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) {
300 		skipping("This system cannot convert character-set"
301 		    " from KOI8-R to CP1251.");
302 		goto next_test;
303 	}
304 	assertEqualIntA(a, ARCHIVE_OK,
305 	    archive_read_open_filename(a, refname, 10240));
306 
307 	/* Verify regular first file. */
308 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
309 	assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2",
310 	    archive_entry_pathname(ae));
311 	assertEqualInt(6, archive_entry_size(ae));
312 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
313 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
314 
315 	/* Verify regular second file. */
316 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
317 	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
318 	    archive_entry_pathname(ae));
319 	assertEqualInt(6, archive_entry_size(ae));
320 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
321 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
322 
323 
324 	/* End of archive. */
325 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
326 
327 	/* Verify archive format. */
328 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
329 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
330 	    archive_format(a));
331 
332 	/* Close the archive. */
333 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
334 next_test:
335 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
336 
337 	/*
338 	 * Read filename in CP1251 without "hdrcharset=KOI8-R" option.
339 	 * The filename we can properly read is only second file.
340 	 */
341 
342 	assert((a = archive_read_new()) != NULL);
343 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a));
344 	assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a));
345 	assertEqualIntA(a, ARCHIVE_OK,
346 	    archive_read_open_filename(a, refname, 10240));
347 
348 	/*
349 	 * Verify regular first file.
350 	 * The filename is not translated to CP1251 because hdrcharset
351 	 * attribute is BINARY and there is not way to know its charset.
352 	 */
353 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
354 	/* A filename is in KOI8-R. */
355 	assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4",
356 	    archive_entry_pathname(ae));
357 	assertEqualInt(6, archive_entry_size(ae));
358 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
359 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
360 
361 	/*
362 	 * Verify regular second file.
363 	 */
364 	assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae));
365 	assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2",
366 	    archive_entry_pathname(ae));
367 	assertEqualInt(6, archive_entry_size(ae));
368 	assertEqualInt(archive_entry_is_encrypted(ae), 0);
369 	assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED);
370 
371 
372 	/* End of archive. */
373 	assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae));
374 
375 	/* Verify archive format. */
376 	assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0));
377 	assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE,
378 	    archive_format(a));
379 
380 	/* Close the archive. */
381 	assertEqualInt(ARCHIVE_OK, archive_read_close(a));
382 	assertEqualInt(ARCHIVE_OK, archive_read_free(a));
383 }
384 
385 
/*
 * Test entry point: pass the reference archive name to
 * extract_reference_file(), then run the CP866, UTF-8 and CP1251
 * readers against it, in that order.
 */
DEFINE_TEST(test_read_format_tar_filename)
{
	const char refname[] = "test_read_format_tar_filename_koi8r.tar.Z";

	extract_reference_file(refname);

	test_read_format_tar_filename_KOI8R_CP866(refname);
	test_read_format_tar_filename_KOI8R_UTF8(refname);
	test_read_format_tar_filename_KOI8R_CP1251(refname);
}
395