1 /*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 #include "test.h" 26 __FBSDID("$FreeBSD"); 27 28 #include <locale.h> 29 30 /* 31 * The sample tar file was made in LANG=KOI8-R and it contains two 32 * files the charset of which are different. 33 * - the filename of first file is stored in BINARY mode. 34 * - the filename of second file is stored in UTF-8. 35 * 36 * Whenever hdrcharset option is specified, we will correctly read the 37 * filename of second file, which is stored in UTF-8 by default. 38 */ 39 40 static void 41 test_read_format_tar_filename_KOI8R_CP866(const char *refname) 42 { 43 struct archive *a; 44 struct archive_entry *ae; 45 46 /* 47 * Read filename in ru_RU.CP866 with "hdrcharset=KOI8-R" option. 48 * We should correctly read two filenames. 49 */ 50 if (NULL == setlocale(LC_ALL, "Russian_Russia.866") && 51 NULL == setlocale(LC_ALL, "ru_RU.CP866")) { 52 skipping("ru_RU.CP866 locale not available on this system."); 53 return; 54 } 55 56 /* Test if the platform can convert from UTF-8. */ 57 assert((a = archive_read_new()) != NULL); 58 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 59 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 60 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 61 skipping("This system cannot convert character-set" 62 " from UTF-8 to CP866."); 63 return; 64 } 65 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 66 67 assert((a = archive_read_new()) != NULL); 68 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 69 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 70 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 71 skipping("This system cannot convert character-set" 72 " from KOI8-R to CP866."); 73 goto next_test; 74 } 75 assertEqualIntA(a, ARCHIVE_OK, 76 archive_read_open_filename(a, refname, 10240)); 77 78 /* Verify regular first file. */ 79 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 80 assertEqualString("\x8f\x90\x88\x82\x85\x92", 81 archive_entry_pathname(ae)); 82 assertEqualInt(6, archive_entry_size(ae)); 83 assertEqualInt(archive_entry_is_encrypted(ae), 0); 84 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 85 86 /* Verify regular second file. */ 87 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 88 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 89 archive_entry_pathname(ae)); 90 assertEqualInt(6, archive_entry_size(ae)); 91 assertEqualInt(archive_entry_is_encrypted(ae), 0); 92 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 93 94 95 /* End of archive. */ 96 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 97 98 /* Verify archive format. */ 99 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 100 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 101 archive_format(a)); 102 103 /* Close the archive. */ 104 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 105 next_test: 106 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 107 108 109 /* 110 * Read filename in ru_RU.CP866 without "hdrcharset=KOI8-R" option. 111 * The filename we can properly read is only second file. 112 */ 113 114 assert((a = archive_read_new()) != NULL); 115 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 116 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 117 assertEqualIntA(a, ARCHIVE_OK, 118 archive_read_open_filename(a, refname, 10240)); 119 120 /* 121 * Verify regular first file. 122 * The filename is not translated to CP866 because hdrcharset 123 * attribute is BINARY and there is not way to know its charset. 124 */ 125 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 126 /* A filename is in KOI8-R. */ 127 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 128 archive_entry_pathname(ae)); 129 assertEqualInt(6, archive_entry_size(ae)); 130 assertEqualInt(archive_entry_is_encrypted(ae), 0); 131 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 132 133 /* 134 * Verify regular second file. 135 * The filename is translated from UTF-8 to CP866 136 */ 137 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 138 assertEqualString("\xaf\xe0\xa8\xa2\xa5\xe2", 139 archive_entry_pathname(ae)); 140 assertEqualInt(6, archive_entry_size(ae)); 141 assertEqualInt(archive_entry_is_encrypted(ae), 0); 142 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 143 144 145 /* End of archive. */ 146 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 147 148 /* Verify archive format. */ 149 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 150 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 151 archive_format(a)); 152 153 /* Close the archive. */ 154 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 155 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 156 } 157 158 static void 159 test_read_format_tar_filename_KOI8R_UTF8(const char *refname) 160 { 161 struct archive *a; 162 struct archive_entry *ae; 163 164 /* 165 * Read filename in en_US.UTF-8 with "hdrcharset=KOI8-R" option. 166 * We should correctly read two filenames. 167 */ 168 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 169 skipping("en_US.UTF-8 locale not available on this system."); 170 return; 171 } 172 173 assert((a = archive_read_new()) != NULL); 174 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 175 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 176 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 177 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 178 skipping("This system cannot convert character-set" 179 " from KOI8-R to UTF-8."); 180 return; 181 } 182 assertEqualIntA(a, ARCHIVE_OK, 183 archive_read_open_filename(a, refname, 10240)); 184 185 /* Verify regular file. */ 186 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 187 assertEqualString("\xd0\x9f\xd0\xa0\xd0\x98\xd0\x92\xd0\x95\xd0\xa2", 188 archive_entry_pathname(ae)); 189 assertEqualInt(6, archive_entry_size(ae)); 190 assertEqualInt(archive_entry_is_encrypted(ae), 0); 191 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 192 193 /* Verify regular file. */ 194 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 195 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 196 archive_entry_pathname(ae)); 197 assertEqualInt(6, archive_entry_size(ae)); 198 assertEqualInt(archive_entry_is_encrypted(ae), 0); 199 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 200 201 /* Verify encryption status */ 202 assertEqualInt(archive_entry_is_encrypted(ae), 0); 203 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 204 205 /* End of archive. */ 206 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 207 208 /* Verify archive format. */ 209 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 210 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 211 archive_format(a)); 212 213 /* Verify encryption status */ 214 assertEqualInt(archive_entry_is_encrypted(ae), 0); 215 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 216 217 /* Close the archive. */ 218 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 219 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 220 221 /* 222 * Read filename in en_US.UTF-8 without "hdrcharset=KOI8-R" option. 223 * The filename we can properly read is only second file. 224 */ 225 226 assert((a = archive_read_new()) != NULL); 227 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 228 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 229 assertEqualIntA(a, ARCHIVE_OK, 230 archive_read_open_filename(a, refname, 10240)); 231 232 /* 233 * Verify regular first file. 234 * The filename is not translated to UTF-8 because hdrcharset 235 * attribute is BINARY and there is not way to know its charset. 236 */ 237 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 238 /* A filename is in KOI8-R. */ 239 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 240 archive_entry_pathname(ae)); 241 assertEqualInt(6, archive_entry_size(ae)); 242 243 /* Verify encryption status */ 244 assertEqualInt(archive_entry_is_encrypted(ae), 0); 245 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 246 247 /* 248 * Verify regular second file. 249 */ 250 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 251 assertEqualString("\xd0\xbf\xd1\x80\xd0\xb8\xd0\xb2\xd0\xb5\xd1\x82", 252 archive_entry_pathname(ae)); 253 assertEqualInt(6, archive_entry_size(ae)); 254 255 256 /* End of archive. */ 257 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 258 259 /* Verify archive format. */ 260 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 261 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 262 archive_format(a)); 263 264 /* Close the archive. */ 265 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 266 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 267 } 268 269 static void 270 test_read_format_tar_filename_KOI8R_CP1251(const char *refname) 271 { 272 struct archive *a; 273 struct archive_entry *ae; 274 275 /* 276 * Read filename in CP1251 with "hdrcharset=KOI8-R" option. 277 * We should correctly read two filenames. 278 */ 279 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 280 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 281 skipping("CP1251 locale not available on this system."); 282 return; 283 } 284 285 /* Test if the platform can convert from UTF-8. */ 286 assert((a = archive_read_new()) != NULL); 287 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_tar(a)); 288 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=UTF-8")) { 289 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 290 skipping("This system cannot convert character-set" 291 " from UTF-8 to CP1251."); 292 return; 293 } 294 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 295 296 assert((a = archive_read_new()) != NULL); 297 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 298 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 299 if (ARCHIVE_OK != archive_read_set_options(a, "hdrcharset=KOI8-R")) { 300 skipping("This system cannot convert character-set" 301 " from KOI8-R to CP1251."); 302 goto next_test; 303 } 304 assertEqualIntA(a, ARCHIVE_OK, 305 archive_read_open_filename(a, refname, 10240)); 306 307 /* Verify regular first file. */ 308 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 309 assertEqualString("\xcf\xd0\xc8\xc2\xc5\xd2", 310 archive_entry_pathname(ae)); 311 assertEqualInt(6, archive_entry_size(ae)); 312 assertEqualInt(archive_entry_is_encrypted(ae), 0); 313 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 314 315 /* Verify regular second file. */ 316 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 317 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 318 archive_entry_pathname(ae)); 319 assertEqualInt(6, archive_entry_size(ae)); 320 assertEqualInt(archive_entry_is_encrypted(ae), 0); 321 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 322 323 324 /* End of archive. */ 325 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 326 327 /* Verify archive format. */ 328 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 329 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 330 archive_format(a)); 331 332 /* Close the archive. */ 333 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 334 next_test: 335 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 336 337 /* 338 * Read filename in CP1251 without "hdrcharset=KOI8-R" option. 339 * The filename we can properly read is only second file. 340 */ 341 342 assert((a = archive_read_new()) != NULL); 343 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_filter_all(a)); 344 assertEqualIntA(a, ARCHIVE_OK, archive_read_support_format_all(a)); 345 assertEqualIntA(a, ARCHIVE_OK, 346 archive_read_open_filename(a, refname, 10240)); 347 348 /* 349 * Verify regular first file. 350 * The filename is not translated to CP1251 because hdrcharset 351 * attribute is BINARY and there is not way to know its charset. 352 */ 353 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 354 /* A filename is in KOI8-R. */ 355 assertEqualString("\xf0\xf2\xe9\xf7\xe5\xf4", 356 archive_entry_pathname(ae)); 357 assertEqualInt(6, archive_entry_size(ae)); 358 assertEqualInt(archive_entry_is_encrypted(ae), 0); 359 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 360 361 /* 362 * Verify regular second file. 363 */ 364 assertEqualIntA(a, ARCHIVE_OK, archive_read_next_header(a, &ae)); 365 assertEqualString("\xef\xf0\xe8\xe2\xe5\xf2", 366 archive_entry_pathname(ae)); 367 assertEqualInt(6, archive_entry_size(ae)); 368 assertEqualInt(archive_entry_is_encrypted(ae), 0); 369 assertEqualIntA(a, archive_read_has_encrypted_entries(a), ARCHIVE_READ_FORMAT_ENCRYPTION_UNSUPPORTED); 370 371 372 /* End of archive. */ 373 assertEqualIntA(a, ARCHIVE_EOF, archive_read_next_header(a, &ae)); 374 375 /* Verify archive format. */ 376 assertEqualIntA(a, ARCHIVE_FILTER_COMPRESS, archive_filter_code(a, 0)); 377 assertEqualIntA(a, ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE, 378 archive_format(a)); 379 380 /* Close the archive. */ 381 assertEqualInt(ARCHIVE_OK, archive_read_close(a)); 382 assertEqualInt(ARCHIVE_OK, archive_read_free(a)); 383 } 384 385 386 DEFINE_TEST(test_read_format_tar_filename) 387 { 388 const char *refname = "test_read_format_tar_filename_koi8r.tar.Z"; 389 390 extract_reference_file(refname); 391 test_read_format_tar_filename_KOI8R_CP866(refname); 392 test_read_format_tar_filename_KOI8R_UTF8(refname); 393 test_read_format_tar_filename_KOI8R_CP1251(refname); 394 } 395