1 /*- 2 * Copyright (c) 2011 Michihiro NAKAJIMA 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 #include "test.h" 26 27 #include <locale.h> 28 29 DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866) 30 { 31 struct archive *a; 32 struct archive_entry *entry; 33 char buff[4096]; 34 size_t used; 35 36 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) { 37 skipping("en_US.UTF-8 locale not available on this system."); 38 return; 39 } 40 41 /* 42 * Verify that UTF-8 filenames are correctly translated into CP866 43 * and stored with hdrcharset=CP866 option. 44 */ 45 a = archive_write_new(); 46 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 47 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 48 skipping("This system cannot convert character-set" 49 " from UTF-8 to CP866."); 50 archive_write_free(a); 51 return; 52 } 53 assertEqualInt(ARCHIVE_OK, 54 archive_write_open_memory(a, buff, sizeof(buff), &used)); 55 56 entry = archive_entry_new2(a); 57 /* Set a UTF-8 filename. */ 58 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8"); 59 archive_entry_set_filetype(entry, AE_IFREG); 60 archive_entry_set_size(entry, 0); 61 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 62 archive_entry_free(entry); 63 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 64 65 /* Above three characters in UTF-8 should translate to the following 66 * three characters in CP866. */ 67 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 68 } 69 70 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8) 71 { 72 struct archive *a; 73 struct archive_entry *entry; 74 char buff[4096]; 75 size_t used; 76 77 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 78 skipping("KOI8-R locale not available on this system."); 79 return; 80 } 81 82 /* 83 * Verify that KOI8-R filenames are correctly translated into UTF-8 84 * and stored with hdrcharset=UTF-8 option. 85 */ 86 a = archive_write_new(); 87 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 88 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 89 skipping("This system cannot convert character-set" 90 " from KOI8-R to UTF-8."); 91 archive_write_free(a); 92 return; 93 } 94 assertEqualInt(ARCHIVE_OK, 95 archive_write_open_memory(a, buff, sizeof(buff), &used)); 96 97 entry = archive_entry_new2(a); 98 /* Set a KOI8-R filename. */ 99 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 100 archive_entry_set_filetype(entry, AE_IFREG); 101 archive_entry_set_size(entry, 0); 102 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 103 archive_entry_free(entry); 104 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 105 106 /* Above three characters in KOI8-R should translate to the following 107 * three characters (two bytes each) in UTF-8. */ 108 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 109 } 110 111 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866) 112 { 113 struct archive *a; 114 struct archive_entry *entry; 115 char buff[4096]; 116 size_t used; 117 118 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) { 119 skipping("KOI8-R locale not available on this system."); 120 return; 121 } 122 123 /* 124 * Verify that KOI8-R filenames are correctly translated into CP866 125 * and stored with hdrcharset=CP866 option. 126 */ 127 a = archive_write_new(); 128 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 129 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) { 130 skipping("This system cannot convert character-set" 131 " from KOI8-R to CP866."); 132 archive_write_free(a); 133 return; 134 } 135 assertEqualInt(ARCHIVE_OK, 136 archive_write_open_memory(a, buff, sizeof(buff), &used)); 137 138 entry = archive_entry_new2(a); 139 /* Set a KOI8-R filename. */ 140 archive_entry_set_pathname(entry, "\xD0\xD2\xC9"); 141 archive_entry_set_filetype(entry, AE_IFREG); 142 archive_entry_set_size(entry, 0); 143 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 144 archive_entry_free(entry); 145 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 146 147 /* Above three characters in KOI8-R should translate to the following 148 * three characters in CP866. */ 149 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 150 } 151 152 DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8) 153 { 154 struct archive *a; 155 struct archive_entry *entry; 156 char buff[4096]; 157 size_t used; 158 159 if (NULL == setlocale(LC_ALL, "Russian_Russia") && 160 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 161 skipping("KOI8-R locale not available on this system."); 162 return; 163 } 164 165 /* 166 * Verify that CP1251 filenames are correctly translated into UTF-8 167 * and stored with hdrcharset=UTF-8 option. 168 */ 169 a = archive_write_new(); 170 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 171 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 172 skipping("This system cannot convert character-set" 173 " from KOI8-R to UTF-8."); 174 archive_write_free(a); 175 return; 176 } 177 assertEqualInt(ARCHIVE_OK, 178 archive_write_open_memory(a, buff, sizeof(buff), &used)); 179 180 entry = archive_entry_new2(a); 181 /* Set a KOI8-R filename. */ 182 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 183 archive_entry_set_filetype(entry, AE_IFREG); 184 archive_entry_set_size(entry, 0); 185 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 186 archive_entry_free(entry); 187 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 188 189 /* Above three characters in CP1251 should translate to the following 190 * three characters (two bytes each) in UTF-8. */ 191 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6); 192 } 193 194 /* 195 * Do not translate CP1251 into CP866 if non Windows platform. 196 */ 197 DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251) 198 { 199 struct archive *a; 200 struct archive_entry *entry; 201 char buff[4096]; 202 size_t used; 203 204 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) { 205 skipping("KOI8-R locale not available on this system."); 206 return; 207 } 208 209 /* 210 * Verify that CP1251 filenames are not translated into any 211 * other character-set, in particular, CP866. 212 */ 213 a = archive_write_new(); 214 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 215 assertEqualInt(ARCHIVE_OK, 216 archive_write_open_memory(a, buff, sizeof(buff), &used)); 217 218 entry = archive_entry_new2(a); 219 /* Set a KOI8-R filename. */ 220 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 221 archive_entry_set_filetype(entry, AE_IFREG); 222 archive_entry_set_size(entry, 0); 223 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 224 archive_entry_free(entry); 225 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 226 227 /* Above three characters in CP1251 should not translate to 228 * any other character-set. */ 229 assertEqualMem(buff, "\xEF\xF0\xE8", 3); 230 } 231 232 /* 233 * Other archiver applications on Windows translate CP1251 filenames 234 * into CP866 filenames and store it in the ustar file. 235 * Test above behavior works well. 236 */ 237 DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia) 238 { 239 struct archive *a; 240 struct archive_entry *entry; 241 char buff[4096]; 242 size_t used; 243 244 if (NULL == setlocale(LC_ALL, "Russian_Russia")) { 245 skipping("Russian_Russia locale not available on this system."); 246 return; 247 } 248 249 /* 250 * Verify that Russian_Russia(CP1251) filenames are correctly translated 251 * to CP866. 252 */ 253 a = archive_write_new(); 254 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 255 assertEqualInt(ARCHIVE_OK, 256 archive_write_open_memory(a, buff, sizeof(buff), &used)); 257 258 entry = archive_entry_new2(a); 259 /* Set a CP1251 filename. */ 260 archive_entry_set_pathname(entry, "\xEF\xF0\xE8"); 261 archive_entry_set_filetype(entry, AE_IFREG); 262 archive_entry_set_size(entry, 0); 263 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 264 archive_entry_free(entry); 265 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 266 267 /* Above three characters in CP1251 should translate to the following 268 * three characters in CP866. */ 269 assertEqualMem(buff, "\xAF\xE0\xA8", 3); 270 } 271 272 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8) 273 { 274 struct archive *a; 275 struct archive_entry *entry; 276 char buff[4096]; 277 size_t used; 278 279 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 280 skipping("eucJP locale not available on this system."); 281 return; 282 } 283 284 /* 285 * Verify that EUC-JP filenames are correctly translated to UTF-8. 286 */ 287 a = archive_write_new(); 288 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 289 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 290 skipping("This system cannot convert character-set" 291 " from eucJP to UTF-8."); 292 archive_write_free(a); 293 return; 294 } 295 assertEqualInt(ARCHIVE_OK, 296 archive_write_open_memory(a, buff, sizeof(buff), &used)); 297 298 entry = archive_entry_new2(a); 299 /* Set an EUC-JP filename. */ 300 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 301 /* Check the Unicode version. */ 302 archive_entry_set_filetype(entry, AE_IFREG); 303 archive_entry_set_size(entry, 0); 304 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 305 archive_entry_free(entry); 306 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 307 308 /* Check UTF-8 version. */ 309 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 310 } 311 312 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932) 313 { 314 struct archive *a; 315 struct archive_entry *entry; 316 char buff[4096]; 317 size_t used; 318 319 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) { 320 skipping("eucJP locale not available on this system."); 321 return; 322 } 323 324 /* 325 * Verify that EUC-JP filenames are correctly translated to CP932. 326 */ 327 a = archive_write_new(); 328 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 329 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) { 330 skipping("This system cannot convert character-set" 331 " from eucJP to CP932."); 332 archive_write_free(a); 333 return; 334 } 335 assertEqualInt(ARCHIVE_OK, 336 archive_write_open_memory(a, buff, sizeof(buff), &used)); 337 338 entry = archive_entry_new2(a); 339 /* Set an EUC-JP filename. */ 340 archive_entry_set_pathname(entry, "\xC9\xBD.txt"); 341 /* Check the Unicode version. */ 342 archive_entry_set_filetype(entry, AE_IFREG); 343 archive_entry_set_size(entry, 0); 344 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 345 archive_entry_free(entry); 346 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 347 348 /* Check CP932 version. */ 349 assertEqualMem(buff, "\x95\x5C.txt", 6); 350 } 351 352 DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8) 353 { 354 struct archive *a; 355 struct archive_entry *entry; 356 char buff[4096]; 357 size_t used; 358 359 if (NULL == setlocale(LC_ALL, "Japanese_Japan") && 360 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) { 361 skipping("CP932/SJIS locale not available on this system."); 362 return; 363 } 364 365 /* 366 * Verify that CP932/SJIS filenames are correctly translated to UTF-8. 367 */ 368 a = archive_write_new(); 369 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a)); 370 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) { 371 skipping("This system cannot convert character-set" 372 " from CP932/SJIS to UTF-8."); 373 archive_write_free(a); 374 return; 375 } 376 assertEqualInt(ARCHIVE_OK, 377 archive_write_open_memory(a, buff, sizeof(buff), &used)); 378 379 entry = archive_entry_new2(a); 380 /* Set a CP932/SJIS filename. */ 381 archive_entry_set_pathname(entry, "\x95\x5C.txt"); 382 /* Check the Unicode version. */ 383 archive_entry_set_filetype(entry, AE_IFREG); 384 archive_entry_set_size(entry, 0); 385 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry)); 386 archive_entry_free(entry); 387 assertEqualInt(ARCHIVE_OK, archive_write_free(a)); 388 389 /* Check UTF-8 version. */ 390 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7); 391 } 392 393