1 /*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 #include "test.h"
26
27 #include <locale.h>
28
DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)29 DEFINE_TEST(test_ustar_filename_encoding_UTF8_CP866)
30 {
31 struct archive *a;
32 struct archive_entry *entry;
33 char buff[4096];
34 size_t used;
35
36 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
37 skipping("en_US.UTF-8 locale not available on this system.");
38 return;
39 }
40
41 /*
42 * Verify that UTF-8 filenames are correctly translated into CP866
43 * and stored with hdrcharset=CP866 option.
44 */
45 a = archive_write_new();
46 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
47 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
48 skipping("This system cannot convert character-set"
49 " from UTF-8 to CP866.");
50 archive_write_free(a);
51 return;
52 }
53 assertEqualInt(ARCHIVE_OK,
54 archive_write_open_memory(a, buff, sizeof(buff), &used));
55
56 entry = archive_entry_new2(a);
57 /* Set a UTF-8 filename. */
58 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
59 archive_entry_set_filetype(entry, AE_IFREG);
60 archive_entry_set_size(entry, 0);
61 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
62 archive_entry_free(entry);
63 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
64
65 /* Above three characters in UTF-8 should translate to the following
66 * three characters in CP866. */
67 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
68 }
69
DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)70 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_UTF8)
71 {
72 struct archive *a;
73 struct archive_entry *entry;
74 char buff[4096];
75 size_t used;
76
77 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
78 skipping("KOI8-R locale not available on this system.");
79 return;
80 }
81
82 /*
83 * Verify that KOI8-R filenames are correctly translated into UTF-8
84 * and stored with hdrcharset=UTF-8 option.
85 */
86 a = archive_write_new();
87 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
88 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
89 skipping("This system cannot convert character-set"
90 " from KOI8-R to UTF-8.");
91 archive_write_free(a);
92 return;
93 }
94 assertEqualInt(ARCHIVE_OK,
95 archive_write_open_memory(a, buff, sizeof(buff), &used));
96
97 entry = archive_entry_new2(a);
98 /* Set a KOI8-R filename. */
99 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
100 archive_entry_set_filetype(entry, AE_IFREG);
101 archive_entry_set_size(entry, 0);
102 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
103 archive_entry_free(entry);
104 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
105
106 /* Above three characters in KOI8-R should translate to the following
107 * three characters (two bytes each) in UTF-8. */
108 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
109 }
110
DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)111 DEFINE_TEST(test_ustar_filename_encoding_KOI8R_CP866)
112 {
113 struct archive *a;
114 struct archive_entry *entry;
115 char buff[4096];
116 size_t used;
117
118 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
119 skipping("KOI8-R locale not available on this system.");
120 return;
121 }
122
123 /*
124 * Verify that KOI8-R filenames are correctly translated into CP866
125 * and stored with hdrcharset=CP866 option.
126 */
127 a = archive_write_new();
128 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
129 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
130 skipping("This system cannot convert character-set"
131 " from KOI8-R to CP866.");
132 archive_write_free(a);
133 return;
134 }
135 assertEqualInt(ARCHIVE_OK,
136 archive_write_open_memory(a, buff, sizeof(buff), &used));
137
138 entry = archive_entry_new2(a);
139 /* Set a KOI8-R filename. */
140 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
141 archive_entry_set_filetype(entry, AE_IFREG);
142 archive_entry_set_size(entry, 0);
143 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
144 archive_entry_free(entry);
145 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
146
147 /* Above three characters in KOI8-R should translate to the following
148 * three characters in CP866. */
149 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
150 }
151
DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)152 DEFINE_TEST(test_ustar_filename_encoding_CP1251_UTF8)
153 {
154 struct archive *a;
155 struct archive_entry *entry;
156 char buff[4096];
157 size_t used;
158
159 if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
160 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
161 skipping("KOI8-R locale not available on this system.");
162 return;
163 }
164
165 /*
166 * Verify that CP1251 filenames are correctly translated into UTF-8
167 * and stored with hdrcharset=UTF-8 option.
168 */
169 a = archive_write_new();
170 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
171 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
172 skipping("This system cannot convert character-set"
173 " from KOI8-R to UTF-8.");
174 archive_write_free(a);
175 return;
176 }
177 assertEqualInt(ARCHIVE_OK,
178 archive_write_open_memory(a, buff, sizeof(buff), &used));
179
180 entry = archive_entry_new2(a);
181 /* Set a KOI8-R filename. */
182 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
183 archive_entry_set_filetype(entry, AE_IFREG);
184 archive_entry_set_size(entry, 0);
185 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
186 archive_entry_free(entry);
187 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
188
189 /* Above three characters in CP1251 should translate to the following
190 * three characters (two bytes each) in UTF-8. */
191 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
192 }
193
/*
 * Do not translate CP1251 into CP866 on non-Windows platforms.
 */
DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)197 DEFINE_TEST(test_ustar_filename_encoding_ru_RU_CP1251)
198 {
199 struct archive *a;
200 struct archive_entry *entry;
201 char buff[4096];
202 size_t used;
203
204 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
205 skipping("KOI8-R locale not available on this system.");
206 return;
207 }
208
209 /*
210 * Verify that CP1251 filenames are not translated into any
211 * other character-set, in particular, CP866.
212 */
213 a = archive_write_new();
214 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
215 assertEqualInt(ARCHIVE_OK,
216 archive_write_open_memory(a, buff, sizeof(buff), &used));
217
218 entry = archive_entry_new2(a);
219 /* Set a KOI8-R filename. */
220 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
221 archive_entry_set_filetype(entry, AE_IFREG);
222 archive_entry_set_size(entry, 0);
223 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
224 archive_entry_free(entry);
225 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
226
227 /* Above three characters in CP1251 should not translate to
228 * any other character-set. */
229 assertEqualMem(buff, "\xEF\xF0\xE8", 3);
230 }
231
/*
 * Other archiver applications on Windows translate CP1251 filenames
 * into CP866 filenames and store them in the ustar file.
 * Verify that this behavior works as expected.
 */
DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)237 DEFINE_TEST(test_ustar_filename_encoding_Russian_Russia)
238 {
239 struct archive *a;
240 struct archive_entry *entry;
241 char buff[4096];
242 size_t used;
243
244 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
245 skipping("Russian_Russia locale not available on this system.");
246 return;
247 }
248
249 /*
250 * Verify that Russian_Russia(CP1251) filenames are correctly translated
251 * to CP866.
252 */
253 a = archive_write_new();
254 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
255 assertEqualInt(ARCHIVE_OK,
256 archive_write_open_memory(a, buff, sizeof(buff), &used));
257
258 entry = archive_entry_new2(a);
259 /* Set a CP1251 filename. */
260 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
261 archive_entry_set_filetype(entry, AE_IFREG);
262 archive_entry_set_size(entry, 0);
263 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
264 archive_entry_free(entry);
265 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
266
267 /* Above three characters in CP1251 should translate to the following
268 * three characters in CP866. */
269 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
270 }
271
DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)272 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_UTF8)
273 {
274 struct archive *a;
275 struct archive_entry *entry;
276 char buff[4096];
277 size_t used;
278
279 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
280 skipping("eucJP locale not available on this system.");
281 return;
282 }
283
284 /*
285 * Verify that EUC-JP filenames are correctly translated to UTF-8.
286 */
287 a = archive_write_new();
288 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
289 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
290 skipping("This system cannot convert character-set"
291 " from eucJP to UTF-8.");
292 archive_write_free(a);
293 return;
294 }
295 assertEqualInt(ARCHIVE_OK,
296 archive_write_open_memory(a, buff, sizeof(buff), &used));
297
298 entry = archive_entry_new2(a);
299 /* Set an EUC-JP filename. */
300 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
301 /* Check the Unicode version. */
302 archive_entry_set_filetype(entry, AE_IFREG);
303 archive_entry_set_size(entry, 0);
304 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
305 archive_entry_free(entry);
306 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
307
308 /* Check UTF-8 version. */
309 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
310 }
311
DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)312 DEFINE_TEST(test_ustar_filename_encoding_EUCJP_CP932)
313 {
314 struct archive *a;
315 struct archive_entry *entry;
316 char buff[4096];
317 size_t used;
318
319 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
320 skipping("eucJP locale not available on this system.");
321 return;
322 }
323
324 /*
325 * Verify that EUC-JP filenames are correctly translated to CP932.
326 */
327 a = archive_write_new();
328 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
329 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
330 skipping("This system cannot convert character-set"
331 " from eucJP to CP932.");
332 archive_write_free(a);
333 return;
334 }
335 assertEqualInt(ARCHIVE_OK,
336 archive_write_open_memory(a, buff, sizeof(buff), &used));
337
338 entry = archive_entry_new2(a);
339 /* Set an EUC-JP filename. */
340 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
341 /* Check the Unicode version. */
342 archive_entry_set_filetype(entry, AE_IFREG);
343 archive_entry_set_size(entry, 0);
344 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
345 archive_entry_free(entry);
346 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
347
348 /* Check CP932 version. */
349 assertEqualMem(buff, "\x95\x5C.txt", 6);
350 }
351
DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)352 DEFINE_TEST(test_ustar_filename_encoding_CP932_UTF8)
353 {
354 struct archive *a;
355 struct archive_entry *entry;
356 char buff[4096];
357 size_t used;
358
359 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
360 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
361 skipping("CP932/SJIS locale not available on this system.");
362 return;
363 }
364
365 /*
366 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
367 */
368 a = archive_write_new();
369 assertEqualInt(ARCHIVE_OK, archive_write_set_format_ustar(a));
370 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
371 skipping("This system cannot convert character-set"
372 " from CP932/SJIS to UTF-8.");
373 archive_write_free(a);
374 return;
375 }
376 assertEqualInt(ARCHIVE_OK,
377 archive_write_open_memory(a, buff, sizeof(buff), &used));
378
379 entry = archive_entry_new2(a);
380 /* Set a CP932/SJIS filename. */
381 archive_entry_set_pathname(entry, "\x95\x5C.txt");
382 /* Check the Unicode version. */
383 archive_entry_set_filetype(entry, AE_IFREG);
384 archive_entry_set_size(entry, 0);
385 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
386 archive_entry_free(entry);
387 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
388
389 /* Check UTF-8 version. */
390 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
391 }
392
393