1 /*-
2 * Copyright (c) 2011 Michihiro NAKAJIMA
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25 #include "test.h"
26 #include <locale.h>
27
DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866)28 DEFINE_TEST(test_gnutar_filename_encoding_UTF8_CP866)
29 {
30 struct archive *a;
31 struct archive_entry *entry;
32 char buff[4096];
33 size_t used;
34
35 if (NULL == setlocale(LC_ALL, "en_US.UTF-8")) {
36 skipping("en_US.UTF-8 locale not available on this system.");
37 return;
38 }
39
40 /*
41 * Verify that UTF-8 filenames are correctly translated into CP866
42 * and stored with hdrcharset=CP866 option.
43 */
44 a = archive_write_new();
45 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
46 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
47 skipping("This system cannot convert character-set"
48 " from UTF-8 to CP866.");
49 archive_write_free(a);
50 return;
51 }
52 assertEqualInt(ARCHIVE_OK,
53 archive_write_open_memory(a, buff, sizeof(buff), &used));
54
55 entry = archive_entry_new2(a);
56 /* Set a UTF-8 filename. */
57 archive_entry_set_pathname(entry, "\xD0\xBF\xD1\x80\xD0\xB8");
58 archive_entry_set_filetype(entry, AE_IFREG);
59 archive_entry_set_size(entry, 0);
60 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
61 archive_entry_free(entry);
62 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
63
64 /* Above three characters in UTF-8 should translate to the following
65 * three characters in CP866. */
66 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
67 }
68
DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8)69 DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_UTF8)
70 {
71 struct archive *a;
72 struct archive_entry *entry;
73 char buff[4096];
74 size_t used;
75
76 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
77 skipping("KOI8-R locale not available on this system.");
78 return;
79 }
80
81 /*
82 * Verify that KOI8-R filenames are correctly translated into UTF-8
83 * and stored with hdrcharset=UTF-8 option.
84 */
85 a = archive_write_new();
86 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
87 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
88 skipping("This system cannot convert character-set"
89 " from KOI8-R to UTF-8.");
90 archive_write_free(a);
91 return;
92 }
93 assertEqualInt(ARCHIVE_OK,
94 archive_write_open_memory(a, buff, sizeof(buff), &used));
95
96 entry = archive_entry_new2(a);
97 /* Set a KOI8-R filename. */
98 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
99 archive_entry_set_filetype(entry, AE_IFREG);
100 archive_entry_set_size(entry, 0);
101 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
102 archive_entry_free(entry);
103 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
104
105 /* Above three characters in KOI8-R should translate to the following
106 * three characters (two bytes each) in UTF-8. */
107 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
108 }
109
DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866)110 DEFINE_TEST(test_gnutar_filename_encoding_KOI8R_CP866)
111 {
112 struct archive *a;
113 struct archive_entry *entry;
114 char buff[4096];
115 size_t used;
116
117 if (NULL == setlocale(LC_ALL, "ru_RU.KOI8-R")) {
118 skipping("KOI8-R locale not available on this system.");
119 return;
120 }
121
122 /*
123 * Verify that KOI8-R filenames are correctly translated into CP866
124 * and stored with hdrcharset=CP866 option.
125 */
126 a = archive_write_new();
127 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
128 if (archive_write_set_options(a, "hdrcharset=CP866") != ARCHIVE_OK) {
129 skipping("This system cannot convert character-set"
130 " from KOI8-R to CP866.");
131 archive_write_free(a);
132 return;
133 }
134 assertEqualInt(ARCHIVE_OK,
135 archive_write_open_memory(a, buff, sizeof(buff), &used));
136
137 entry = archive_entry_new2(a);
138 /* Set a KOI8-R filename. */
139 archive_entry_set_pathname(entry, "\xD0\xD2\xC9");
140 archive_entry_set_filetype(entry, AE_IFREG);
141 archive_entry_set_size(entry, 0);
142 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
143 archive_entry_free(entry);
144 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
145
146 /* Above three characters in KOI8-R should translate to the following
147 * three characters in CP866. */
148 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
149 }
150
DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8)151 DEFINE_TEST(test_gnutar_filename_encoding_CP1251_UTF8)
152 {
153 struct archive *a;
154 struct archive_entry *entry;
155 char buff[4096];
156 size_t used;
157
158 if (NULL == setlocale(LC_ALL, "Russian_Russia") &&
159 NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
160 skipping("KOI8-R locale not available on this system.");
161 return;
162 }
163
164 /*
165 * Verify that CP1251 filenames are correctly translated into UTF-8
166 * and stored with hdrcharset=UTF-8 option.
167 */
168 a = archive_write_new();
169 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
170 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
171 skipping("This system cannot convert character-set"
172 " from KOI8-R to UTF-8.");
173 archive_write_free(a);
174 return;
175 }
176 assertEqualInt(ARCHIVE_OK,
177 archive_write_open_memory(a, buff, sizeof(buff), &used));
178
179 entry = archive_entry_new2(a);
180 /* Set a KOI8-R filename. */
181 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
182 archive_entry_set_filetype(entry, AE_IFREG);
183 archive_entry_set_size(entry, 0);
184 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
185 archive_entry_free(entry);
186 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
187
188 /* Above three characters in CP1251 should translate to the following
189 * three characters (two bytes each) in UTF-8. */
190 assertEqualMem(buff, "\xD0\xBF\xD1\x80\xD0\xB8", 6);
191 }
192
/*
 * Do not translate CP1251 into CP866 on non-Windows platforms.
 */
DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251)196 DEFINE_TEST(test_gnutar_filename_encoding_ru_RU_CP1251)
197 {
198 struct archive *a;
199 struct archive_entry *entry;
200 char buff[4096];
201 size_t used;
202
203 if (NULL == setlocale(LC_ALL, "ru_RU.CP1251")) {
204 skipping("KOI8-R locale not available on this system.");
205 return;
206 }
207
208 /*
209 * Verify that CP1251 filenames are not translated into any
210 * other character-set, in particular, CP866.
211 */
212 a = archive_write_new();
213 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
214 assertEqualInt(ARCHIVE_OK,
215 archive_write_open_memory(a, buff, sizeof(buff), &used));
216
217 entry = archive_entry_new2(a);
218 /* Set a KOI8-R filename. */
219 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
220 archive_entry_set_filetype(entry, AE_IFREG);
221 archive_entry_set_size(entry, 0);
222 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
223 archive_entry_free(entry);
224 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
225
226 /* Above three characters in CP1251 should not translate to
227 * any other character-set. */
228 assertEqualMem(buff, "\xEF\xF0\xE8", 3);
229 }
230
/*
 * Other archiver applications on Windows translate CP1251 filenames
 * into CP866 filenames and store them in the gnutar file.
 * Test that the same behavior works here.
 */
DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia)236 DEFINE_TEST(test_gnutar_filename_encoding_Russian_Russia)
237 {
238 struct archive *a;
239 struct archive_entry *entry;
240 char buff[4096];
241 size_t used;
242
243 if (NULL == setlocale(LC_ALL, "Russian_Russia")) {
244 skipping("Russian_Russia locale not available on this system.");
245 return;
246 }
247
248 /*
249 * Verify that Russian_Russia(CP1251) filenames are correctly translated
250 * to CP866.
251 */
252 a = archive_write_new();
253 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
254 assertEqualInt(ARCHIVE_OK,
255 archive_write_open_memory(a, buff, sizeof(buff), &used));
256
257 entry = archive_entry_new2(a);
258 /* Set a CP1251 filename. */
259 archive_entry_set_pathname(entry, "\xEF\xF0\xE8");
260 archive_entry_set_filetype(entry, AE_IFREG);
261 archive_entry_set_size(entry, 0);
262 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
263 archive_entry_free(entry);
264 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
265
266 /* Above three characters in CP1251 should translate to the following
267 * three characters in CP866. */
268 assertEqualMem(buff, "\xAF\xE0\xA8", 3);
269 }
270
DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8)271 DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_UTF8)
272 {
273 struct archive *a;
274 struct archive_entry *entry;
275 char buff[4096];
276 size_t used;
277
278 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
279 skipping("eucJP locale not available on this system.");
280 return;
281 }
282
283 /*
284 * Verify that EUC-JP filenames are correctly translated to UTF-8.
285 */
286 a = archive_write_new();
287 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
288 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
289 skipping("This system cannot convert character-set"
290 " from eucJP to UTF-8.");
291 archive_write_free(a);
292 return;
293 }
294 assertEqualInt(ARCHIVE_OK,
295 archive_write_open_memory(a, buff, sizeof(buff), &used));
296
297 entry = archive_entry_new2(a);
298 /* Set an EUC-JP filename. */
299 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
300 /* Check the Unicode version. */
301 archive_entry_set_filetype(entry, AE_IFREG);
302 archive_entry_set_size(entry, 0);
303 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
304 archive_entry_free(entry);
305 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
306
307 /* Check UTF-8 version. */
308 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
309 }
310
DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932)311 DEFINE_TEST(test_gnutar_filename_encoding_EUCJP_CP932)
312 {
313 struct archive *a;
314 struct archive_entry *entry;
315 char buff[4096];
316 size_t used;
317
318 if (NULL == setlocale(LC_ALL, "ja_JP.eucJP")) {
319 skipping("eucJP locale not available on this system.");
320 return;
321 }
322
323 /*
324 * Verify that EUC-JP filenames are correctly translated to CP932.
325 */
326 a = archive_write_new();
327 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
328 if (archive_write_set_options(a, "hdrcharset=CP932") != ARCHIVE_OK) {
329 skipping("This system cannot convert character-set"
330 " from eucJP to CP932.");
331 archive_write_free(a);
332 return;
333 }
334 assertEqualInt(ARCHIVE_OK,
335 archive_write_open_memory(a, buff, sizeof(buff), &used));
336
337 entry = archive_entry_new2(a);
338 /* Set an EUC-JP filename. */
339 archive_entry_set_pathname(entry, "\xC9\xBD.txt");
340 /* Check the Unicode version. */
341 archive_entry_set_filetype(entry, AE_IFREG);
342 archive_entry_set_size(entry, 0);
343 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
344 archive_entry_free(entry);
345 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
346
347 /* Check CP932 version. */
348 assertEqualMem(buff, "\x95\x5C.txt", 6);
349 }
350
DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)351 DEFINE_TEST(test_gnutar_filename_encoding_CP932_UTF8)
352 {
353 struct archive *a;
354 struct archive_entry *entry;
355 char buff[4096];
356 size_t used;
357
358 if (NULL == setlocale(LC_ALL, "Japanese_Japan") &&
359 NULL == setlocale(LC_ALL, "ja_JP.SJIS")) {
360 skipping("CP932/SJIS locale not available on this system.");
361 return;
362 }
363
364 /*
365 * Verify that CP932/SJIS filenames are correctly translated to UTF-8.
366 */
367 a = archive_write_new();
368 assertEqualInt(ARCHIVE_OK, archive_write_set_format_gnutar(a));
369 if (archive_write_set_options(a, "hdrcharset=UTF-8") != ARCHIVE_OK) {
370 skipping("This system cannot convert character-set"
371 " from CP932/SJIS to UTF-8.");
372 archive_write_free(a);
373 return;
374 }
375 assertEqualInt(ARCHIVE_OK,
376 archive_write_open_memory(a, buff, sizeof(buff), &used));
377
378 entry = archive_entry_new2(a);
379 /* Set an CP932/SJIS filename. */
380 archive_entry_set_pathname(entry, "\x95\x5C.txt");
381 /* Check the Unicode version. */
382 archive_entry_set_filetype(entry, AE_IFREG);
383 archive_entry_set_size(entry, 0);
384 assertEqualInt(ARCHIVE_OK, archive_write_header(a, entry));
385 archive_entry_free(entry);
386 assertEqualInt(ARCHIVE_OK, archive_write_free(a));
387
388 /* Check UTF-8 version. */
389 assertEqualMem(buff, "\xE8\xA1\xA8.txt", 7);
390 }
391
392