1 /* utf8converttest.cc: test convert_to_utf8()
2  *
3  * Copyright (C) 2008,2009 Olly Betts
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation; either version 2 of the
8  * License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
18  * USA
19  */
20 
21 #include <config.h>
22 
23 #include <cstdlib>
24 #include <iostream>
25 #include <string>
26 
27 #include "utf8convert.h"
28 
29 using namespace std;
30 
/** One conversion check: input bytes in a named charset plus the UTF-8
 *  output that convert_to_utf8() is expected to produce from them.
 *
 *  Members are kept in this order because the table of test cases is
 *  initialised positionally.
 */
struct testcase {
    /// Charset name handed to convert_to_utf8(); a null pointer here marks
    /// the end of the table.
    const char* charset;

    /// Raw input bytes to be converted.
    const char* dump;

    /// Number of bytes in @a dump, or 0 to treat @a dump as an ordinary
    /// NUL-terminated string (needed when the input contains zero bytes).
    size_t len;

    /// Expected result as a NUL-terminated UTF-8 string.
    const char* utf8;
};
37 
38 static const testcase tests[] = {
39     { "utf8", "Hello world", 0, "Hello world" },
40     { "iso-8859-1", "Hello world", 0, "Hello world" },
41     { "us-ascii", "Hello world", 0, "Hello world" },
42     { "iso-8859-1", "Hello\xa0world", 0, "Hello\xc2\xa0world" },
43     { "ISO-8859-1", "Hello\xa0world", 0, "Hello\xc2\xa0world" },
44     { "ISO8859-1", "Hello\xa0world", 0, "Hello\xc2\xa0world" },
45 #if !defined USE_ICONV || defined __GNU_LIBRARY__
46     // "8859_1" is not understood by Solaris iconv, for example.
47     { "8859_1", "Hello\xa0world", 0, "Hello\xc2\xa0world" },
48 #endif
49     { "UTF16BE", "\0T\0e\0s\0t", 8, "Test" },
50     { "UTF16", "\xfe\xff\0T\0e\0s\0t", 10, "Test" },
51     { "UTF_16BE", "\0T\0e\0s\0t", 8, "Test" },
52     { "UTF 16BE", "\0T\0e\0s\0t", 8, "Test" },
53     { "UTF16LE", "T\0e\0s\0t\0", 8, "Test" },
54     { "UTF16", "\xff\xfeT\0e\0s\0t\0", 10, "Test" },
55     { "UCS-2BE", "\0T\0e\0s\0t", 8, "Test" },
56     { "UCS2BE", "\0T\0e\0s\0t", 8, "Test" },
57     { "UCS_2BE", "\0T\0e\0s\0t", 8, "Test" },
58     { "UCS 2BE", "\0T\0e\0s\0t", 8, "Test" },
59     { "UCS-2LE", "T\0e\0s\0t\0", 8, "Test" },
60     { "UCS2LE", "T\0e\0s\0t\0", 8, "Test" },
61     { "UTF16BE", "\xdb\xff\xdf\xfd", 0, "\xf4\x8f\xbf\xbd" },
62     { "UTF16", "\xfe\xff\xdb\xff\xdf\xfd", 0, "\xf4\x8f\xbf\xbd" },
63     { "UTF-16", "\xfe\xff\xdb\xff\xdf\xfd", 0, "\xf4\x8f\xbf\xbd" },
64     { "UTF16LE", "\xff\xdb\xfd\xdf", 0, "\xf4\x8f\xbf\xbd" },
65     { "UTF16", "\xff\xfe\xff\xdb\xfd\xdf", 0, "\xf4\x8f\xbf\xbd" },
66 // GNU libiconv doesn't seem to handle these as expected:
67 #ifndef USE_ICONV
68     { "UCS-2", "\xfe\xff\0T\0e\0s\0t", 10, "Test" },
69     { "UCS-2", "\xff\xfeT\0e\0s\0t\0", 10, "Test" },
70     { "UCS2", "\xfe\xff\0T\0e\0s\0t", 10, "Test" },
71     { "UCS2", "\xff\xfeT\0e\0s\0t\0", 10, "Test" },
72     // If there's no BOM, we're supposed to assume BE.
73     { "UTF16", "\xdb\xff\xdf\xfd", 0, "\xf4\x8f\xbf\xbd" },
74 #endif
75     { 0, 0, 0, 0 }
76 };
77 
78 int
main()79 main()
80 {
81     for (size_t i = 0; tests[i].charset; ++i) {
82 	size_t len = tests[i].len;
83 	string dump;
84 	if (len) {
85 	    dump.assign(tests[i].dump, len);
86 	} else {
87 	    dump.assign(tests[i].dump);
88 	}
89 	convert_to_utf8(dump, tests[i].charset);
90 	if (tests[i].utf8 != dump) {
91 	    cout << "Converting from " << tests[i].charset << "\n"
92 		    "Expected [" << tests[i].utf8 << "]\n"
93 		    "Got      [" << dump << "]" << endl;
94 	    exit(1);
95 	}
96     }
97 }
98