1 // Copyright (c) 2010, Thomas Goyne <plorkyeran@aegisub.org>
2 //
3 // Permission to use, copy, modify, and distribute this software for any
4 // purpose with or without fee is hereby granted, provided that the above
5 // copyright notice and this permission notice appear in all copies.
6 //
7 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10 // ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12 // ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13 // OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
#include <libaegisub/charset_conv.h>

#include <main.h>

#include <cstdint>
#include <string>
#include <iconv.h>
21
22 using namespace agi::charset;
23
// Smoke test: constructing a converter from UTF-8 to UTF-16LE must not throw.
TEST(lagi_iconv, BasicSetup) {
    EXPECT_NO_THROW(
        IconvWrapper("UTF-8", "UTF-16LE")
    );
}
27
// An unknown charset name on either side (or both sides) of the conversion
// must make the constructor throw UnsupportedConversion.
TEST(lagi_iconv, InvalidConversions) {
    const char *const bogus = "nonexistent charset";
    const char *const known = "UTF-16LE";

    // Unknown source charset
    EXPECT_THROW(IconvWrapper(bogus, known), UnsupportedConversion);
    // Unknown destination charset
    EXPECT_THROW(IconvWrapper(known, bogus), UnsupportedConversion);
    // Both unknown
    EXPECT_THROW(IconvWrapper(bogus, bogus), UnsupportedConversion);
}
33
// For a single-byte-unit encoding, SrcStrLen/DstStrLen are expected to return
// the number of characters in a null-terminated string of 0..9 spaces.
TEST(lagi_iconv, StrLen1) {
    IconvWrapper identity("UTF-8", "UTF-8", false);
    for (int len = 0; len < 10; ++len) {
        const std::string spaces(len, ' ');
        const char *raw = spaces.c_str();
        ASSERT_EQ(len, identity.SrcStrLen(raw));
        ASSERT_EQ(len, identity.DstStrLen(raw));
    }
}
// For a 16-bit-unit encoding, SrcStrLen/DstStrLen are expected to return
// 2 * i for a null-terminated string of i code units.
//
// The buffer is a std::u16string rather than std::basic_string<int16_t>:
// the standard only provides std::char_traits for the built-in character
// types, so the int16_t instantiation does not compile on every standard
// library.
TEST(lagi_iconv, StrLen2) {
    IconvWrapper conv("UTF-16LE", "UTF-16LE", false);
    for (int i = 0; i < 10; i++) {
        const std::u16string str(i, u' ');
        // The wrapper takes raw bytes; viewing the storage as char is the intent.
        const char *bytes = reinterpret_cast<const char *>(str.c_str());
        ASSERT_EQ(2*i, conv.SrcStrLen(bytes));
        ASSERT_EQ(2*i, conv.DstStrLen(bytes));
    }
}
// For a 32-bit-unit encoding, SrcStrLen/DstStrLen are expected to return
// 4 * i for a null-terminated string of i code units.
//
// The buffer is a std::u32string rather than std::basic_string<int32_t>:
// the standard only provides std::char_traits for the built-in character
// types, so the int32_t instantiation does not compile on every standard
// library.
TEST(lagi_iconv, StrLen4) {
    IconvWrapper conv("UTF-32LE", "UTF-32LE", false);
    for (int i = 0; i < 10; i++) {
        const std::u32string str(i, U' ');
        // The wrapper takes raw bytes; viewing the storage as char is the intent.
        const char *bytes = reinterpret_cast<const char *>(str.c_str());
        ASSERT_EQ(4*i, conv.SrcStrLen(bytes));
        ASSERT_EQ(4*i, conv.DstStrLen(bytes));
    }
}
58
59 #ifdef _LIBICONV_VERSION
// Checks what happens when the input contains something the destination
// charset cannot represent. Three converters are compared:
//   strict  - UTF-8 -> Shift-JIS, third constructor argument false
//   lenient - UTF-8 -> Shift-JIS, third constructor argument true
//   unicode - UTF-8 -> UTF-16LE, where every input below is representable
// The strict converter is expected to throw BadOutput; the lenient one is
// expected to produce a substitute; the unicode one is expected to succeed.
TEST(lagi_iconv, Fallbacks) {
    IconvWrapper strict("UTF-8", "Shift-JIS", false);
    IconvWrapper lenient("UTF-8", "Shift-JIS", true);
    IconvWrapper unicode("UTF-8", "UTF-16LE", false);

    // Shift-JIS does not have a backslash
    const char backslash[] = "\\";
    EXPECT_THROW(strict.Convert(backslash), BadOutput);
    ASSERT_NO_THROW(lenient.Convert(backslash));
    EXPECT_EQ("\\", lenient.Convert(backslash));
    EXPECT_NO_THROW(unicode.Convert(backslash));

    // BOM into non-unicode: the lenient converter is expected to drop it
    const char byte_order_mark[] = "\xEF\xBB\xBF";
    EXPECT_THROW(strict.Convert(byte_order_mark), BadOutput);
    ASSERT_NO_THROW(lenient.Convert(byte_order_mark));
    EXPECT_EQ("", lenient.Convert(byte_order_mark));
    EXPECT_NO_THROW(unicode.Convert(byte_order_mark));

    // A snowman (U+2603): the lenient converter is expected to emit "?"
    const char snowman_utf8[] = "\xE2\x98\x83";
    EXPECT_THROW(strict.Convert(snowman_utf8), BadOutput);
    EXPECT_NO_THROW(unicode.Convert(snowman_utf8));
    ASSERT_NO_THROW(lenient.Convert(snowman_utf8));
    EXPECT_EQ("?", lenient.Convert(snowman_utf8));
}
85
// Malformed source data must be reported as BadInput.
TEST(lagi_iconv, BadInput) {
    // A lone byte is not a complete UTF-16 code unit.
    IconvWrapper from_utf16("UTF-16LE", "UTF-8");
    EXPECT_THROW(from_utf16.Convert(" "), BadInput);

    // 0xE2 opens a multi-byte UTF-8 sequence, and 0xFF is not a valid
    // continuation byte.
    IconvWrapper from_utf8("UTF-8", "UTF-16LE");
    EXPECT_THROW(from_utf8.Convert("\xE2\xFF"), BadInput);
}
92 #endif
93
// Converts a single space between UTF-8 and both byte orders of UTF-16 and
// compares the result against hand-written byte sequences.
TEST(lagi_iconv, Conversions) {
    IconvWrapper from_utf16le("UTF-16LE", "UTF-8", false);
    IconvWrapper from_utf16be("UTF-16BE", "UTF-8", false);
    IconvWrapper to_utf16le("UTF-8", "UTF-16LE", false);

    // U+0020 in each encoding. The UTF-16 strings contain a zero byte, so
    // they are built with an explicit length.
    const char be_bytes[] = {0, 32};
    const char le_bytes[] = {32, 0};
    const std::string as_utf8(" ");
    const std::string as_utf16be(be_bytes, sizeof(be_bytes));
    const std::string as_utf16le(le_bytes, sizeof(le_bytes));

    EXPECT_EQ(as_utf8, from_utf16le.Convert(as_utf16le));
    EXPECT_EQ(as_utf8, from_utf16be.Convert(as_utf16be));
    EXPECT_EQ(as_utf16le, to_utf16le.Convert(as_utf8));
}
110
111 // Basic overflow tests
// Exercises the raw-buffer Convert overload with destination sizes 0, 1 and 2
// for a one-byte source (the terminator of an empty string).
//
// The destination is pre-filled with 0xFF so the checks can tell which bytes
// were written: a destination that is too small must throw BufferTooSmall and
// leave the first byte untouched, while a two-byte destination must receive
// exactly two zero bytes.
TEST(lagi_iconv, Buffer) {
    IconvWrapper conv("UTF-8", "UTF-16LE", false);
    char buff[32];
    // Plain loop instead of memset: this file does not include <cstring>.
    for (char &byte : buff)
        byte = '\xFF';

    EXPECT_THROW(conv.Convert("", 1, buff, 0), BufferTooSmall);
    EXPECT_EQ('\xFF', buff[0]);
    EXPECT_THROW(conv.Convert("", 1, buff, 1), BufferTooSmall);
    EXPECT_EQ('\xFF', buff[0]);
    EXPECT_NO_THROW(conv.Convert("", 1, buff, 2));
    EXPECT_EQ('\0', buff[0]);
    EXPECT_EQ('\0', buff[1]);
    // The byte after the output must not have been touched.
    EXPECT_EQ('\xFF', buff[2]);
}
126
// An empty destination charset name must be accepted, and a plain space must
// convert to itself.
// NOTE(review): "" presumably selects the current locale's charset - confirm
// against IconvWrapper.
TEST(lagi_iconv, LocalSupport) {
    ASSERT_NO_THROW(IconvWrapper("UTF-8", ""));

    IconvWrapper to_local("UTF-8", "");
    const char *const space = " ";
    ASSERT_NO_THROW(to_local.Convert(space));
    EXPECT_EQ(space, to_local.Convert(space));
}
// The pseudo-charset name "wchar_t" must be accepted as a destination.
TEST(lagi_iconv, wchar_tSupport) {
    EXPECT_NO_THROW(
        IconvWrapper("UTF-8", "wchar_t")
    );
}
136
// For every encoding name returned by GetEncodingsList, converters to and
// from UTF-8 must be constructible, and an ASCII pangram must survive a
// UTF-8 -> encoding -> UTF-8 round trip unchanged.
TEST(lagi_iconv, Roundtrip) {
    const char *const pangram = "Jackdaws love my big sphinx of quartz";

    for (auto const& encoding : GetEncodingsList<std::vector<std::string>>()) {
        const char *const enc = encoding.c_str();

        ASSERT_NO_THROW(IconvWrapper("utf-8", enc));
        ASSERT_NO_THROW(IconvWrapper(enc, "utf-8"));

        const std::string encoded = IconvWrapper("utf-8", enc).Convert(pangram);
        EXPECT_EQ(pangram, IconvWrapper(enc, "utf-8").Convert(encoded));
    }
}
148
// Checks UTF-8 -> ISO-6937-2 conversion against known byte sequences, using
// one converter built with the default third constructor argument
// (with_subst) and one built with it set to false (strict).
TEST(lagi_iconv, Iso6937) {
    ASSERT_NO_THROW(IconvWrapper("UTF-8", "ISO-6937-2"));
    IconvWrapper with_subst("UTF-8", "ISO-6937-2");
    IconvWrapper strict("UTF-8", "ISO-6937-2", false);

    // 7-bit is same as ISO-8859: every code 0..127 must map to itself
    for (int code = 0; code < 128; ++code) {
        const char single[] = { static_cast<char>(code), '\0' };
        std::string converted;
        EXPECT_NO_THROW(converted = with_subst.Convert(single));
        EXPECT_STREQ(single, converted.c_str());
    }

    std::string out;

    // LATIN CAPITAL LETTER D WITH CARON (U+010E) - multibyte char in main block
    EXPECT_NO_THROW(out = with_subst.Convert("\xC4\x8E"));
    EXPECT_STREQ("\xCF\x44", out.c_str());

    // BREVE - multibyte char in extended ranges
    EXPECT_NO_THROW(out = with_subst.Convert("\xCB\x98"));
    EXPECT_STREQ("\xC6\x20", out.c_str());

    // EM DASH - single byte char in extended ranges
    EXPECT_NO_THROW(out = with_subst.Convert("\xE2\x80\x94"));
    EXPECT_STREQ("\xD0", out.c_str());

    // codepoint not in ISO-6937-2: "?" from with_subst, BadOutput from strict
    EXPECT_NO_THROW(out = with_subst.Convert("\xCB\x97"));
    EXPECT_STREQ("?", out.c_str());
    EXPECT_THROW(strict.Convert("\xCB\x97"), agi::charset::BadOutput);
}
181