1 /*
2 * ====================================================================
3 * Licensed to the Apache Software Foundation (ASF) under one
4 * or more contributor license agreements. See the NOTICE file
5 * distributed with this work for additional information
6 * regarding copyright ownership. The ASF licenses this file
7 * to you under the Apache License, Version 2.0 (the
8 * "License"); you may not use this file except in compliance
9 * with the License. You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing,
14 * software distributed under the License is distributed on an
15 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16 * KIND, either express or implied. See the License for the
17 * specific language governing permissions and limitations
18 * under the License.
19 * ====================================================================
20 */
21
22 #include <boost/test/unit_test.hpp>
23
24 #include <codecvt>
25 #include <cstdint>
26 #include <locale>
27 #include <random>
28 #include <stdexcept>
29 #include <string>
30 #include <vector>
31
32 #include "private/svn_utf_private.h"
33 #include "../src/aprwrap.hpp"
34
35 namespace {
to_utf8(const std::u32string & str)36 std::string to_utf8(const std::u32string& str)
37 {
38 static const int32_t endiancheck = 0xa5cbbc5a;
39 static const bool arch_big_endian =
40 (reinterpret_cast<const char*>(&endiancheck)[sizeof(endiancheck) - 1] == '\x5a');
41
42 apr::pool scratch_pool;
43 const svn_string_t* utf8_string;
44
45 auto err = svn_utf__utf32_to_utf8(
46 &utf8_string,
47 reinterpret_cast<const apr_int32_t*>(str.c_str()),
48 str.size(), arch_big_endian, scratch_pool.get(), scratch_pool.get());
49 if (err)
50 {
51 svn_error_clear(err);
52 throw std::range_error("bad unicode code point");
53 }
54 return std::string(utf8_string->data, utf8_string->len);
55 }
56
// Traits describing how random test strings are generated for a given
// destination character type.  The primary template is declared but never
// defined; every supported type supplies an explicit specialization.
template<typename C> struct codepoint;

// Properties of the code-point space that do not depend on the
// destination character type.
template<> struct codepoint<void>
{
  // Character type of the strings that are generated before conversion.
  using src_type = char32_t;

  // Inclusive bounds of the range from which code points are drawn.
  static constexpr std::uint_least32_t min{0x0};
  static constexpr std::uint_least32_t max{0x10FFFF};

  // Inclusive bounds of the surrogate range.
  static constexpr std::uint_least32_t surrogate_min{0xD800};
  static constexpr std::uint_least32_t surrogate_max{0xDFFF};
};
66
67 template<> struct codepoint<char32_t> : public codepoint<void>
68 {
69 using dst_type = char32_t;
convert__anone7fef7620111::codepoint70 static std::u32string convert(const std::u32string& str)
71 {
72 return str;
73 };
74 };
75
76 template<> struct codepoint<char16_t> : public codepoint<void>
77 {
78 using dst_type = char16_t;
convert__anone7fef7620111::codepoint79 static std::u16string convert(const std::u32string& str)
80 {
81 std::wstring_convert<std::codecvt_utf8_utf16<dst_type>, dst_type> u;
82 return u.from_bytes(to_utf8(str));
83 }
84 };
85
86 template<> struct codepoint<wchar_t> : public codepoint<void>
87 {
88 using dst_type = wchar_t;
89
90 #ifdef WIN32
91 // Be conservative, use UCS-2 for wchar_t on Windows
92 static_assert(sizeof(wchar_t) == sizeof(char16_t),
93 "I thought we had 2-byte wide chars on Windows");
94 static constexpr std::uint_least32_t max = 0xffff;
95 #endif
96
convert__anone7fef7620111::codepoint97 static std::wstring convert(const std::u32string& str)
98 {
99 #ifdef WIN32
100 const auto from_utf8 =
101 [](const std::string& sstr)
102 {
103 apr::pool scratch_pool;
104 const wchar_t* result;
105 auto err = svn_utf__win32_utf8_to_utf16(
106 &result, sstr.c_str(), nullptr, scratch_pool.get());
107 if (err)
108 {
109 svn_error_clear(err);
110 throw std::range_error("bad conversion to utf16");
111 }
112 return std::wstring(result);
113 }
114 #else
115 std::wstring_convert<std::codecvt_utf8<dst_type>, dst_type> u;
116 const auto from_utf8 = [&u](const std::string& sstr)
117 {
118 return u.from_bytes(sstr);
119 };
120 #endif
121 return from_utf8(to_utf8(str));
122 }
123 };
124
125 // Generate random strings.
126 template<typename C>
generate_string_data(int count)127 inline std::vector<std::basic_string<C>> generate_string_data(int count)
128 {
129 using cp = codepoint<C>;
130 std::mt19937 mt{std::random_device()()};
131 std::uniform_int_distribution<> cgen{typename cp::src_type(cp::min),
132 typename cp::src_type(cp::max)};
133 std::uniform_int_distribution<> lgen{7U, 31U};
134
135 std::vector<std::basic_string<C>> result;
136 result.reserve(count);
137
138 for (int i = 0; i < count; ++i)
139 {
140 const unsigned len = lgen(mt);
141
142 std::u32string val;
143 val.reserve(len);
144
145 for (unsigned j = 0; j < len; ++j)
146 {
147 repeat:
148 auto c = cgen(mt);
149 if (uint_least32_t(c) >= cp::surrogate_min
150 && uint_least32_t(c) <= cp::surrogate_max)
151 goto repeat;
152 val.push_back(c);
153 }
154 result.emplace_back(cp::convert(val));
155 }
156 return result;
157 }
158 } // anonymous namespace
159
160
161 #include "../src/private/strings_private.hpp"
162
163 #include "fixture_init.hpp"
164
165 namespace svn = ::apache::subversion::svnxx;
166 namespace impl = ::apache::subversion::svnxx::impl;
167
168 BOOST_AUTO_TEST_SUITE(strings,
169 * boost::unit_test::fixture<init>());
170
// Every random wide string must compare equal to itself after being
// converted with impl::convert() and then back to wchar_t.
BOOST_AUTO_TEST_CASE(wstring_conversion_roundtrip)
{
  const auto samples = generate_string_data<wchar_t>(100);
  for (const auto& sample : samples)
    {
      BOOST_TEST((sample == impl::convert<wchar_t>(impl::convert(sample))));
    }
}
176
// Every random UTF-16 string must compare equal to itself after being
// converted with impl::convert() and then back to char16_t.
BOOST_AUTO_TEST_CASE(u16string_conversion_roundtrip)
{
  const auto samples = generate_string_data<char16_t>(100);
  for (const auto& sample : samples)
    {
      BOOST_TEST((sample == impl::convert<char16_t>(impl::convert(sample))));
    }
}
182
// Every random UTF-32 string must compare equal to itself after being
// converted with impl::convert() and then back to char32_t.
BOOST_AUTO_TEST_CASE(u32string_conversion_roundtrip)
{
  const auto samples = generate_string_data<char32_t>(100);
  for (const auto& sample : samples)
    {
      BOOST_TEST((sample == impl::convert<char32_t>(impl::convert(sample))));
    }
}
188
// A string that consists of a single NUL character must keep both its
// length and its value through impl::convert(), in either direction and
// for every supported character type.
BOOST_AUTO_TEST_CASE(nulchar)
{
  // One-character strings holding only NUL.  The explicit length of 1
  // makes the constructors take the NUL itself instead of stopping at it.
  const auto nulstr = std::string("\0", 1);
  const auto wnulstr = std::wstring(L"\0", 1);
  const auto u16nulstr = std::u16string(u"\0", 1);
  const auto u32nulstr = std::u32string(U"\0", 1);

  // Sanity check of the fixtures themselves.
  BOOST_TEST(nulstr.size() == 1);
  BOOST_TEST(wnulstr.size() == 1);
  BOOST_TEST(u16nulstr.size() == 1);
  BOOST_TEST(u32nulstr.size() == 1);

  // Narrow to wide: the length is preserved ...
  BOOST_TEST(impl::convert<wchar_t>(nulstr).size() == 1);
  BOOST_TEST(impl::convert<char16_t>(nulstr).size() == 1);
  BOOST_TEST(impl::convert<char32_t>(nulstr).size() == 1);

  // ... and so is the value.
  BOOST_TEST((impl::convert<wchar_t>(nulstr) == wnulstr));
  BOOST_TEST((impl::convert<char16_t>(nulstr) == u16nulstr));
  BOOST_TEST((impl::convert<char32_t>(nulstr) == u32nulstr));

  // Wide to narrow: the length is preserved ...
  BOOST_TEST(impl::convert(wnulstr).size() == 1);
  BOOST_TEST(impl::convert(u16nulstr).size() == 1);
  BOOST_TEST(impl::convert(u32nulstr).size() == 1);

  // ... and so is the value.
  BOOST_TEST((impl::convert(wnulstr) == nulstr));
  BOOST_TEST((impl::convert(u16nulstr) == nulstr));
  BOOST_TEST((impl::convert(u32nulstr) == nulstr));
}
217
218 BOOST_AUTO_TEST_SUITE_END();
219