1 /*
2  * ====================================================================
3  *    Licensed to the Apache Software Foundation (ASF) under one
4  *    or more contributor license agreements.  See the NOTICE file
5  *    distributed with this work for additional information
6  *    regarding copyright ownership.  The ASF licenses this file
7  *    to you under the Apache License, Version 2.0 (the
8  *    "License"); you may not use this file except in compliance
9  *    with the License.  You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  *    Unless required by applicable law or agreed to in writing,
14  *    software distributed under the License is distributed on an
15  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  *    KIND, either express or implied.  See the License for the
17  *    specific language governing permissions and limitations
18  *    under the License.
19  * ====================================================================
20  */
21 
22 #include <boost/test/unit_test.hpp>
23 
24 #include <codecvt>
25 #include <cstdint>
26 #include <locale>
27 #include <random>
28 #include <stdexcept>
29 #include <string>
30 #include <vector>
31 
32 #include "private/svn_utf_private.h"
33 #include "../src/aprwrap.hpp"
34 
35 namespace {
to_utf8(const std::u32string & str)36 std::string to_utf8(const std::u32string& str)
37 {
38   static const int32_t endiancheck = 0xa5cbbc5a;
39   static const bool arch_big_endian =
40     (reinterpret_cast<const char*>(&endiancheck)[sizeof(endiancheck) - 1] == '\x5a');
41 
42   apr::pool scratch_pool;
43   const svn_string_t* utf8_string;
44 
45   auto err = svn_utf__utf32_to_utf8(
46       &utf8_string,
47       reinterpret_cast<const apr_int32_t*>(str.c_str()),
48       str.size(), arch_big_endian, scratch_pool.get(), scratch_pool.get());
49   if (err)
50     {
51       svn_error_clear(err);
52       throw std::range_error("bad unicode code point");
53     }
54   return std::string(utf8_string->data, utf8_string->len);
55 }
56 
// Traits describing the code points used to build random test strings.
// The primary template is only declared; every supported character
// type provides an explicit specialization.
template<typename C> struct codepoint;

// Constants shared by all character types; the per-type
// specializations derive from this one.
template<> struct codepoint<void>
{
  // Character type of the UTF-32 strings handed to convert().
  using src_type = char32_t;

  // Inclusive bounds of the surrogate range.
  static constexpr std::uint_least32_t surrogate_min = 0xD800;
  static constexpr std::uint_least32_t surrogate_max = 0xDFFF;

  // Inclusive bounds of the full code point range.
  static constexpr std::uint_least32_t min = 0;
  static constexpr std::uint_least32_t max = 0x10FFFF;
};
66 
67 template<> struct codepoint<char32_t> : public codepoint<void>
68 {
69   using dst_type = char32_t;
convert__anone7fef7620111::codepoint70   static std::u32string convert(const std::u32string& str)
71     {
72       return str;
73     };
74 };
75 
76 template<> struct codepoint<char16_t> : public codepoint<void>
77 {
78   using dst_type = char16_t;
convert__anone7fef7620111::codepoint79   static std::u16string convert(const std::u32string& str)
80     {
81       std::wstring_convert<std::codecvt_utf8_utf16<dst_type>, dst_type> u;
82       return u.from_bytes(to_utf8(str));
83     }
84 };
85 
86 template<> struct codepoint<wchar_t> : public codepoint<void>
87 {
88   using dst_type = wchar_t;
89 
90 #ifdef WIN32
91   // Be conservative, use UCS-2 for wchar_t on Windows
92   static_assert(sizeof(wchar_t) == sizeof(char16_t),
93                 "I thought we had 2-byte wide chars on Windows");
94   static constexpr std::uint_least32_t max = 0xffff;
95 #endif
96 
convert__anone7fef7620111::codepoint97   static std::wstring convert(const std::u32string& str)
98     {
99 #ifdef WIN32
100       const auto from_utf8 =
101         [](const std::string& sstr)
102           {
103             apr::pool scratch_pool;
104             const wchar_t* result;
105             auto err = svn_utf__win32_utf8_to_utf16(
106                 &result, sstr.c_str(), nullptr, scratch_pool.get());
107             if (err)
108               {
109                 svn_error_clear(err);
110                 throw std::range_error("bad conversion to utf16");
111               }
112             return std::wstring(result);
113           }
114 #else
115       std::wstring_convert<std::codecvt_utf8<dst_type>, dst_type> u;
116       const auto from_utf8 = [&u](const std::string& sstr)
117                                {
118                                  return u.from_bytes(sstr);
119                                };
120 #endif
121       return from_utf8(to_utf8(str));
122     }
123 };
124 
125 // Generate random strings.
126 template<typename C>
generate_string_data(int count)127 inline std::vector<std::basic_string<C>> generate_string_data(int count)
128 {
129   using cp = codepoint<C>;
130   std::mt19937 mt{std::random_device()()};
131   std::uniform_int_distribution<> cgen{typename cp::src_type(cp::min),
132                                        typename cp::src_type(cp::max)};
133   std::uniform_int_distribution<> lgen{7U, 31U};
134 
135   std::vector<std::basic_string<C>> result;
136   result.reserve(count);
137 
138   for (int i = 0; i < count; ++i)
139     {
140       const unsigned len = lgen(mt);
141 
142       std::u32string val;
143       val.reserve(len);
144 
145       for (unsigned j = 0; j < len; ++j)
146         {
147         repeat:
148           auto c = cgen(mt);
149           if (uint_least32_t(c) >= cp::surrogate_min
150               && uint_least32_t(c) <= cp::surrogate_max)
151             goto repeat;
152           val.push_back(c);
153         }
154       result.emplace_back(cp::convert(val));
155     }
156   return result;
157 }
158 } // anonymous namespace
159 
160 
161 #include "../src/private/strings_private.hpp"
162 
163 #include "fixture_init.hpp"
164 
165 namespace svn = ::apache::subversion::svnxx;
166 namespace impl = ::apache::subversion::svnxx::impl;
167 
// Open the "strings" test suite.  The suite is decorated with a
// fixture of type `init` (presumably declared in fixture_init.hpp).
BOOST_AUTO_TEST_SUITE(strings,
                      * boost::unit_test::fixture<init>());
170 
// Each of 100 random wchar_t strings must compare equal to itself
// after being passed through impl::convert() and then back through
// impl::convert<wchar_t>().
BOOST_AUTO_TEST_CASE(wstring_conversion_roundtrip)
{
  const auto samples = generate_string_data<wchar_t>(100);
  for (const auto& sample : samples)
    {
      BOOST_TEST((sample == impl::convert<wchar_t>(impl::convert(sample))));
    }
}
176 
// Each of 100 random char16_t strings must compare equal to itself
// after being passed through impl::convert() and then back through
// impl::convert<char16_t>().
BOOST_AUTO_TEST_CASE(u16string_conversion_roundtrip)
{
  const auto samples = generate_string_data<char16_t>(100);
  for (const auto& sample : samples)
    {
      BOOST_TEST((sample == impl::convert<char16_t>(impl::convert(sample))));
    }
}
182 
// Each of 100 random char32_t strings must compare equal to itself
// after being passed through impl::convert() and then back through
// impl::convert<char32_t>().
BOOST_AUTO_TEST_CASE(u32string_conversion_roundtrip)
{
  const auto samples = generate_string_data<char32_t>(100);
  for (const auto& sample : samples)
    {
      BOOST_TEST((sample == impl::convert<char32_t>(impl::convert(sample))));
    }
}
188 
// A string consisting of a single NUL character must keep its length
// of one, and its value, when converted in either direction.
BOOST_AUTO_TEST_CASE(nulchar)
{
  // One-character strings holding only NUL, one per character type.
  const std::string nulstr(1, '\0');
  const std::wstring wnulstr(1, L'\0');
  const std::u16string u16nulstr(1, u'\0');
  const std::u32string u32nulstr(1, U'\0');

  // Sanity check: the NUL really is part of each string.
  BOOST_TEST(nulstr.size() == 1);
  BOOST_TEST(wnulstr.size() == 1);
  BOOST_TEST(u16nulstr.size() == 1);
  BOOST_TEST(u32nulstr.size() == 1);

  // From std::string to the wider types: the length is preserved ...
  BOOST_TEST(impl::convert<wchar_t>(nulstr).size() == 1);
  BOOST_TEST(impl::convert<char16_t>(nulstr).size() == 1);
  BOOST_TEST(impl::convert<char32_t>(nulstr).size() == 1);

  // ... and so is the content.
  BOOST_TEST((impl::convert<wchar_t>(nulstr) == wnulstr));
  BOOST_TEST((impl::convert<char16_t>(nulstr) == u16nulstr));
  BOOST_TEST((impl::convert<char32_t>(nulstr) == u32nulstr));

  // From the wider types back to std::string: the length is preserved ...
  BOOST_TEST(impl::convert(wnulstr).size() == 1);
  BOOST_TEST(impl::convert(u16nulstr).size() == 1);
  BOOST_TEST(impl::convert(u32nulstr).size() == 1);

  // ... and so is the content.
  BOOST_TEST((impl::convert(wnulstr) == nulstr));
  BOOST_TEST((impl::convert(u16nulstr) == nulstr));
  BOOST_TEST((impl::convert(u32nulstr) == nulstr));
}
217 
// Close the "strings" test suite.
BOOST_AUTO_TEST_SUITE_END();
219