1 // Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
2 //
3 // This file is part of the heyoka library.
4 //
5 // This Source Code Form is subject to the terms of the Mozilla
6 // Public License v. 2.0. If a copy of the MPL was not distributed
7 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9 #include <heyoka/config.hpp>
10
11 #include <cassert>
12 #include <cstdint>
13 #include <initializer_list>
14 #include <stdexcept>
15 #include <string>
16 #include <type_traits>
17 #include <unordered_map>
18 #include <utility>
19 #include <variant>
20 #include <vector>
21
22 #include <boost/numeric/conversion/cast.hpp>
23
24 #include <fmt/format.h>
25
26 #include <llvm/IR/Attributes.h>
27 #include <llvm/IR/BasicBlock.h>
28 #include <llvm/IR/DerivedTypes.h>
29 #include <llvm/IR/Function.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/IR/LLVMContext.h>
32 #include <llvm/IR/Module.h>
33 #include <llvm/IR/Type.h>
34 #include <llvm/IR/Value.h>
35 #include <llvm/Support/Casting.h>
36
37 #if defined(HEYOKA_HAVE_REAL128)
38
39 #include <mp++/real128.hpp>
40
41 #endif
42
43 #include <heyoka/detail/llvm_helpers.hpp>
44 #include <heyoka/detail/llvm_vector_type.hpp>
45 #include <heyoka/detail/sleef.hpp>
46 #include <heyoka/detail/string_conv.hpp>
47 #include <heyoka/detail/taylor_common.hpp>
48 #include <heyoka/expression.hpp>
49 #include <heyoka/func.hpp>
50 #include <heyoka/llvm_state.hpp>
51 #include <heyoka/math/square.hpp>
52 #include <heyoka/math/tanh.hpp>
53 #include <heyoka/number.hpp>
54 #include <heyoka/s11n.hpp>
55 #include <heyoka/taylor.hpp>
56 #include <heyoka/variable.hpp>
57
58 #if defined(_MSC_VER) && !defined(__clang__)
59
60 // NOTE: MSVC has issues with the other "using"
61 // statement form.
62 using namespace fmt::literals;
63
64 #else
65
66 using fmt::literals::operator""_format;
67
68 #endif
69
70 namespace heyoka
71 {
72
73 namespace detail
74 {
75
tanh_impl(expression e)76 tanh_impl::tanh_impl(expression e) : func_base("tanh", std::vector{std::move(e)}) {}
77
tanh_impl()78 tanh_impl::tanh_impl() : tanh_impl(0_dbl) {}
79
gradient() const80 std::vector<expression> tanh_impl::gradient() const
81 {
82 assert(args().size() == 1u);
83 return {1_dbl - square(tanh(args()[0]))};
84 }
85
codegen_dbl(llvm_state & s,const std::vector<llvm::Value * > & args) const86 llvm::Value *tanh_impl::codegen_dbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
87 {
88 assert(args.size() == 1u);
89 assert(args[0] != nullptr);
90
91 if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(args[0]->getType())) {
92 if (const auto sfn = sleef_function_name(s.context(), "tanh", vec_t->getElementType(),
93 boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
94 !sfn.empty()) {
95 return llvm_invoke_external(
96 s, sfn, vec_t, args,
97 // NOTE: in theory we may add ReadNone here as well,
98 // but for some reason, at least up to LLVM 10,
99 // this causes strange codegen issues. Revisit
100 // in the future.
101 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
102 }
103 }
104
105 return call_extern_vec(s, args[0], "tanh");
106 }
107
codegen_ldbl(llvm_state & s,const std::vector<llvm::Value * > & args) const108 llvm::Value *tanh_impl::codegen_ldbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
109 {
110 assert(args.size() == 1u);
111 assert(args[0] != nullptr);
112
113 return call_extern_vec(s, args[0],
114 #if defined(_MSC_VER)
115 // NOTE: it seems like the MSVC stdlib does not have a tanhl function,
116 // because LLVM complains about the symbol "tanhl" not being
117 // defined. Hence, use our own wrapper instead.
118 "heyoka_tanhl"
119 #else
120 "tanhl"
121 #endif
122 );
123 }
124
125 #if defined(HEYOKA_HAVE_REAL128)
126
codegen_f128(llvm_state & s,const std::vector<llvm::Value * > & args) const127 llvm::Value *tanh_impl::codegen_f128(llvm_state &s, const std::vector<llvm::Value *> &args) const
128 {
129 assert(args.size() == 1u);
130 assert(args[0] != nullptr);
131
132 return call_extern_vec(s, args[0], "tanhq");
133 }
134
135 #endif
136
taylor_decompose(taylor_dc_t & u_vars_defs)137 taylor_dc_t::size_type tanh_impl::taylor_decompose(taylor_dc_t &u_vars_defs) &&
138 {
139 assert(args().size() == 1u);
140
141 // Append the tanh decomposition.
142 u_vars_defs.emplace_back(func{std::move(*this)}, std::vector<std::uint32_t>{});
143
144 // Append the auxiliary function tanh(arg) * tanh(arg).
145 u_vars_defs.emplace_back(square(expression{"u_{}"_format(u_vars_defs.size() - 1u)}), std::vector<std::uint32_t>{});
146
147 // Add the hidden dep.
148 (u_vars_defs.end() - 2)->second.push_back(boost::numeric_cast<std::uint32_t>(u_vars_defs.size() - 1u));
149
150 return u_vars_defs.size() - 2u;
151 }
152
153 namespace
154 {
155
156 // Derivative of tanh(number).
157 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_diff_tanh_impl(llvm_state & s,const tanh_impl & f,const std::vector<std::uint32_t> &,const U & num,const std::vector<llvm::Value * > &,llvm::Value * par_ptr,std::uint32_t,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)158 llvm::Value *taylor_diff_tanh_impl(llvm_state &s, const tanh_impl &f, const std::vector<std::uint32_t> &, const U &num,
159 const std::vector<llvm::Value *> &, llvm::Value *par_ptr, std::uint32_t,
160 std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
161 {
162 if (order == 0u) {
163 return codegen_from_values<T>(s, f, {taylor_codegen_numparam<T>(s, num, par_ptr, batch_size)});
164 } else {
165 return vector_splat(s.builder(), codegen<T>(s, number{0.}), batch_size);
166 }
167 }
168
169 template <typename T>
taylor_diff_tanh_impl(llvm_state & s,const tanh_impl & f,const std::vector<std::uint32_t> & deps,const variable & var,const std::vector<llvm::Value * > & arr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)170 llvm::Value *taylor_diff_tanh_impl(llvm_state &s, const tanh_impl &f, const std::vector<std::uint32_t> &deps,
171 const variable &var, const std::vector<llvm::Value *> &arr, llvm::Value *,
172 std::uint32_t n_uvars, std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
173 {
174 auto &builder = s.builder();
175
176 // Fetch the index of the variable.
177 const auto b_idx = uname_to_index(var.name());
178
179 if (order == 0u) {
180 return codegen_from_values<T>(s, f, {taylor_fetch_diff(arr, b_idx, 0, n_uvars)});
181 }
182
183 // NOTE: iteration in the [1, order] range.
184 std::vector<llvm::Value *> sum;
185 for (std::uint32_t j = 1; j <= order; ++j) {
186 // NOTE: the only hidden dependency contains the index of the
187 // u variable whose definition is tanh(var) * tanh(var).
188 auto bj = taylor_fetch_diff(arr, b_idx, j, n_uvars);
189 auto cnj = taylor_fetch_diff(arr, deps[0], order - j, n_uvars);
190
191 auto fac = vector_splat(builder, codegen<T>(s, number(static_cast<T>(j))), batch_size);
192
193 // Add j*cnj*bj to the sum.
194 sum.push_back(builder.CreateFMul(fac, builder.CreateFMul(cnj, bj)));
195 }
196
197 // Init the return value as the result of the sum.
198 auto ret_acc = pairwise_sum(builder, sum);
199
200 // Divide by order.
201 ret_acc
202 = builder.CreateFDiv(ret_acc, vector_splat(builder, codegen<T>(s, number(static_cast<T>(order))), batch_size));
203
204 // Create and return the result.
205 return builder.CreateFSub(taylor_fetch_diff(arr, b_idx, order, n_uvars), ret_acc);
206 }
207
208 // All the other cases.
209 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_diff_tanh_impl(llvm_state &,const tanh_impl &,const std::vector<std::uint32_t> &,const U &,const std::vector<llvm::Value * > &,llvm::Value *,std::uint32_t,std::uint32_t,std::uint32_t,std::uint32_t)210 llvm::Value *taylor_diff_tanh_impl(llvm_state &, const tanh_impl &, const std::vector<std::uint32_t> &, const U &,
211 const std::vector<llvm::Value *> &, llvm::Value *, std::uint32_t, std::uint32_t,
212 std::uint32_t, std::uint32_t)
213 {
214 throw std::invalid_argument(
215 "An invalid argument type was encountered while trying to build the Taylor derivative of a hyperbolic tangent");
216 }
217
218 template <typename T>
taylor_diff_tanh(llvm_state & s,const tanh_impl & f,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size)219 llvm::Value *taylor_diff_tanh(llvm_state &s, const tanh_impl &f, const std::vector<std::uint32_t> &deps,
220 const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, std::uint32_t n_uvars,
221 std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size)
222 {
223 assert(f.args().size() == 1u);
224
225 if (deps.size() != 1u) {
226 throw std::invalid_argument(
227 "A hidden dependency vector of size 1 is expected in order to compute the Taylor "
228 "derivative of the hyperbolic tangent, but a vector of size {} was passed instead"_format(deps.size()));
229 }
230
231 return std::visit(
232 [&](const auto &v) {
233 return taylor_diff_tanh_impl<T>(s, f, deps, v, arr, par_ptr, n_uvars, order, idx, batch_size);
234 },
235 f.args()[0].value());
236 }
237
238 } // namespace
239
taylor_diff_dbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const240 llvm::Value *tanh_impl::taylor_diff_dbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
241 const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
242 std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
243 std::uint32_t batch_size, bool) const
244 {
245 return taylor_diff_tanh<double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
246 }
247
taylor_diff_ldbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const248 llvm::Value *tanh_impl::taylor_diff_ldbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
249 const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
250 std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
251 std::uint32_t batch_size, bool) const
252 {
253 return taylor_diff_tanh<long double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
254 }
255
256 #if defined(HEYOKA_HAVE_REAL128)
257
taylor_diff_f128(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const258 llvm::Value *tanh_impl::taylor_diff_f128(llvm_state &s, const std::vector<std::uint32_t> &deps,
259 const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
260 std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
261 std::uint32_t batch_size, bool) const
262 {
263 return taylor_diff_tanh<mppp::real128>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
264 }
265
266 #endif
267
268 namespace
269 {
270
271 // Derivative of tanh(number).
272 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_c_diff_func_tanh_impl(llvm_state & s,const tanh_impl & fn,const U & num,std::uint32_t n_uvars,std::uint32_t batch_size)273 llvm::Function *taylor_c_diff_func_tanh_impl(llvm_state &s, const tanh_impl &fn, const U &num, std::uint32_t n_uvars,
274 std::uint32_t batch_size)
275 {
276 return taylor_c_diff_func_unary_num_det<T>(s, fn, num, n_uvars, batch_size, "tanh", 1);
277 }
278
279 // Derivative of tanh(variable).
280 template <typename T>
taylor_c_diff_func_tanh_impl(llvm_state & s,const tanh_impl & fn,const variable & var,std::uint32_t n_uvars,std::uint32_t batch_size)281 llvm::Function *taylor_c_diff_func_tanh_impl(llvm_state &s, const tanh_impl &fn, const variable &var,
282 std::uint32_t n_uvars, std::uint32_t batch_size)
283 {
284 auto &module = s.module();
285 auto &builder = s.builder();
286 auto &context = s.context();
287
288 // Fetch the floating-point type.
289 auto val_t = to_llvm_vector_type<T>(context, batch_size);
290
291 // Fetch the function name and arguments.
292 const auto na_pair = taylor_c_diff_func_name_args<T>(context, "tanh", n_uvars, batch_size, {var}, 1);
293 const auto &fname = na_pair.first;
294 const auto &fargs = na_pair.second;
295
296 // Try to see if we already created the function.
297 auto f = module.getFunction(fname);
298
299 if (f == nullptr) {
300 // The function was not created before, do it now.
301
302 // Fetch the current insertion block.
303 auto orig_bb = builder.GetInsertBlock();
304
305 // The return type is val_t.
306 auto *ft = llvm::FunctionType::get(val_t, fargs, false);
307 // Create the function
308 f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &module);
309 assert(f != nullptr);
310
311 // Fetch the necessary function arguments.
312 auto ord = f->args().begin();
313 auto diff_ptr = f->args().begin() + 2;
314 auto b_idx = f->args().begin() + 5;
315 auto dep_idx = f->args().begin() + 6;
316
317 // Create a new basic block to start insertion into.
318 builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
319
320 // Create the return value.
321 auto retval = builder.CreateAlloca(val_t);
322
323 // Create the accumulator.
324 auto acc = builder.CreateAlloca(val_t);
325
326 llvm_if_then_else(
327 s, builder.CreateICmpEQ(ord, builder.getInt32(0)),
328 [&]() {
329 // For order 0, invoke the function on the order 0 of b_idx.
330 builder.CreateStore(codegen_from_values<T>(
331 s, fn, {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), b_idx)}),
332 retval);
333 },
334 [&]() {
335 // Init the accumulator.
336 builder.CreateStore(vector_splat(builder, codegen<T>(s, number{0.}), batch_size), acc);
337
338 // Run the loop.
339 llvm_loop_u32(s, builder.getInt32(1), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) {
340 auto bj = taylor_c_load_diff(s, diff_ptr, n_uvars, j, b_idx);
341 auto cnj = taylor_c_load_diff(s, diff_ptr, n_uvars, builder.CreateSub(ord, j), dep_idx);
342
343 auto fac = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type<T>(context)), batch_size);
344
345 builder.CreateStore(builder.CreateFAdd(builder.CreateLoad(acc),
346 builder.CreateFMul(fac, builder.CreateFMul(cnj, bj))),
347 acc);
348 });
349
350 // Divide by the order and subtract from b^[n] to produce the return value.
351 auto ord_v = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type<T>(context)), batch_size);
352
353 builder.CreateStore(builder.CreateFSub(taylor_c_load_diff(s, diff_ptr, n_uvars, ord, b_idx),
354 builder.CreateFDiv(builder.CreateLoad(acc), ord_v)),
355 retval);
356 });
357
358 // Return the result.
359 builder.CreateRet(builder.CreateLoad(retval));
360
361 // Verify.
362 s.verify_function(f);
363
364 // Restore the original insertion block.
365 builder.SetInsertPoint(orig_bb);
366 } else {
367 // The function was created before. Check if the signatures match.
368 // NOTE: there could be a mismatch if the derivative function was created
369 // and then optimised - optimisation might remove arguments which are compile-time
370 // constants.
371 if (!compare_function_signature(f, val_t, fargs)) {
372 throw std::invalid_argument("Inconsistent function signature for the Taylor derivative of the hyperbolic "
373 "tangent in compact mode detected");
374 }
375 }
376
377 return f;
378 }
379
380 // All the other cases.
381 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_c_diff_func_tanh_impl(llvm_state &,const tanh_impl &,const U &,std::uint32_t,std::uint32_t)382 llvm::Function *taylor_c_diff_func_tanh_impl(llvm_state &, const tanh_impl &, const U &, std::uint32_t, std::uint32_t)
383 {
384 throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative "
385 "of a hyperbolic tangent in compact mode");
386 }
387
388 template <typename T>
taylor_c_diff_func_tanh(llvm_state & s,const tanh_impl & fn,std::uint32_t n_uvars,std::uint32_t batch_size)389 llvm::Function *taylor_c_diff_func_tanh(llvm_state &s, const tanh_impl &fn, std::uint32_t n_uvars,
390 std::uint32_t batch_size)
391 {
392 assert(fn.args().size() == 1u);
393
394 return std::visit([&](const auto &v) { return taylor_c_diff_func_tanh_impl<T>(s, fn, v, n_uvars, batch_size); },
395 fn.args()[0].value());
396 }
397
398 } // namespace
399
taylor_c_diff_func_dbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const400 llvm::Function *tanh_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
401 bool) const
402 {
403 return taylor_c_diff_func_tanh<double>(s, *this, n_uvars, batch_size);
404 }
405
taylor_c_diff_func_ldbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const406 llvm::Function *tanh_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
407 bool) const
408 {
409 return taylor_c_diff_func_tanh<long double>(s, *this, n_uvars, batch_size);
410 }
411
412 #if defined(HEYOKA_HAVE_REAL128)
413
taylor_c_diff_func_f128(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const414 llvm::Function *tanh_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
415 bool) const
416 {
417 return taylor_c_diff_func_tanh<mppp::real128>(s, *this, n_uvars, batch_size);
418 }
419
420 #endif
421
422 } // namespace detail
423
tanh(expression e)424 expression tanh(expression e)
425 {
426 return expression{func{detail::tanh_impl(std::move(e))}};
427 }
428
429 } // namespace heyoka
430
431 HEYOKA_S11N_FUNC_EXPORT_IMPLEMENT(heyoka::detail::tanh_impl)
432