1 // Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
2 //
3 // This file is part of the heyoka library.
4 //
5 // This Source Code Form is subject to the terms of the Mozilla
6 // Public License v. 2.0. If a copy of the MPL was not distributed
7 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 #include <heyoka/config.hpp>
10 
11 #include <cassert>
12 #include <cstdint>
13 #include <initializer_list>
14 #include <stdexcept>
15 #include <string>
16 #include <type_traits>
17 #include <unordered_map>
18 #include <utility>
19 #include <variant>
20 #include <vector>
21 
22 #include <boost/numeric/conversion/cast.hpp>
23 
24 #include <fmt/format.h>
25 
26 #include <llvm/IR/Attributes.h>
27 #include <llvm/IR/BasicBlock.h>
28 #include <llvm/IR/DerivedTypes.h>
29 #include <llvm/IR/Function.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/IR/LLVMContext.h>
32 #include <llvm/IR/Module.h>
33 #include <llvm/IR/Type.h>
34 #include <llvm/IR/Value.h>
35 #include <llvm/Support/Casting.h>
36 
37 #if defined(HEYOKA_HAVE_REAL128)
38 
39 #include <mp++/real128.hpp>
40 
41 #endif
42 
43 #include <heyoka/detail/llvm_helpers.hpp>
44 #include <heyoka/detail/llvm_vector_type.hpp>
45 #include <heyoka/detail/sleef.hpp>
46 #include <heyoka/detail/string_conv.hpp>
47 #include <heyoka/detail/taylor_common.hpp>
48 #include <heyoka/expression.hpp>
49 #include <heyoka/func.hpp>
50 #include <heyoka/llvm_state.hpp>
51 #include <heyoka/math/square.hpp>
52 #include <heyoka/math/tanh.hpp>
53 #include <heyoka/number.hpp>
54 #include <heyoka/s11n.hpp>
55 #include <heyoka/taylor.hpp>
56 #include <heyoka/variable.hpp>
57 
58 #if defined(_MSC_VER) && !defined(__clang__)
59 
60 // NOTE: MSVC has issues with the other "using"
61 // statement form.
62 using namespace fmt::literals;
63 
64 #else
65 
66 using fmt::literals::operator""_format;
67 
68 #endif
69 
70 namespace heyoka
71 {
72 
73 namespace detail
74 {
75 
tanh_impl(expression e)76 tanh_impl::tanh_impl(expression e) : func_base("tanh", std::vector{std::move(e)}) {}
77 
tanh_impl()78 tanh_impl::tanh_impl() : tanh_impl(0_dbl) {}
79 
gradient() const80 std::vector<expression> tanh_impl::gradient() const
81 {
82     assert(args().size() == 1u);
83     return {1_dbl - square(tanh(args()[0]))};
84 }
85 
codegen_dbl(llvm_state & s,const std::vector<llvm::Value * > & args) const86 llvm::Value *tanh_impl::codegen_dbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
87 {
88     assert(args.size() == 1u);
89     assert(args[0] != nullptr);
90 
91     if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(args[0]->getType())) {
92         if (const auto sfn = sleef_function_name(s.context(), "tanh", vec_t->getElementType(),
93                                                  boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
94             !sfn.empty()) {
95             return llvm_invoke_external(
96                 s, sfn, vec_t, args,
97                 // NOTE: in theory we may add ReadNone here as well,
98                 // but for some reason, at least up to LLVM 10,
99                 // this causes strange codegen issues. Revisit
100                 // in the future.
101                 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
102         }
103     }
104 
105     return call_extern_vec(s, args[0], "tanh");
106 }
107 
codegen_ldbl(llvm_state & s,const std::vector<llvm::Value * > & args) const108 llvm::Value *tanh_impl::codegen_ldbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
109 {
110     assert(args.size() == 1u);
111     assert(args[0] != nullptr);
112 
113     return call_extern_vec(s, args[0],
114 #if defined(_MSC_VER)
115                            // NOTE: it seems like the MSVC stdlib does not have a tanhl function,
116                            // because LLVM complains about the symbol "tanhl" not being
117                            // defined. Hence, use our own wrapper instead.
118                            "heyoka_tanhl"
119 #else
120                            "tanhl"
121 #endif
122     );
123 }
124 
125 #if defined(HEYOKA_HAVE_REAL128)
126 
codegen_f128(llvm_state & s,const std::vector<llvm::Value * > & args) const127 llvm::Value *tanh_impl::codegen_f128(llvm_state &s, const std::vector<llvm::Value *> &args) const
128 {
129     assert(args.size() == 1u);
130     assert(args[0] != nullptr);
131 
132     return call_extern_vec(s, args[0], "tanhq");
133 }
134 
135 #endif
136 
taylor_decompose(taylor_dc_t & u_vars_defs)137 taylor_dc_t::size_type tanh_impl::taylor_decompose(taylor_dc_t &u_vars_defs) &&
138 {
139     assert(args().size() == 1u);
140 
141     // Append the tanh decomposition.
142     u_vars_defs.emplace_back(func{std::move(*this)}, std::vector<std::uint32_t>{});
143 
144     // Append the auxiliary function tanh(arg) * tanh(arg).
145     u_vars_defs.emplace_back(square(expression{"u_{}"_format(u_vars_defs.size() - 1u)}), std::vector<std::uint32_t>{});
146 
147     // Add the hidden dep.
148     (u_vars_defs.end() - 2)->second.push_back(boost::numeric_cast<std::uint32_t>(u_vars_defs.size() - 1u));
149 
150     return u_vars_defs.size() - 2u;
151 }
152 
153 namespace
154 {
155 
156 // Derivative of tanh(number).
157 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_diff_tanh_impl(llvm_state & s,const tanh_impl & f,const std::vector<std::uint32_t> &,const U & num,const std::vector<llvm::Value * > &,llvm::Value * par_ptr,std::uint32_t,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)158 llvm::Value *taylor_diff_tanh_impl(llvm_state &s, const tanh_impl &f, const std::vector<std::uint32_t> &, const U &num,
159                                    const std::vector<llvm::Value *> &, llvm::Value *par_ptr, std::uint32_t,
160                                    std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
161 {
162     if (order == 0u) {
163         return codegen_from_values<T>(s, f, {taylor_codegen_numparam<T>(s, num, par_ptr, batch_size)});
164     } else {
165         return vector_splat(s.builder(), codegen<T>(s, number{0.}), batch_size);
166     }
167 }
168 
169 template <typename T>
taylor_diff_tanh_impl(llvm_state & s,const tanh_impl & f,const std::vector<std::uint32_t> & deps,const variable & var,const std::vector<llvm::Value * > & arr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)170 llvm::Value *taylor_diff_tanh_impl(llvm_state &s, const tanh_impl &f, const std::vector<std::uint32_t> &deps,
171                                    const variable &var, const std::vector<llvm::Value *> &arr, llvm::Value *,
172                                    std::uint32_t n_uvars, std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
173 {
174     auto &builder = s.builder();
175 
176     // Fetch the index of the variable.
177     const auto b_idx = uname_to_index(var.name());
178 
179     if (order == 0u) {
180         return codegen_from_values<T>(s, f, {taylor_fetch_diff(arr, b_idx, 0, n_uvars)});
181     }
182 
183     // NOTE: iteration in the [1, order] range.
184     std::vector<llvm::Value *> sum;
185     for (std::uint32_t j = 1; j <= order; ++j) {
186         // NOTE: the only hidden dependency contains the index of the
187         // u variable whose definition is tanh(var) * tanh(var).
188         auto bj = taylor_fetch_diff(arr, b_idx, j, n_uvars);
189         auto cnj = taylor_fetch_diff(arr, deps[0], order - j, n_uvars);
190 
191         auto fac = vector_splat(builder, codegen<T>(s, number(static_cast<T>(j))), batch_size);
192 
193         // Add j*cnj*bj to the sum.
194         sum.push_back(builder.CreateFMul(fac, builder.CreateFMul(cnj, bj)));
195     }
196 
197     // Init the return value as the result of the sum.
198     auto ret_acc = pairwise_sum(builder, sum);
199 
200     // Divide by order.
201     ret_acc
202         = builder.CreateFDiv(ret_acc, vector_splat(builder, codegen<T>(s, number(static_cast<T>(order))), batch_size));
203 
204     // Create and return the result.
205     return builder.CreateFSub(taylor_fetch_diff(arr, b_idx, order, n_uvars), ret_acc);
206 }
207 
208 // All the other cases.
209 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_diff_tanh_impl(llvm_state &,const tanh_impl &,const std::vector<std::uint32_t> &,const U &,const std::vector<llvm::Value * > &,llvm::Value *,std::uint32_t,std::uint32_t,std::uint32_t,std::uint32_t)210 llvm::Value *taylor_diff_tanh_impl(llvm_state &, const tanh_impl &, const std::vector<std::uint32_t> &, const U &,
211                                    const std::vector<llvm::Value *> &, llvm::Value *, std::uint32_t, std::uint32_t,
212                                    std::uint32_t, std::uint32_t)
213 {
214     throw std::invalid_argument(
215         "An invalid argument type was encountered while trying to build the Taylor derivative of a hyperbolic tangent");
216 }
217 
218 template <typename T>
taylor_diff_tanh(llvm_state & s,const tanh_impl & f,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size)219 llvm::Value *taylor_diff_tanh(llvm_state &s, const tanh_impl &f, const std::vector<std::uint32_t> &deps,
220                               const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, std::uint32_t n_uvars,
221                               std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size)
222 {
223     assert(f.args().size() == 1u);
224 
225     if (deps.size() != 1u) {
226         throw std::invalid_argument(
227             "A hidden dependency vector of size 1 is expected in order to compute the Taylor "
228             "derivative of the hyperbolic tangent, but a vector of size {} was passed instead"_format(deps.size()));
229     }
230 
231     return std::visit(
232         [&](const auto &v) {
233             return taylor_diff_tanh_impl<T>(s, f, deps, v, arr, par_ptr, n_uvars, order, idx, batch_size);
234         },
235         f.args()[0].value());
236 }
237 
238 } // namespace
239 
taylor_diff_dbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const240 llvm::Value *tanh_impl::taylor_diff_dbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
241                                         const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
242                                         std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
243                                         std::uint32_t batch_size, bool) const
244 {
245     return taylor_diff_tanh<double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
246 }
247 
taylor_diff_ldbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const248 llvm::Value *tanh_impl::taylor_diff_ldbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
249                                          const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
250                                          std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
251                                          std::uint32_t batch_size, bool) const
252 {
253     return taylor_diff_tanh<long double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
254 }
255 
256 #if defined(HEYOKA_HAVE_REAL128)
257 
taylor_diff_f128(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const258 llvm::Value *tanh_impl::taylor_diff_f128(llvm_state &s, const std::vector<std::uint32_t> &deps,
259                                          const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
260                                          std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
261                                          std::uint32_t batch_size, bool) const
262 {
263     return taylor_diff_tanh<mppp::real128>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
264 }
265 
266 #endif
267 
268 namespace
269 {
270 
271 // Derivative of tanh(number).
272 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_c_diff_func_tanh_impl(llvm_state & s,const tanh_impl & fn,const U & num,std::uint32_t n_uvars,std::uint32_t batch_size)273 llvm::Function *taylor_c_diff_func_tanh_impl(llvm_state &s, const tanh_impl &fn, const U &num, std::uint32_t n_uvars,
274                                              std::uint32_t batch_size)
275 {
276     return taylor_c_diff_func_unary_num_det<T>(s, fn, num, n_uvars, batch_size, "tanh", 1);
277 }
278 
279 // Derivative of tanh(variable).
280 template <typename T>
taylor_c_diff_func_tanh_impl(llvm_state & s,const tanh_impl & fn,const variable & var,std::uint32_t n_uvars,std::uint32_t batch_size)281 llvm::Function *taylor_c_diff_func_tanh_impl(llvm_state &s, const tanh_impl &fn, const variable &var,
282                                              std::uint32_t n_uvars, std::uint32_t batch_size)
283 {
284     auto &module = s.module();
285     auto &builder = s.builder();
286     auto &context = s.context();
287 
288     // Fetch the floating-point type.
289     auto val_t = to_llvm_vector_type<T>(context, batch_size);
290 
291     // Fetch the function name and arguments.
292     const auto na_pair = taylor_c_diff_func_name_args<T>(context, "tanh", n_uvars, batch_size, {var}, 1);
293     const auto &fname = na_pair.first;
294     const auto &fargs = na_pair.second;
295 
296     // Try to see if we already created the function.
297     auto f = module.getFunction(fname);
298 
299     if (f == nullptr) {
300         // The function was not created before, do it now.
301 
302         // Fetch the current insertion block.
303         auto orig_bb = builder.GetInsertBlock();
304 
305         // The return type is val_t.
306         auto *ft = llvm::FunctionType::get(val_t, fargs, false);
307         // Create the function
308         f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &module);
309         assert(f != nullptr);
310 
311         // Fetch the necessary function arguments.
312         auto ord = f->args().begin();
313         auto diff_ptr = f->args().begin() + 2;
314         auto b_idx = f->args().begin() + 5;
315         auto dep_idx = f->args().begin() + 6;
316 
317         // Create a new basic block to start insertion into.
318         builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
319 
320         // Create the return value.
321         auto retval = builder.CreateAlloca(val_t);
322 
323         // Create the accumulator.
324         auto acc = builder.CreateAlloca(val_t);
325 
326         llvm_if_then_else(
327             s, builder.CreateICmpEQ(ord, builder.getInt32(0)),
328             [&]() {
329                 // For order 0, invoke the function on the order 0 of b_idx.
330                 builder.CreateStore(codegen_from_values<T>(
331                                         s, fn, {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), b_idx)}),
332                                     retval);
333             },
334             [&]() {
335                 // Init the accumulator.
336                 builder.CreateStore(vector_splat(builder, codegen<T>(s, number{0.}), batch_size), acc);
337 
338                 // Run the loop.
339                 llvm_loop_u32(s, builder.getInt32(1), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) {
340                     auto bj = taylor_c_load_diff(s, diff_ptr, n_uvars, j, b_idx);
341                     auto cnj = taylor_c_load_diff(s, diff_ptr, n_uvars, builder.CreateSub(ord, j), dep_idx);
342 
343                     auto fac = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type<T>(context)), batch_size);
344 
345                     builder.CreateStore(builder.CreateFAdd(builder.CreateLoad(acc),
346                                                            builder.CreateFMul(fac, builder.CreateFMul(cnj, bj))),
347                                         acc);
348                 });
349 
350                 // Divide by the order and subtract from b^[n] to produce the return value.
351                 auto ord_v = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type<T>(context)), batch_size);
352 
353                 builder.CreateStore(builder.CreateFSub(taylor_c_load_diff(s, diff_ptr, n_uvars, ord, b_idx),
354                                                        builder.CreateFDiv(builder.CreateLoad(acc), ord_v)),
355                                     retval);
356             });
357 
358         // Return the result.
359         builder.CreateRet(builder.CreateLoad(retval));
360 
361         // Verify.
362         s.verify_function(f);
363 
364         // Restore the original insertion block.
365         builder.SetInsertPoint(orig_bb);
366     } else {
367         // The function was created before. Check if the signatures match.
368         // NOTE: there could be a mismatch if the derivative function was created
369         // and then optimised - optimisation might remove arguments which are compile-time
370         // constants.
371         if (!compare_function_signature(f, val_t, fargs)) {
372             throw std::invalid_argument("Inconsistent function signature for the Taylor derivative of the hyperbolic "
373                                         "tangent in compact mode detected");
374         }
375     }
376 
377     return f;
378 }
379 
380 // All the other cases.
381 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_c_diff_func_tanh_impl(llvm_state &,const tanh_impl &,const U &,std::uint32_t,std::uint32_t)382 llvm::Function *taylor_c_diff_func_tanh_impl(llvm_state &, const tanh_impl &, const U &, std::uint32_t, std::uint32_t)
383 {
384     throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative "
385                                 "of a hyperbolic tangent in compact mode");
386 }
387 
388 template <typename T>
taylor_c_diff_func_tanh(llvm_state & s,const tanh_impl & fn,std::uint32_t n_uvars,std::uint32_t batch_size)389 llvm::Function *taylor_c_diff_func_tanh(llvm_state &s, const tanh_impl &fn, std::uint32_t n_uvars,
390                                         std::uint32_t batch_size)
391 {
392     assert(fn.args().size() == 1u);
393 
394     return std::visit([&](const auto &v) { return taylor_c_diff_func_tanh_impl<T>(s, fn, v, n_uvars, batch_size); },
395                       fn.args()[0].value());
396 }
397 
398 } // namespace
399 
taylor_c_diff_func_dbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const400 llvm::Function *tanh_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
401                                                   bool) const
402 {
403     return taylor_c_diff_func_tanh<double>(s, *this, n_uvars, batch_size);
404 }
405 
taylor_c_diff_func_ldbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const406 llvm::Function *tanh_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
407                                                    bool) const
408 {
409     return taylor_c_diff_func_tanh<long double>(s, *this, n_uvars, batch_size);
410 }
411 
412 #if defined(HEYOKA_HAVE_REAL128)
413 
taylor_c_diff_func_f128(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const414 llvm::Function *tanh_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
415                                                    bool) const
416 {
417     return taylor_c_diff_func_tanh<mppp::real128>(s, *this, n_uvars, batch_size);
418 }
419 
420 #endif
421 
422 } // namespace detail
423 
tanh(expression e)424 expression tanh(expression e)
425 {
426     return expression{func{detail::tanh_impl(std::move(e))}};
427 }
428 
429 } // namespace heyoka
430 
431 HEYOKA_S11N_FUNC_EXPORT_IMPLEMENT(heyoka::detail::tanh_impl)
432