1 // Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
2 //
3 // This file is part of the heyoka library.
4 //
5 // This Source Code Form is subject to the terms of the Mozilla
6 // Public License v. 2.0. If a copy of the MPL was not distributed
7 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 #include <heyoka/config.hpp>
10 
11 #include <cassert>
12 #include <cmath>
13 #include <cstdint>
14 #include <initializer_list>
15 #include <stdexcept>
16 #include <string>
17 #include <type_traits>
18 #include <unordered_map>
19 #include <utility>
20 #include <variant>
21 #include <vector>
22 
23 #include <boost/numeric/conversion/cast.hpp>
24 
25 #include <fmt/format.h>
26 
27 #include <llvm/IR/Attributes.h>
28 #include <llvm/IR/BasicBlock.h>
29 #include <llvm/IR/DerivedTypes.h>
30 #include <llvm/IR/Function.h>
31 #include <llvm/IR/IRBuilder.h>
32 #include <llvm/IR/LLVMContext.h>
33 #include <llvm/IR/Module.h>
34 #include <llvm/IR/Type.h>
35 #include <llvm/IR/Value.h>
36 #include <llvm/Support/Casting.h>
37 
38 #if defined(HEYOKA_HAVE_REAL128)
39 
40 #include <mp++/real128.hpp>
41 
42 #endif
43 
44 #include <heyoka/detail/llvm_helpers.hpp>
45 #include <heyoka/detail/llvm_vector_type.hpp>
46 #include <heyoka/detail/sleef.hpp>
47 #include <heyoka/detail/string_conv.hpp>
48 #include <heyoka/detail/taylor_common.hpp>
49 #include <heyoka/expression.hpp>
50 #include <heyoka/func.hpp>
51 #include <heyoka/llvm_state.hpp>
52 #include <heyoka/math/square.hpp>
53 #include <heyoka/math/tan.hpp>
54 #include <heyoka/number.hpp>
55 #include <heyoka/s11n.hpp>
56 #include <heyoka/taylor.hpp>
57 #include <heyoka/variable.hpp>
58 
59 #if defined(_MSC_VER) && !defined(__clang__)
60 
61 // NOTE: MSVC has issues with the other "using"
62 // statement form.
63 using namespace fmt::literals;
64 
65 #else
66 
67 using fmt::literals::operator""_format;
68 
69 #endif
70 
71 namespace heyoka
72 {
73 
74 namespace detail
75 {
76 
tan_impl(expression e)77 tan_impl::tan_impl(expression e) : func_base("tan", std::vector{std::move(e)}) {}
78 
tan_impl()79 tan_impl::tan_impl() : tan_impl(0_dbl) {}
80 
codegen_dbl(llvm_state & s,const std::vector<llvm::Value * > & args) const81 llvm::Value *tan_impl::codegen_dbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
82 {
83     assert(args.size() == 1u);
84     assert(args[0] != nullptr);
85 
86     if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(args[0]->getType())) {
87         if (const auto sfn = sleef_function_name(s.context(), "tan", vec_t->getElementType(),
88                                                  boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
89             !sfn.empty()) {
90             return llvm_invoke_external(
91                 s, sfn, vec_t, args,
92                 // NOTE: in theory we may add ReadNone here as well,
93                 // but for some reason, at least up to LLVM 10,
94                 // this causes strange codegen issues. Revisit
95                 // in the future.
96                 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
97         }
98     }
99 
100     return call_extern_vec(s, args[0], "tan");
101 }
102 
codegen_ldbl(llvm_state & s,const std::vector<llvm::Value * > & args) const103 llvm::Value *tan_impl::codegen_ldbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
104 {
105     assert(args.size() == 1u);
106     assert(args[0] != nullptr);
107 
108     return call_extern_vec(s, args[0],
109 #if defined(_MSC_VER)
110                            // NOTE: it seems like the MSVC stdlib does not have a tanl function,
111                            // because LLVM complains about the symbol "tanl" not being
112                            // defined. Hence, use our own wrapper instead.
113                            "heyoka_tanl"
114 #else
115                            "tanl"
116 #endif
117     );
118 }
119 
120 #if defined(HEYOKA_HAVE_REAL128)
121 
codegen_f128(llvm_state & s,const std::vector<llvm::Value * > & args) const122 llvm::Value *tan_impl::codegen_f128(llvm_state &s, const std::vector<llvm::Value *> &args) const
123 {
124     assert(args.size() == 1u);
125     assert(args[0] != nullptr);
126 
127     return call_extern_vec(s, args[0], "tanq");
128 }
129 
130 #endif
131 
eval_dbl(const std::unordered_map<std::string,double> & map,const std::vector<double> & pars) const132 double tan_impl::eval_dbl(const std::unordered_map<std::string, double> &map, const std::vector<double> &pars) const
133 {
134     assert(args().size() == 1u);
135 
136     return std::tan(heyoka::eval_dbl(args()[0], map, pars));
137 }
138 
eval_ldbl(const std::unordered_map<std::string,long double> & map,const std::vector<long double> & pars) const139 long double tan_impl::eval_ldbl(const std::unordered_map<std::string, long double> &map,
140                                 const std::vector<long double> &pars) const
141 {
142     assert(args().size() == 1u);
143 
144     return std::tan(heyoka::eval_ldbl(args()[0], map, pars));
145 }
146 
147 #if defined(HEYOKA_HAVE_REAL128)
eval_f128(const std::unordered_map<std::string,mppp::real128> & map,const std::vector<mppp::real128> & pars) const148 mppp::real128 tan_impl::eval_f128(const std::unordered_map<std::string, mppp::real128> &map,
149                                   const std::vector<mppp::real128> &pars) const
150 {
151     assert(args().size() == 1u);
152 
153     return mppp::tan(heyoka::eval_f128(args()[0], map, pars));
154 }
155 #endif
156 
eval_batch_dbl(std::vector<double> & out,const std::unordered_map<std::string,std::vector<double>> & map,const std::vector<double> & pars) const157 void tan_impl::eval_batch_dbl(std::vector<double> &out, const std::unordered_map<std::string, std::vector<double>> &map,
158                               const std::vector<double> &pars) const
159 {
160     assert(args().size() == 1u);
161 
162     heyoka::eval_batch_dbl(out, args()[0], map, pars);
163     for (auto &el : out) {
164         el = std::tan(el);
165     }
166 }
167 
eval_num_dbl(const std::vector<double> & a) const168 double tan_impl::eval_num_dbl(const std::vector<double> &a) const
169 {
170     if (a.size() != 1u) {
171         throw std::invalid_argument(
172             "Inconsistent number of arguments when computing the numerical value of the "
173             "tangent over doubles (1 argument was expected, but {} arguments were provided"_format(a.size()));
174     }
175 
176     return std::tan(a[0]);
177 }
178 
deval_num_dbl(const std::vector<double> & a,std::vector<double>::size_type i) const179 double tan_impl::deval_num_dbl(const std::vector<double> &a, std::vector<double>::size_type i) const
180 {
181     if (a.size() != 1u || i != 0u) {
182         throw std::invalid_argument("Inconsistent number of arguments or derivative requested when computing the "
183                                     "numerical derivative of the tangent");
184     }
185 
186     return std::tan(a[0]);
187 }
188 
taylor_decompose(taylor_dc_t & u_vars_defs)189 taylor_dc_t::size_type tan_impl::taylor_decompose(taylor_dc_t &u_vars_defs) &&
190 {
191     assert(args().size() == 1u);
192 
193     // Append the tan decomposition.
194     u_vars_defs.emplace_back(func{std::move(*this)}, std::vector<std::uint32_t>{});
195 
196     // Append the auxiliary function tan(arg) * tan(arg).
197     u_vars_defs.emplace_back(square(expression{variable{"u_{}"_format(u_vars_defs.size() - 1u)}}),
198                              std::vector<std::uint32_t>{});
199 
200     // Add the hidden dep.
201     (u_vars_defs.end() - 2)->second.push_back(boost::numeric_cast<std::uint32_t>(u_vars_defs.size() - 1u));
202 
203     return u_vars_defs.size() - 2u;
204 }
205 
206 namespace
207 {
208 
209 // Derivative of tan(number).
210 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_diff_tan_impl(llvm_state & s,const tan_impl & f,const std::vector<std::uint32_t> &,const U & num,const std::vector<llvm::Value * > &,llvm::Value * par_ptr,std::uint32_t,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)211 llvm::Value *taylor_diff_tan_impl(llvm_state &s, const tan_impl &f, const std::vector<std::uint32_t> &, const U &num,
212                                   const std::vector<llvm::Value *> &, llvm::Value *par_ptr, std::uint32_t,
213                                   std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
214 {
215     if (order == 0u) {
216         return codegen_from_values<T>(s, f, {taylor_codegen_numparam<T>(s, num, par_ptr, batch_size)});
217     } else {
218         return vector_splat(s.builder(), codegen<T>(s, number{0.}), batch_size);
219     }
220 }
221 
222 template <typename T>
taylor_diff_tan_impl(llvm_state & s,const tan_impl & f,const std::vector<std::uint32_t> & deps,const variable & var,const std::vector<llvm::Value * > & arr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)223 llvm::Value *taylor_diff_tan_impl(llvm_state &s, const tan_impl &f, const std::vector<std::uint32_t> &deps,
224                                   const variable &var, const std::vector<llvm::Value *> &arr, llvm::Value *,
225                                   std::uint32_t n_uvars, std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
226 {
227     auto &builder = s.builder();
228 
229     // Fetch the index of the variable.
230     const auto u_idx = uname_to_index(var.name());
231 
232     if (order == 0u) {
233         return codegen_from_values<T>(s, f, {taylor_fetch_diff(arr, u_idx, 0, n_uvars)});
234     }
235 
236     // NOTE: iteration in the [1, order] range.
237     std::vector<llvm::Value *> sum;
238     for (std::uint32_t j = 1; j <= order; ++j) {
239         // NOTE: the only hidden dependency contains the index of the
240         // u variable whose definition is tan(var) * tan(var).
241         auto bj = taylor_fetch_diff(arr, u_idx, j, n_uvars);
242         auto cnj = taylor_fetch_diff(arr, deps[0], order - j, n_uvars);
243 
244         auto fac = vector_splat(builder, codegen<T>(s, number(static_cast<T>(j))), batch_size);
245 
246         // Add j*cnj*bj to the sum.
247         sum.push_back(builder.CreateFMul(fac, builder.CreateFMul(cnj, bj)));
248     }
249 
250     // Init the return value as the result of the sum.
251     auto ret_acc = pairwise_sum(builder, sum);
252 
253     // Divide by order.
254     ret_acc
255         = builder.CreateFDiv(ret_acc, vector_splat(builder, codegen<T>(s, number(static_cast<T>(order))), batch_size));
256 
257     // Create and return the result.
258     return builder.CreateFAdd(taylor_fetch_diff(arr, u_idx, order, n_uvars), ret_acc);
259 }
260 
261 // All the other cases.
262 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_diff_tan_impl(llvm_state &,const tan_impl &,const std::vector<std::uint32_t> &,const U &,const std::vector<llvm::Value * > &,llvm::Value *,std::uint32_t,std::uint32_t,std::uint32_t,std::uint32_t)263 llvm::Value *taylor_diff_tan_impl(llvm_state &, const tan_impl &, const std::vector<std::uint32_t> &, const U &,
264                                   const std::vector<llvm::Value *> &, llvm::Value *, std::uint32_t, std::uint32_t,
265                                   std::uint32_t, std::uint32_t)
266 {
267     throw std::invalid_argument(
268         "An invalid argument type was encountered while trying to build the Taylor derivative of a tangent");
269 }
270 
271 template <typename T>
taylor_diff_tan(llvm_state & s,const tan_impl & f,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size)272 llvm::Value *taylor_diff_tan(llvm_state &s, const tan_impl &f, const std::vector<std::uint32_t> &deps,
273                              const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, std::uint32_t n_uvars,
274                              std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size)
275 {
276     assert(f.args().size() == 1u);
277 
278     if (deps.size() != 1u) {
279         throw std::invalid_argument(
280             "A hidden dependency vector of size 1 is expected in order to compute the Taylor "
281             "derivative of the tangent, but a vector of size {} was passed instead"_format(deps.size()));
282     }
283 
284     return std::visit(
285         [&](const auto &v) {
286             return taylor_diff_tan_impl<T>(s, f, deps, v, arr, par_ptr, n_uvars, order, idx, batch_size);
287         },
288         f.args()[0].value());
289 }
290 
291 } // namespace
292 
taylor_diff_dbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const293 llvm::Value *tan_impl::taylor_diff_dbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
294                                        const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
295                                        std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
296                                        std::uint32_t batch_size, bool) const
297 {
298     return taylor_diff_tan<double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
299 }
300 
taylor_diff_ldbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const301 llvm::Value *tan_impl::taylor_diff_ldbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
302                                         const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
303                                         std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
304                                         std::uint32_t batch_size, bool) const
305 {
306     return taylor_diff_tan<long double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
307 }
308 
309 #if defined(HEYOKA_HAVE_REAL128)
310 
taylor_diff_f128(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const311 llvm::Value *tan_impl::taylor_diff_f128(llvm_state &s, const std::vector<std::uint32_t> &deps,
312                                         const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
313                                         std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
314                                         std::uint32_t batch_size, bool) const
315 {
316     return taylor_diff_tan<mppp::real128>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
317 }
318 
319 #endif
320 
321 namespace
322 {
323 
324 // Derivative of tan(number).
325 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_c_diff_func_tan_impl(llvm_state & s,const tan_impl & fn,const U & num,std::uint32_t n_uvars,std::uint32_t batch_size)326 llvm::Function *taylor_c_diff_func_tan_impl(llvm_state &s, const tan_impl &fn, const U &num, std::uint32_t n_uvars,
327                                             std::uint32_t batch_size)
328 {
329     return taylor_c_diff_func_unary_num_det<T>(s, fn, num, n_uvars, batch_size, "tan", 1);
330 }
331 
332 // Derivative of tan(variable).
333 template <typename T>
taylor_c_diff_func_tan_impl(llvm_state & s,const tan_impl & fn,const variable & var,std::uint32_t n_uvars,std::uint32_t batch_size)334 llvm::Function *taylor_c_diff_func_tan_impl(llvm_state &s, const tan_impl &fn, const variable &var,
335                                             std::uint32_t n_uvars, std::uint32_t batch_size)
336 {
337     auto &module = s.module();
338     auto &builder = s.builder();
339     auto &context = s.context();
340 
341     // Fetch the floating-point type.
342     auto val_t = to_llvm_vector_type<T>(context, batch_size);
343 
344     // Fetch the function name and arguments.
345     const auto na_pair = taylor_c_diff_func_name_args<T>(context, "tan", n_uvars, batch_size, {var}, 1);
346     const auto &fname = na_pair.first;
347     const auto &fargs = na_pair.second;
348 
349     // Try to see if we already created the function.
350     auto f = module.getFunction(fname);
351 
352     if (f == nullptr) {
353         // The function was not created before, do it now.
354 
355         // Fetch the current insertion block.
356         auto orig_bb = builder.GetInsertBlock();
357 
358         // The return type is val_t.
359         auto *ft = llvm::FunctionType::get(val_t, fargs, false);
360         // Create the function
361         f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &module);
362         assert(f != nullptr);
363 
364         // Fetch the necessary function arguments.
365         auto ord = f->args().begin();
366         auto diff_ptr = f->args().begin() + 2;
367         auto var_idx = f->args().begin() + 5;
368         auto dep_idx = f->args().begin() + 6;
369 
370         // Create a new basic block to start insertion into.
371         builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
372 
373         // Create the return value.
374         auto retval = builder.CreateAlloca(val_t);
375 
376         // Create the accumulator.
377         auto acc = builder.CreateAlloca(val_t);
378 
379         llvm_if_then_else(
380             s, builder.CreateICmpEQ(ord, builder.getInt32(0)),
381             [&]() {
382                 // For order 0, invoke the function on the order 0 of var_idx.
383                 builder.CreateStore(
384                     codegen_from_values<T>(s, fn,
385                                            {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), var_idx)}),
386                     retval);
387             },
388             [&]() {
389                 // Init the accumulator.
390                 builder.CreateStore(vector_splat(builder, codegen<T>(s, number{0.}), batch_size), acc);
391 
392                 // Run the loop.
393                 llvm_loop_u32(s, builder.getInt32(1), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) {
394                     auto bj = taylor_c_load_diff(s, diff_ptr, n_uvars, j, var_idx);
395                     auto cnj = taylor_c_load_diff(s, diff_ptr, n_uvars, builder.CreateSub(ord, j), dep_idx);
396 
397                     auto fac = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type<T>(context)), batch_size);
398 
399                     builder.CreateStore(builder.CreateFAdd(builder.CreateLoad(acc),
400                                                            builder.CreateFMul(fac, builder.CreateFMul(cnj, bj))),
401                                         acc);
402                 });
403 
404                 // Divide by the order and add to b^[n] to produce the return value.
405                 auto ord_v = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type<T>(context)), batch_size);
406 
407                 builder.CreateStore(builder.CreateFAdd(taylor_c_load_diff(s, diff_ptr, n_uvars, ord, var_idx),
408                                                        builder.CreateFDiv(builder.CreateLoad(acc), ord_v)),
409                                     retval);
410             });
411 
412         // Return the result.
413         builder.CreateRet(builder.CreateLoad(retval));
414 
415         // Verify.
416         s.verify_function(f);
417 
418         // Restore the original insertion block.
419         builder.SetInsertPoint(orig_bb);
420     } else {
421         // The function was created before. Check if the signatures match.
422         // NOTE: there could be a mismatch if the derivative function was created
423         // and then optimised - optimisation might remove arguments which are compile-time
424         // constants.
425         if (!compare_function_signature(f, val_t, fargs)) {
426             throw std::invalid_argument(
427                 "Inconsistent function signature for the Taylor derivative of the tangent in compact mode detected");
428         }
429     }
430 
431     return f;
432 }
433 
434 // All the other cases.
435 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_c_diff_func_tan_impl(llvm_state &,const tan_impl &,const U &,std::uint32_t,std::uint32_t)436 llvm::Function *taylor_c_diff_func_tan_impl(llvm_state &, const tan_impl &, const U &, std::uint32_t, std::uint32_t)
437 {
438     throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative "
439                                 "of a tangent in compact mode");
440 }
441 
442 template <typename T>
taylor_c_diff_func_tan(llvm_state & s,const tan_impl & fn,std::uint32_t n_uvars,std::uint32_t batch_size)443 llvm::Function *taylor_c_diff_func_tan(llvm_state &s, const tan_impl &fn, std::uint32_t n_uvars,
444                                        std::uint32_t batch_size)
445 {
446     assert(fn.args().size() == 1u);
447 
448     return std::visit([&](const auto &v) { return taylor_c_diff_func_tan_impl<T>(s, fn, v, n_uvars, batch_size); },
449                       fn.args()[0].value());
450 }
451 
452 } // namespace
453 
taylor_c_diff_func_dbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const454 llvm::Function *tan_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
455                                                  bool) const
456 {
457     return taylor_c_diff_func_tan<double>(s, *this, n_uvars, batch_size);
458 }
459 
taylor_c_diff_func_ldbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const460 llvm::Function *tan_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
461                                                   bool) const
462 {
463     return taylor_c_diff_func_tan<long double>(s, *this, n_uvars, batch_size);
464 }
465 
466 #if defined(HEYOKA_HAVE_REAL128)
467 
taylor_c_diff_func_f128(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const468 llvm::Function *tan_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
469                                                   bool) const
470 {
471     return taylor_c_diff_func_tan<mppp::real128>(s, *this, n_uvars, batch_size);
472 }
473 
474 #endif
475 
gradient() const476 std::vector<expression> tan_impl::gradient() const
477 {
478     assert(args().size() == 1u);
479     // NOTE: if single-precision floats are implemented,
480     // should 1_dbl become 1_flt?
481     return {1_dbl + square(tan(args()[0]))};
482 }
483 
484 } // namespace detail
485 
tan(expression e)486 expression tan(expression e)
487 {
488     return expression{func{detail::tan_impl(std::move(e))}};
489 }
490 
491 } // namespace heyoka
492 
493 HEYOKA_S11N_FUNC_EXPORT_IMPLEMENT(heyoka::detail::tan_impl)
494