1 // Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
2 //
3 // This file is part of the heyoka library.
4 //
5 // This Source Code Form is subject to the terms of the Mozilla
6 // Public License v. 2.0. If a copy of the MPL was not distributed
7 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 #include <heyoka/config.hpp>
10 
11 #include <cassert>
12 #include <cmath>
13 #include <cstdint>
14 #include <initializer_list>
15 #include <stdexcept>
16 #include <string>
17 #include <type_traits>
18 #include <unordered_map>
19 #include <utility>
20 #include <variant>
21 #include <vector>
22 
23 #include <boost/numeric/conversion/cast.hpp>
24 
25 #include <fmt/format.h>
26 
27 #include <llvm/IR/Attributes.h>
28 #include <llvm/IR/BasicBlock.h>
29 #include <llvm/IR/DerivedTypes.h>
30 #include <llvm/IR/Function.h>
31 #include <llvm/IR/IRBuilder.h>
32 #include <llvm/IR/LLVMContext.h>
33 #include <llvm/IR/Module.h>
34 #include <llvm/IR/Type.h>
35 #include <llvm/IR/Value.h>
36 #include <llvm/Support/Casting.h>
37 
38 #if defined(HEYOKA_HAVE_REAL128)
39 
40 #include <mp++/real128.hpp>
41 
42 #endif
43 
44 #include <heyoka/detail/llvm_helpers.hpp>
45 #include <heyoka/detail/llvm_vector_type.hpp>
46 #include <heyoka/detail/sleef.hpp>
47 #include <heyoka/detail/string_conv.hpp>
48 #include <heyoka/detail/taylor_common.hpp>
49 #include <heyoka/expression.hpp>
50 #include <heyoka/func.hpp>
51 #include <heyoka/llvm_state.hpp>
52 #include <heyoka/math/cos.hpp>
53 #include <heyoka/math/neg.hpp>
54 #include <heyoka/math/sin.hpp>
55 #include <heyoka/number.hpp>
56 #include <heyoka/s11n.hpp>
57 #include <heyoka/taylor.hpp>
58 #include <heyoka/variable.hpp>
59 
60 #if defined(_MSC_VER) && !defined(__clang__)
61 
62 // NOTE: MSVC has issues with the other "using"
63 // statement form.
64 using namespace fmt::literals;
65 
66 #else
67 
68 using fmt::literals::operator""_format;
69 
70 #endif
71 
72 namespace heyoka
73 {
74 
75 namespace detail
76 {
77 
cos_impl(expression e)78 cos_impl::cos_impl(expression e) : func_base("cos", std::vector{std::move(e)}) {}
79 
cos_impl()80 cos_impl::cos_impl() : cos_impl(0_dbl) {}
81 
codegen_dbl(llvm_state & s,const std::vector<llvm::Value * > & args) const82 llvm::Value *cos_impl::codegen_dbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
83 {
84     assert(args.size() == 1u);
85     assert(args[0] != nullptr);
86 
87     if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(args[0]->getType())) {
88         if (const auto sfn = sleef_function_name(s.context(), "cos", vec_t->getElementType(),
89                                                  boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
90             !sfn.empty()) {
91             return llvm_invoke_external(
92                 s, sfn, vec_t, args,
93                 // NOTE: in theory we may add ReadNone here as well,
94                 // but for some reason, at least up to LLVM 10,
95                 // this causes strange codegen issues. Revisit
96                 // in the future.
97                 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
98         }
99     }
100 
101     return llvm_invoke_intrinsic(s, "llvm.cos", {args[0]->getType()}, args);
102 }
103 
codegen_ldbl(llvm_state & s,const std::vector<llvm::Value * > & args) const104 llvm::Value *cos_impl::codegen_ldbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
105 {
106     assert(args.size() == 1u);
107     assert(args[0] != nullptr);
108 
109     return llvm_invoke_intrinsic(s, "llvm.cos", {args[0]->getType()}, args);
110 }
111 
112 #if defined(HEYOKA_HAVE_REAL128)
113 
codegen_f128(llvm_state & s,const std::vector<llvm::Value * > & args) const114 llvm::Value *cos_impl::codegen_f128(llvm_state &s, const std::vector<llvm::Value *> &args) const
115 {
116     assert(args.size() == 1u);
117     assert(args[0] != nullptr);
118 
119     return call_extern_vec(s, args[0], "cosq");
120 }
121 
122 #endif
123 
eval_dbl(const std::unordered_map<std::string,double> & map,const std::vector<double> & pars) const124 double cos_impl::eval_dbl(const std::unordered_map<std::string, double> &map, const std::vector<double> &pars) const
125 {
126     assert(args().size() == 1u);
127 
128     return std::cos(heyoka::eval_dbl(args()[0], map, pars));
129 }
130 
eval_ldbl(const std::unordered_map<std::string,long double> & map,const std::vector<long double> & pars) const131 long double cos_impl::eval_ldbl(const std::unordered_map<std::string, long double> &map,
132                                 const std::vector<long double> &pars) const
133 {
134     assert(args().size() == 1u);
135 
136     return std::cos(heyoka::eval_ldbl(args()[0], map, pars));
137 }
138 
139 #if defined(HEYOKA_HAVE_REAL128)
eval_f128(const std::unordered_map<std::string,mppp::real128> & map,const std::vector<mppp::real128> & pars) const140 mppp::real128 cos_impl::eval_f128(const std::unordered_map<std::string, mppp::real128> &map,
141                                   const std::vector<mppp::real128> &pars) const
142 {
143     assert(args().size() == 1u);
144 
145     return mppp::cos(heyoka::eval_f128(args()[0], map, pars));
146 }
147 #endif
148 
eval_batch_dbl(std::vector<double> & out,const std::unordered_map<std::string,std::vector<double>> & map,const std::vector<double> & pars) const149 void cos_impl::eval_batch_dbl(std::vector<double> &out, const std::unordered_map<std::string, std::vector<double>> &map,
150                               const std::vector<double> &pars) const
151 {
152     assert(args().size() == 1u);
153 
154     heyoka::eval_batch_dbl(out, args()[0], map, pars);
155     for (auto &el : out) {
156         el = std::cos(el);
157     }
158 }
159 
eval_num_dbl(const std::vector<double> & a) const160 double cos_impl::eval_num_dbl(const std::vector<double> &a) const
161 {
162     if (a.size() != 1u) {
163         throw std::invalid_argument(
164             "Inconsistent number of arguments when computing the numerical value of the "
165             "cosine over doubles (1 argument was expected, but {} arguments were provided"_format(a.size()));
166     }
167 
168     return std::cos(a[0]);
169 }
170 
deval_num_dbl(const std::vector<double> & a,std::vector<double>::size_type i) const171 double cos_impl::deval_num_dbl(const std::vector<double> &a, std::vector<double>::size_type i) const
172 {
173     if (a.size() != 1u || i != 0u) {
174         throw std::invalid_argument("Inconsistent number of arguments or derivative requested when computing the "
175                                     "numerical derivative of the cosine");
176     }
177 
178     return -std::sin(a[0]);
179 }
180 
taylor_decompose(taylor_dc_t & u_vars_defs)181 taylor_dc_t::size_type cos_impl::taylor_decompose(taylor_dc_t &u_vars_defs) &&
182 {
183     assert(args().size() == 1u);
184 
185     // Append the sine decomposition.
186     u_vars_defs.emplace_back(sin(args()[0]), std::vector<std::uint32_t>{});
187 
188     // Append the cosine decomposition.
189     u_vars_defs.emplace_back(func{std::move(*this)}, std::vector<std::uint32_t>{});
190 
191     // Add the hidden deps.
192     (u_vars_defs.end() - 2)->second.push_back(boost::numeric_cast<std::uint32_t>(u_vars_defs.size() - 1u));
193     (u_vars_defs.end() - 1)->second.push_back(boost::numeric_cast<std::uint32_t>(u_vars_defs.size() - 2u));
194 
195     // Compute the return value (pointing to the
196     // decomposed cosine).
197     return u_vars_defs.size() - 1u;
198 }
199 
200 namespace
201 {
202 
203 // Derivative of cos(number).
204 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_diff_cos_impl(llvm_state & s,const cos_impl & f,const std::vector<std::uint32_t> &,const U & num,const std::vector<llvm::Value * > &,llvm::Value * par_ptr,std::uint32_t,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)205 llvm::Value *taylor_diff_cos_impl(llvm_state &s, const cos_impl &f, const std::vector<std::uint32_t> &, const U &num,
206                                   const std::vector<llvm::Value *> &, llvm::Value *par_ptr, std::uint32_t,
207                                   std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
208 {
209     if (order == 0u) {
210         return codegen_from_values<T>(s, f, {taylor_codegen_numparam<T>(s, num, par_ptr, batch_size)});
211     } else {
212         return vector_splat(s.builder(), codegen<T>(s, number{0.}), batch_size);
213     }
214 }
215 
216 template <typename T>
taylor_diff_cos_impl(llvm_state & s,const cos_impl & f,const std::vector<std::uint32_t> & deps,const variable & var,const std::vector<llvm::Value * > & arr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)217 llvm::Value *taylor_diff_cos_impl(llvm_state &s, const cos_impl &f, const std::vector<std::uint32_t> &deps,
218                                   const variable &var, const std::vector<llvm::Value *> &arr, llvm::Value *,
219                                   std::uint32_t n_uvars, std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
220 {
221     auto &builder = s.builder();
222 
223     // Fetch the index of the variable.
224     const auto u_idx = uname_to_index(var.name());
225 
226     if (order == 0u) {
227         return codegen_from_values<T>(s, f, {taylor_fetch_diff(arr, u_idx, 0, n_uvars)});
228     }
229 
230     // NOTE: iteration in the [1, order] range
231     // (i.e., order included).
232     std::vector<llvm::Value *> sum;
233     for (std::uint32_t j = 1; j <= order; ++j) {
234         // NOTE: the only hidden dependency contains the index of the
235         // u variable whose definition is sin(var).
236         auto v0 = taylor_fetch_diff(arr, deps[0], order - j, n_uvars);
237         auto v1 = taylor_fetch_diff(arr, u_idx, j, n_uvars);
238 
239         auto fac = vector_splat(builder, codegen<T>(s, number(static_cast<T>(j))), batch_size);
240 
241         // Add j*v0*v1 to the sum.
242         sum.push_back(builder.CreateFMul(fac, builder.CreateFMul(v0, v1)));
243     }
244 
245     // Init the return value as the result of the sum.
246     auto ret_acc = pairwise_sum(builder, sum);
247 
248     // Compute and return the result: -ret_acc / order
249     auto div = vector_splat(builder, codegen<T>(s, number(-static_cast<T>(order))), batch_size);
250 
251     return builder.CreateFDiv(ret_acc, div);
252 }
253 
254 // All the other cases.
255 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_diff_cos_impl(llvm_state &,const cos_impl &,const std::vector<std::uint32_t> &,const U &,const std::vector<llvm::Value * > &,llvm::Value *,std::uint32_t,std::uint32_t,std::uint32_t,std::uint32_t)256 llvm::Value *taylor_diff_cos_impl(llvm_state &, const cos_impl &, const std::vector<std::uint32_t> &, const U &,
257                                   const std::vector<llvm::Value *> &, llvm::Value *, std::uint32_t, std::uint32_t,
258                                   std::uint32_t, std::uint32_t)
259 {
260     throw std::invalid_argument(
261         "An invalid argument type was encountered while trying to build the Taylor derivative of a cosine");
262 }
263 
264 template <typename T>
taylor_diff_cos(llvm_state & s,const cos_impl & f,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size)265 llvm::Value *taylor_diff_cos(llvm_state &s, const cos_impl &f, const std::vector<std::uint32_t> &deps,
266                              const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, std::uint32_t n_uvars,
267                              std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size)
268 {
269     assert(f.args().size() == 1u);
270 
271     if (deps.size() != 1u) {
272         throw std::invalid_argument(
273             "A hidden dependency vector of size 1 is expected in order to compute the Taylor "
274             "derivative of the cosine, but a vector of size {} was passed instead"_format(deps.size()));
275     }
276 
277     return std::visit(
278         [&](const auto &v) {
279             return taylor_diff_cos_impl<T>(s, f, deps, v, arr, par_ptr, n_uvars, order, idx, batch_size);
280         },
281         f.args()[0].value());
282 }
283 
284 } // namespace
285 
taylor_diff_dbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const286 llvm::Value *cos_impl::taylor_diff_dbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
287                                        const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
288                                        std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
289                                        std::uint32_t batch_size, bool) const
290 {
291     return taylor_diff_cos<double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
292 }
293 
taylor_diff_ldbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const294 llvm::Value *cos_impl::taylor_diff_ldbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
295                                         const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
296                                         std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
297                                         std::uint32_t batch_size, bool) const
298 {
299     return taylor_diff_cos<long double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
300 }
301 
302 #if defined(HEYOKA_HAVE_REAL128)
303 
taylor_diff_f128(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const304 llvm::Value *cos_impl::taylor_diff_f128(llvm_state &s, const std::vector<std::uint32_t> &deps,
305                                         const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
306                                         std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
307                                         std::uint32_t batch_size, bool) const
308 {
309     return taylor_diff_cos<mppp::real128>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
310 }
311 
312 #endif
313 
314 namespace
315 {
316 
317 // Derivative of cos(number).
318 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_c_diff_func_cos_impl(llvm_state & s,const cos_impl & fn,const U & num,std::uint32_t n_uvars,std::uint32_t batch_size)319 llvm::Function *taylor_c_diff_func_cos_impl(llvm_state &s, const cos_impl &fn, const U &num, std::uint32_t n_uvars,
320                                             std::uint32_t batch_size)
321 {
322     return taylor_c_diff_func_unary_num_det<T>(s, fn, num, n_uvars, batch_size, "cos", 1);
323 }
324 
325 // Derivative of cos(variable).
326 template <typename T>
taylor_c_diff_func_cos_impl(llvm_state & s,const cos_impl & fn,const variable & var,std::uint32_t n_uvars,std::uint32_t batch_size)327 llvm::Function *taylor_c_diff_func_cos_impl(llvm_state &s, const cos_impl &fn, const variable &var,
328                                             std::uint32_t n_uvars, std::uint32_t batch_size)
329 {
330     auto &module = s.module();
331     auto &builder = s.builder();
332     auto &context = s.context();
333 
334     // Fetch the floating-point type.
335     auto val_t = to_llvm_vector_type<T>(context, batch_size);
336 
337     // Fetch the function name and arguments.
338     const auto na_pair = taylor_c_diff_func_name_args<T>(context, "cos", n_uvars, batch_size, {var}, 1);
339     const auto &fname = na_pair.first;
340     const auto &fargs = na_pair.second;
341 
342     // Try to see if we already created the function.
343     auto f = module.getFunction(fname);
344 
345     if (f == nullptr) {
346         // The function was not created before, do it now.
347 
348         // Fetch the current insertion block.
349         auto orig_bb = builder.GetInsertBlock();
350 
351         // The return type is val_t.
352         auto *ft = llvm::FunctionType::get(val_t, fargs, false);
353         // Create the function
354         f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &module);
355         assert(f != nullptr);
356 
357         // Fetch the necessary function arguments.
358         auto ord = f->args().begin();
359         auto diff_ptr = f->args().begin() + 2;
360         auto var_idx = f->args().begin() + 5;
361         auto dep_idx = f->args().begin() + 6;
362 
363         // Create a new basic block to start insertion into.
364         builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
365 
366         // Create the return value.
367         auto retval = builder.CreateAlloca(val_t);
368 
369         // Create the accumulator.
370         auto acc = builder.CreateAlloca(val_t);
371 
372         llvm_if_then_else(
373             s, builder.CreateICmpEQ(ord, builder.getInt32(0)),
374             [&]() {
375                 // For order 0, invoke the function on the order 0 of var_idx.
376                 builder.CreateStore(
377                     codegen_from_values<T>(s, fn,
378                                            {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), var_idx)}),
379                     retval);
380             },
381             [&]() {
382                 // Init the accumulator.
383                 builder.CreateStore(vector_splat(builder, codegen<T>(s, number{0.}), batch_size), acc);
384 
385                 // Run the loop.
386                 llvm_loop_u32(s, builder.getInt32(1), builder.CreateAdd(ord, builder.getInt32(1)), [&](llvm::Value *j) {
387                     auto b_nj = taylor_c_load_diff(s, diff_ptr, n_uvars, builder.CreateSub(ord, j), dep_idx);
388                     auto cj = taylor_c_load_diff(s, diff_ptr, n_uvars, j, var_idx);
389 
390                     auto j_v = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type<T>(context)), batch_size);
391 
392                     builder.CreateStore(builder.CreateFAdd(builder.CreateLoad(acc),
393                                                            builder.CreateFMul(j_v, builder.CreateFMul(b_nj, cj))),
394                                         acc);
395                 });
396 
397                 // Divide by the order and negate to produce the return value.
398                 auto ord_v = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type<T>(context)), batch_size);
399                 builder.CreateStore(builder.CreateFDiv(builder.CreateLoad(acc), builder.CreateFNeg(ord_v)), retval);
400             });
401 
402         // Return the result.
403         builder.CreateRet(builder.CreateLoad(retval));
404 
405         // Verify.
406         s.verify_function(f);
407 
408         // Restore the original insertion block.
409         builder.SetInsertPoint(orig_bb);
410     } else {
411         // The function was created before. Check if the signatures match.
412         // NOTE: there could be a mismatch if the derivative function was created
413         // and then optimised - optimisation might remove arguments which are compile-time
414         // constants.
415         if (!compare_function_signature(f, val_t, fargs)) {
416             throw std::invalid_argument(
417                 "Inconsistent function signature for the Taylor derivative of the cosine in compact mode detected");
418         }
419     }
420 
421     return f;
422 }
423 
424 // All the other cases.
425 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_c_diff_func_cos_impl(llvm_state &,const cos_impl &,const U &,std::uint32_t,std::uint32_t)426 llvm::Function *taylor_c_diff_func_cos_impl(llvm_state &, const cos_impl &, const U &, std::uint32_t, std::uint32_t)
427 {
428     throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative "
429                                 "of a cosine in compact mode");
430 }
431 
432 template <typename T>
taylor_c_diff_func_cos(llvm_state & s,const cos_impl & fn,std::uint32_t n_uvars,std::uint32_t batch_size)433 llvm::Function *taylor_c_diff_func_cos(llvm_state &s, const cos_impl &fn, std::uint32_t n_uvars,
434                                        std::uint32_t batch_size)
435 {
436     assert(fn.args().size() == 1u);
437 
438     return std::visit([&](const auto &v) { return taylor_c_diff_func_cos_impl<T>(s, fn, v, n_uvars, batch_size); },
439                       fn.args()[0].value());
440 }
441 
442 } // namespace
443 
taylor_c_diff_func_dbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const444 llvm::Function *cos_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
445                                                  bool) const
446 {
447     return taylor_c_diff_func_cos<double>(s, *this, n_uvars, batch_size);
448 }
449 
taylor_c_diff_func_ldbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const450 llvm::Function *cos_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
451                                                   bool) const
452 {
453     return taylor_c_diff_func_cos<long double>(s, *this, n_uvars, batch_size);
454 }
455 
456 #if defined(HEYOKA_HAVE_REAL128)
457 
taylor_c_diff_func_f128(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const458 llvm::Function *cos_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
459                                                   bool) const
460 {
461     return taylor_c_diff_func_cos<mppp::real128>(s, *this, n_uvars, batch_size);
462 }
463 
464 #endif
465 
gradient() const466 std::vector<expression> cos_impl::gradient() const
467 {
468     assert(args().size() == 1u);
469     return {-sin(args()[0])};
470 }
471 
472 } // namespace detail
473 
cos(expression e)474 expression cos(expression e)
475 {
476     if (auto fptr = detail::is_neg(e)) {
477         // Simplify cos(-x) to cos(x).
478         assert(fptr->args().size() == 1u);
479         return cos(fptr->args()[0]);
480     } else {
481         // Simplify cos(number) to its value.
482         if (auto num_ptr = std::get_if<number>(&e.value())) {
483             return expression{std::visit(
484                 [](const auto &x) {
485                     using std::cos;
486 
487                     return number{cos(x)};
488                 },
489                 num_ptr->value())};
490         } else {
491             return expression{func{detail::cos_impl(std::move(e))}};
492         }
493     }
494 }
495 
496 } // namespace heyoka
497 
498 HEYOKA_S11N_FUNC_EXPORT_IMPLEMENT(heyoka::detail::cos_impl)
499