1 // Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
2 //
3 // This file is part of the heyoka library.
4 //
5 // This Source Code Form is subject to the terms of the Mozilla
6 // Public License v. 2.0. If a copy of the MPL was not distributed
7 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 
9 #include <heyoka/config.hpp>
10 
#include <cassert>
#include <cmath>
#include <cstdint>
#include <initializer_list>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <variant>
#include <vector>
21 
22 #include <boost/numeric/conversion/cast.hpp>
23 
24 #include <fmt/format.h>
25 
26 #include <llvm/IR/Attributes.h>
27 #include <llvm/IR/BasicBlock.h>
28 #include <llvm/IR/DerivedTypes.h>
29 #include <llvm/IR/Function.h>
30 #include <llvm/IR/IRBuilder.h>
31 #include <llvm/IR/LLVMContext.h>
32 #include <llvm/IR/Module.h>
33 #include <llvm/IR/Type.h>
34 #include <llvm/IR/Value.h>
35 #include <llvm/Support/Casting.h>
36 
37 #if defined(HEYOKA_HAVE_REAL128)
38 
39 #include <mp++/real128.hpp>
40 
41 #endif
42 
43 #include <heyoka/detail/llvm_helpers.hpp>
44 #include <heyoka/detail/llvm_vector_type.hpp>
45 #include <heyoka/detail/sleef.hpp>
46 #include <heyoka/detail/string_conv.hpp>
47 #include <heyoka/detail/taylor_common.hpp>
48 #include <heyoka/expression.hpp>
49 #include <heyoka/func.hpp>
50 #include <heyoka/llvm_state.hpp>
51 #include <heyoka/math/acos.hpp>
52 #include <heyoka/math/pow.hpp>
53 #include <heyoka/math/sqrt.hpp>
54 #include <heyoka/math/square.hpp>
55 #include <heyoka/number.hpp>
56 #include <heyoka/s11n.hpp>
57 #include <heyoka/taylor.hpp>
58 #include <heyoka/variable.hpp>
59 
60 #if defined(_MSC_VER) && !defined(__clang__)
61 
62 // NOTE: MSVC has issues with the other "using"
63 // statement form.
64 using namespace fmt::literals;
65 
66 #else
67 
68 using fmt::literals::operator""_format;
69 
70 #endif
71 
72 namespace heyoka
73 {
74 
75 namespace detail
76 {
77 
acos_impl(expression e)78 acos_impl::acos_impl(expression e) : func_base("acos", std::vector{std::move(e)}) {}
79 
acos_impl()80 acos_impl::acos_impl() : acos_impl(0_dbl) {}
81 
gradient() const82 std::vector<expression> acos_impl::gradient() const
83 {
84     assert(args().size() == 1u);
85     return {-pow(1_dbl - square(args()[0]), -.5)};
86 }
87 
codegen_dbl(llvm_state & s,const std::vector<llvm::Value * > & args) const88 llvm::Value *acos_impl::codegen_dbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
89 {
90     assert(args.size() == 1u);
91     assert(args[0] != nullptr);
92 
93     if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(args[0]->getType())) {
94         if (const auto sfn = sleef_function_name(s.context(), "acos", vec_t->getElementType(),
95                                                  boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
96             !sfn.empty()) {
97             return llvm_invoke_external(
98                 s, sfn, vec_t, args,
99                 // NOTE: in theory we may add ReadNone here as well,
100                 // but for some reason, at least up to LLVM 10,
101                 // this causes strange codegen issues. Revisit
102                 // in the future.
103                 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
104         }
105     }
106 
107     return call_extern_vec(s, args[0], "acos");
108 }
109 
codegen_ldbl(llvm_state & s,const std::vector<llvm::Value * > & args) const110 llvm::Value *acos_impl::codegen_ldbl(llvm_state &s, const std::vector<llvm::Value *> &args) const
111 {
112     assert(args.size() == 1u);
113     assert(args[0] != nullptr);
114 
115     return call_extern_vec(s, args[0],
116 #if defined(_MSC_VER)
117                            // NOTE: it seems like the MSVC stdlib does not have an acosl function,
118                            // because LLVM complains about the symbol "acosl" not being
119                            // defined. Hence, use our own wrapper instead.
120                            "heyoka_acosl"
121 #else
122                            "acosl"
123 #endif
124     );
125 }
126 
127 #if defined(HEYOKA_HAVE_REAL128)
128 
codegen_f128(llvm_state & s,const std::vector<llvm::Value * > & args) const129 llvm::Value *acos_impl::codegen_f128(llvm_state &s, const std::vector<llvm::Value *> &args) const
130 {
131     assert(args.size() == 1u);
132     assert(args[0] != nullptr);
133 
134     return call_extern_vec(s, args[0], "acosq");
135 }
136 
137 #endif
138 
eval_dbl(const std::unordered_map<std::string,double> & map,const std::vector<double> & pars) const139 double acos_impl::eval_dbl(const std::unordered_map<std::string, double> &map, const std::vector<double> &pars) const
140 {
141     assert(args().size() == 1u);
142 
143     return std::acos(heyoka::eval_dbl(args()[0], map, pars));
144 }
145 
eval_ldbl(const std::unordered_map<std::string,long double> & map,const std::vector<long double> & pars) const146 long double acos_impl::eval_ldbl(const std::unordered_map<std::string, long double> &map,
147                                  const std::vector<long double> &pars) const
148 {
149     assert(args().size() == 1u);
150 
151     return std::acos(heyoka::eval_ldbl(args()[0], map, pars));
152 }
153 
154 #if defined(HEYOKA_HAVE_REAL128)
eval_f128(const std::unordered_map<std::string,mppp::real128> & map,const std::vector<mppp::real128> & pars) const155 mppp::real128 acos_impl::eval_f128(const std::unordered_map<std::string, mppp::real128> &map,
156                                    const std::vector<mppp::real128> &pars) const
157 {
158     assert(args().size() == 1u);
159 
160     return mppp::acos(heyoka::eval_f128(args()[0], map, pars));
161 }
162 #endif
163 
taylor_decompose(taylor_dc_t & u_vars_defs)164 taylor_dc_t::size_type acos_impl::taylor_decompose(taylor_dc_t &u_vars_defs) &&
165 {
166     assert(args().size() == 1u);
167 
168     // Append arg * arg.
169     u_vars_defs.emplace_back(square(args()[0]), std::vector<std::uint32_t>{});
170 
171     // Append 1 - arg * arg.
172     u_vars_defs.emplace_back(1_dbl - expression{"u_{}"_format(u_vars_defs.size() - 1u)}, std::vector<std::uint32_t>{});
173 
174     // Append sqrt(1 - arg * arg).
175     u_vars_defs.emplace_back(sqrt(expression{"u_{}"_format(u_vars_defs.size() - 1u)}), std::vector<std::uint32_t>{});
176 
177     // Append the acos decomposition.
178     u_vars_defs.emplace_back(func{std::move(*this)}, std::vector<std::uint32_t>{});
179 
180     // Add the hidden dep.
181     (u_vars_defs.end() - 1)->second.push_back(boost::numeric_cast<std::uint32_t>(u_vars_defs.size() - 2u));
182 
183     // Compute the return value (pointing to the
184     // decomposed acos).
185     return u_vars_defs.size() - 1u;
186 }
187 
188 namespace
189 {
190 
191 // Derivative of acos(number).
192 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_diff_acos_impl(llvm_state & s,const acos_impl & f,const std::vector<std::uint32_t> &,const U & num,const std::vector<llvm::Value * > &,llvm::Value * par_ptr,std::uint32_t,std::uint32_t order,std::uint32_t,std::uint32_t batch_size)193 llvm::Value *taylor_diff_acos_impl(llvm_state &s, const acos_impl &f, const std::vector<std::uint32_t> &, const U &num,
194                                    const std::vector<llvm::Value *> &, llvm::Value *par_ptr, std::uint32_t,
195                                    std::uint32_t order, std::uint32_t, std::uint32_t batch_size)
196 {
197     if (order == 0u) {
198         return codegen_from_values<T>(s, f, {taylor_codegen_numparam<T>(s, num, par_ptr, batch_size)});
199     } else {
200         return vector_splat(s.builder(), codegen<T>(s, number{0.}), batch_size);
201     }
202 }
203 
204 template <typename T>
taylor_diff_acos_impl(llvm_state & s,const acos_impl & f,const std::vector<std::uint32_t> & deps,const variable & var,const std::vector<llvm::Value * > & arr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size)205 llvm::Value *taylor_diff_acos_impl(llvm_state &s, const acos_impl &f, const std::vector<std::uint32_t> &deps,
206                                    const variable &var, const std::vector<llvm::Value *> &arr, llvm::Value *,
207                                    std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
208                                    std::uint32_t batch_size)
209 {
210     assert(deps.size() == 1u);
211 
212     auto &builder = s.builder();
213 
214     // Fetch the index of the variable argument.
215     const auto b_idx = uname_to_index(var.name());
216 
217     if (order == 0u) {
218         return codegen_from_values<T>(s, f, {taylor_fetch_diff(arr, b_idx, 0, n_uvars)});
219     }
220 
221     if (order == 1u) {
222         // Special-case the first-order derivative, in order
223         // to avoid an empty summation below.
224         return builder.CreateFNeg(
225             builder.CreateFDiv(taylor_fetch_diff(arr, b_idx, 1, n_uvars), taylor_fetch_diff(arr, deps[0], 0, n_uvars)));
226     }
227 
228     // Create the fp version of the order.
229     auto ord_fp = vector_splat(builder, codegen<T>(s, number(static_cast<T>(order))), batch_size);
230 
231     // Assemble the first part of the result: n*b^[n].
232     auto ret = builder.CreateFMul(ord_fp, taylor_fetch_diff(arr, b_idx, order, n_uvars));
233 
234     // Compute -n*c^[0].
235     auto n_c0 = builder.CreateFNeg(builder.CreateFMul(ord_fp, taylor_fetch_diff(arr, deps[0], 0, n_uvars)));
236 
237     // NOTE: iteration in the [1, order) range.
238     std::vector<llvm::Value *> sum;
239     for (std::uint32_t j = 1; j < order; ++j) {
240         // NOTE: the only hidden dependency contains the index of the
241         // u variable whose definition is sqrt(1 - var * var).
242         auto cnj = taylor_fetch_diff(arr, deps[0], order - j, n_uvars);
243         auto aj = taylor_fetch_diff(arr, idx, j, n_uvars);
244 
245         auto fac = vector_splat(builder, codegen<T>(s, number(static_cast<T>(j))), batch_size);
246 
247         // Add j*cnj*aj to the sum.
248         sum.push_back(builder.CreateFMul(fac, builder.CreateFMul(cnj, aj)));
249     }
250 
251     // Update ret.
252     ret = builder.CreateFAdd(ret, pairwise_sum(builder, sum));
253 
254     // Divide by -n*c^[0] and return.
255     return builder.CreateFDiv(ret, n_c0);
256 }
257 
258 // All the other cases.
259 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_diff_acos_impl(llvm_state &,const acos_impl &,const std::vector<std::uint32_t> &,const U &,const std::vector<llvm::Value * > &,llvm::Value *,std::uint32_t,std::uint32_t,std::uint32_t,std::uint32_t)260 llvm::Value *taylor_diff_acos_impl(llvm_state &, const acos_impl &, const std::vector<std::uint32_t> &, const U &,
261                                    const std::vector<llvm::Value *> &, llvm::Value *, std::uint32_t, std::uint32_t,
262                                    std::uint32_t, std::uint32_t)
263 {
264     throw std::invalid_argument(
265         "An invalid argument type was encountered while trying to build the Taylor derivative of an inverse cosine");
266 }
267 
268 template <typename T>
taylor_diff_acos(llvm_state & s,const acos_impl & f,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size)269 llvm::Value *taylor_diff_acos(llvm_state &s, const acos_impl &f, const std::vector<std::uint32_t> &deps,
270                               const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, std::uint32_t n_uvars,
271                               std::uint32_t order, std::uint32_t idx, std::uint32_t batch_size)
272 {
273     assert(f.args().size() == 1u);
274 
275     if (deps.size() != 1u) {
276         throw std::invalid_argument(
277             "A hidden dependency vector of size 1 is expected in order to compute the Taylor "
278             "derivative of the inverse cosine, but a vector of size {} was passed instead"_format(deps.size()));
279     }
280 
281     return std::visit(
282         [&](const auto &v) {
283             return taylor_diff_acos_impl<T>(s, f, deps, v, arr, par_ptr, n_uvars, order, idx, batch_size);
284         },
285         f.args()[0].value());
286 }
287 
288 } // namespace
289 
taylor_diff_dbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const290 llvm::Value *acos_impl::taylor_diff_dbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
291                                         const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
292                                         std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
293                                         std::uint32_t batch_size, bool) const
294 {
295     return taylor_diff_acos<double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
296 }
297 
taylor_diff_ldbl(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const298 llvm::Value *acos_impl::taylor_diff_ldbl(llvm_state &s, const std::vector<std::uint32_t> &deps,
299                                          const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
300                                          std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
301                                          std::uint32_t batch_size, bool) const
302 {
303     return taylor_diff_acos<long double>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
304 }
305 
306 #if defined(HEYOKA_HAVE_REAL128)
307 
taylor_diff_f128(llvm_state & s,const std::vector<std::uint32_t> & deps,const std::vector<llvm::Value * > & arr,llvm::Value * par_ptr,llvm::Value *,std::uint32_t n_uvars,std::uint32_t order,std::uint32_t idx,std::uint32_t batch_size,bool) const308 llvm::Value *acos_impl::taylor_diff_f128(llvm_state &s, const std::vector<std::uint32_t> &deps,
309                                          const std::vector<llvm::Value *> &arr, llvm::Value *par_ptr, llvm::Value *,
310                                          std::uint32_t n_uvars, std::uint32_t order, std::uint32_t idx,
311                                          std::uint32_t batch_size, bool) const
312 {
313     return taylor_diff_acos<mppp::real128>(s, *this, deps, arr, par_ptr, n_uvars, order, idx, batch_size);
314 }
315 
316 #endif
317 
318 namespace
319 {
320 
321 // Derivative of acos(number).
322 template <typename T, typename U, std::enable_if_t<is_num_param_v<U>, int> = 0>
taylor_c_diff_func_acos_impl(llvm_state & s,const acos_impl & fn,const U & num,std::uint32_t n_uvars,std::uint32_t batch_size)323 llvm::Function *taylor_c_diff_func_acos_impl(llvm_state &s, const acos_impl &fn, const U &num, std::uint32_t n_uvars,
324                                              std::uint32_t batch_size)
325 {
326     return taylor_c_diff_func_unary_num_det<T>(s, fn, num, n_uvars, batch_size, "acos", 1);
327 }
328 
329 // Derivative of acos(variable).
330 template <typename T>
taylor_c_diff_func_acos_impl(llvm_state & s,const acos_impl & fn,const variable & var,std::uint32_t n_uvars,std::uint32_t batch_size)331 llvm::Function *taylor_c_diff_func_acos_impl(llvm_state &s, const acos_impl &fn, const variable &var,
332                                              std::uint32_t n_uvars, std::uint32_t batch_size)
333 {
334     auto &module = s.module();
335     auto &builder = s.builder();
336     auto &context = s.context();
337 
338     // Fetch the floating-point type.
339     auto val_t = to_llvm_vector_type<T>(context, batch_size);
340 
341     // Fetch the function name and arguments.
342     const auto na_pair = taylor_c_diff_func_name_args<T>(context, "acos", n_uvars, batch_size, {var}, 1);
343     const auto &fname = na_pair.first;
344     const auto &fargs = na_pair.second;
345 
346     // Try to see if we already created the function.
347     auto f = module.getFunction(fname);
348 
349     if (f == nullptr) {
350         // The function was not created before, do it now.
351 
352         // Fetch the current insertion block.
353         auto orig_bb = builder.GetInsertBlock();
354 
355         // The return type is val_t.
356         auto *ft = llvm::FunctionType::get(val_t, fargs, false);
357         // Create the function
358         f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &module);
359         assert(f != nullptr);
360 
361         // Fetch the necessary function arguments.
362         auto ord = f->args().begin();
363         auto a_idx = f->args().begin() + 1;
364         auto diff_ptr = f->args().begin() + 2;
365         auto b_idx = f->args().begin() + 5;
366         auto c_idx = f->args().begin() + 6;
367 
368         // Create a new basic block to start insertion into.
369         builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
370 
371         // Create the return value.
372         auto retval = builder.CreateAlloca(val_t);
373 
374         // Create the accumulator.
375         auto acc = builder.CreateAlloca(val_t);
376 
377         llvm_if_then_else(
378             s, builder.CreateICmpEQ(ord, builder.getInt32(0)),
379             [&]() {
380                 // For order 0, invoke the function on the order 0 of b_idx.
381                 builder.CreateStore(codegen_from_values<T>(
382                                         s, fn, {taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), b_idx)}),
383                                     retval);
384             },
385             [&]() {
386                 // Compute the fp version of the order.
387                 auto ord_fp = vector_splat(builder, builder.CreateUIToFP(ord, to_llvm_type<T>(context)), batch_size);
388 
389                 // Compute n*b^[n].
390                 auto ret = builder.CreateFMul(ord_fp, taylor_c_load_diff(s, diff_ptr, n_uvars, ord, b_idx));
391 
392                 // Compute -n*c^[0].
393                 auto n_c0 = builder.CreateFNeg(
394                     builder.CreateFMul(ord_fp, taylor_c_load_diff(s, diff_ptr, n_uvars, builder.getInt32(0), c_idx)));
395 
396                 // Init the accumulator.
397                 builder.CreateStore(vector_splat(builder, codegen<T>(s, number{0.}), batch_size), acc);
398 
399                 // Run the loop.
400                 llvm_loop_u32(s, builder.getInt32(1), ord, [&](llvm::Value *j) {
401                     auto c_nj = taylor_c_load_diff(s, diff_ptr, n_uvars, builder.CreateSub(ord, j), c_idx);
402                     auto aj = taylor_c_load_diff(s, diff_ptr, n_uvars, j, a_idx);
403 
404                     auto fac = vector_splat(builder, builder.CreateUIToFP(j, to_llvm_type<T>(context)), batch_size);
405 
406                     builder.CreateStore(builder.CreateFAdd(builder.CreateLoad(acc),
407                                                            builder.CreateFMul(fac, builder.CreateFMul(c_nj, aj))),
408                                         acc);
409                 });
410 
411                 // Update ret.
412                 ret = builder.CreateFAdd(ret, builder.CreateLoad(acc));
413 
414                 // Divide by -n*c^[0].
415                 ret = builder.CreateFDiv(ret, n_c0);
416 
417                 // Store into retval.
418                 builder.CreateStore(ret, retval);
419             });
420 
421         // Return the result.
422         builder.CreateRet(builder.CreateLoad(retval));
423 
424         // Verify.
425         s.verify_function(f);
426 
427         // Restore the original insertion block.
428         builder.SetInsertPoint(orig_bb);
429     } else {
430         // The function was created before. Check if the signatures match.
431         // NOTE: there could be a mismatch if the derivative function was created
432         // and then optimised - optimisation might remove arguments which are compile-time
433         // constants.
434         if (!compare_function_signature(f, val_t, fargs)) {
435             throw std::invalid_argument(
436                 "Inconsistent function signature for the Taylor derivative of the inverse cosine "
437                 "in compact mode detected");
438         }
439     }
440 
441     return f;
442 }
443 
444 // All the other cases.
445 template <typename T, typename U, std::enable_if_t<!is_num_param_v<U>, int> = 0>
taylor_c_diff_func_acos_impl(llvm_state &,const acos_impl &,const U &,std::uint32_t,std::uint32_t)446 llvm::Function *taylor_c_diff_func_acos_impl(llvm_state &, const acos_impl &, const U &, std::uint32_t, std::uint32_t)
447 {
448     throw std::invalid_argument("An invalid argument type was encountered while trying to build the Taylor derivative "
449                                 "of an inverse cosine in compact mode");
450 }
451 
452 template <typename T>
taylor_c_diff_func_acos(llvm_state & s,const acos_impl & fn,std::uint32_t n_uvars,std::uint32_t batch_size)453 llvm::Function *taylor_c_diff_func_acos(llvm_state &s, const acos_impl &fn, std::uint32_t n_uvars,
454                                         std::uint32_t batch_size)
455 {
456     assert(fn.args().size() == 1u);
457 
458     return std::visit([&](const auto &v) { return taylor_c_diff_func_acos_impl<T>(s, fn, v, n_uvars, batch_size); },
459                       fn.args()[0].value());
460 }
461 
462 } // namespace
463 
taylor_c_diff_func_dbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const464 llvm::Function *acos_impl::taylor_c_diff_func_dbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
465                                                   bool) const
466 {
467     return taylor_c_diff_func_acos<double>(s, *this, n_uvars, batch_size);
468 }
469 
taylor_c_diff_func_ldbl(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const470 llvm::Function *acos_impl::taylor_c_diff_func_ldbl(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
471                                                    bool) const
472 {
473     return taylor_c_diff_func_acos<long double>(s, *this, n_uvars, batch_size);
474 }
475 
476 #if defined(HEYOKA_HAVE_REAL128)
477 
taylor_c_diff_func_f128(llvm_state & s,std::uint32_t n_uvars,std::uint32_t batch_size,bool) const478 llvm::Function *acos_impl::taylor_c_diff_func_f128(llvm_state &s, std::uint32_t n_uvars, std::uint32_t batch_size,
479                                                    bool) const
480 {
481     return taylor_c_diff_func_acos<mppp::real128>(s, *this, n_uvars, batch_size);
482 }
483 
484 #endif
485 
486 } // namespace detail
487 
acos(expression e)488 expression acos(expression e)
489 {
490     return expression{func{detail::acos_impl(std::move(e))}};
491 }
492 
493 } // namespace heyoka
494 
// NOTE(review): presumably this emits the serialisation export machinery
// for heyoka::detail::acos_impl - confirm against the macro definition
// (likely in heyoka/s11n.hpp). It is invoked at global scope, outside
// the heyoka namespace, with the fully-qualified class name.
HEYOKA_S11N_FUNC_EXPORT_IMPLEMENT(heyoka::detail::acos_impl)
496