1 // Copyright 2020, 2021 Francesco Biscani (bluescarni@gmail.com), Dario Izzo (dario.izzo@gmail.com)
2 //
3 // This file is part of the heyoka library.
4 //
5 // This Source Code Form is subject to the terms of the Mozilla
6 // Public License v. 2.0. If a copy of the MPL was not distributed
7 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
8
9 #include <heyoka/config.hpp>
10
11 #include <algorithm>
12 #include <cassert>
13 #include <cmath>
14 #include <cstddef>
15 #include <cstdint>
16 #include <functional>
17 #include <initializer_list>
18 #include <limits>
19 #include <stdexcept>
20 #include <string>
21 #include <typeindex>
22 #include <typeinfo>
23 #include <unordered_map>
24 #include <utility>
25 #include <vector>
26
27 #include <boost/math/constants/constants.hpp>
28 #include <boost/numeric/conversion/cast.hpp>
29
30 #include <fmt/format.h>
31
32 #include <llvm/Config/llvm-config.h>
33 #include <llvm/IR/Attributes.h>
34 #include <llvm/IR/BasicBlock.h>
35 #include <llvm/IR/Constant.h>
36 #include <llvm/IR/Constants.h>
37 #include <llvm/IR/DataLayout.h>
38 #include <llvm/IR/DerivedTypes.h>
39 #include <llvm/IR/Function.h>
40 #include <llvm/IR/GlobalVariable.h>
41 #include <llvm/IR/IRBuilder.h>
42 #include <llvm/IR/InstrTypes.h>
43 #include <llvm/IR/Intrinsics.h>
44 #include <llvm/IR/LLVMContext.h>
45 #include <llvm/IR/Module.h>
46 #include <llvm/IR/Operator.h>
47 #include <llvm/IR/Type.h>
48 #include <llvm/IR/Value.h>
49 #include <llvm/Support/Alignment.h>
50 #include <llvm/Support/Casting.h>
51 #include <llvm/Support/raw_ostream.h>
52
53 #if defined(HEYOKA_HAVE_REAL128)
54
55 #include <mp++/real128.hpp>
56
57 #endif
58
59 #include <heyoka/detail/binomial.hpp>
60 #include <heyoka/detail/llvm_fwd.hpp>
61 #include <heyoka/detail/llvm_helpers.hpp>
62 #include <heyoka/detail/llvm_vector_type.hpp>
63 #include <heyoka/detail/logging_impl.hpp>
64 #include <heyoka/detail/sleef.hpp>
65 #include <heyoka/detail/visibility.hpp>
66 #include <heyoka/llvm_state.hpp>
67 #include <heyoka/number.hpp>
68
69 #if defined(_MSC_VER) && !defined(__clang__)
70
71 // NOTE: MSVC has issues with the other "using"
72 // statement form.
73 using namespace fmt::literals;
74
75 #else
76
77 using fmt::literals::operator""_format;
78
79 #endif
80
81 namespace heyoka::detail
82 {
83
84 namespace
85 {
86
87 // The global type map to associate a C++ type to an LLVM type.
__anonb5603a6a0202() 88 const auto type_map = []() {
89 std::unordered_map<std::type_index, llvm::Type *(*)(llvm::LLVMContext &)> retval;
90
91 // Try to associate C++ double to LLVM double.
92 if (std::numeric_limits<double>::is_iec559 && std::numeric_limits<double>::digits == 53) {
93 retval[typeid(double)] = [](llvm::LLVMContext &c) {
94 auto ret = llvm::Type::getDoubleTy(c);
95 assert(ret != nullptr);
96 return ret;
97 };
98 }
99
100 // Try to associate C++ long double to an LLVM fp type.
101 if (std::numeric_limits<long double>::is_iec559) {
102 if (std::numeric_limits<long double>::digits == 53) {
103 retval[typeid(long double)] = [](llvm::LLVMContext &c) {
104 // IEEE double-precision format (this is the case on MSVC for instance).
105 auto ret = llvm::Type::getDoubleTy(c);
106 assert(ret != nullptr);
107 return ret;
108 };
109 #if defined(HEYOKA_ARCH_X86)
110 } else if (std::numeric_limits<long double>::digits == 64) {
111 retval[typeid(long double)] = [](llvm::LLVMContext &c) {
112 // x86 extended precision format.
113 auto ret = llvm::Type::getX86_FP80Ty(c);
114 assert(ret != nullptr);
115 return ret;
116 };
117 #endif
118 } else if (std::numeric_limits<long double>::digits == 113) {
119 retval[typeid(long double)] = [](llvm::LLVMContext &c) {
120 // IEEE quadruple-precision format (e.g., ARM 64).
121 auto ret = llvm::Type::getFP128Ty(c);
122 assert(ret != nullptr);
123 return ret;
124 };
125 }
126 }
127
128 #if defined(HEYOKA_HAVE_REAL128)
129
130 // Associate mppp::real128 to fp128.
131 retval[typeid(mppp::real128)] = [](llvm::LLVMContext &c) {
132 auto ret = llvm::Type::getFP128Ty(c);
133 assert(ret != nullptr);
134 return ret;
135 };
136
137 #endif
138
139 return retval;
140 }();
141
142 } // namespace
143
144 // Implementation of the function to associate a C++ type to
145 // an LLVM type.
to_llvm_type_impl(llvm::LLVMContext & c,const std::type_info & tp)146 llvm::Type *to_llvm_type_impl(llvm::LLVMContext &c, const std::type_info &tp)
147 {
148 const auto it = type_map.find(tp);
149
150 if (it == type_map.end()) {
151 throw std::invalid_argument("Unable to associate the C++ type '{}' to an LLVM type"_format(tp.name()));
152 } else {
153 return it->second(c);
154 }
155 }
156
157 // Helper to produce a unique string for the type t.
llvm_mangle_type(llvm::Type * t)158 std::string llvm_mangle_type(llvm::Type *t)
159 {
160 assert(t != nullptr);
161
162 if (auto *v_t = llvm::dyn_cast<llvm_vector_type>(t)) {
163 // If the type is a vector, get the name of the element type
164 // and append the vector size.
165 return "{}_{}"_format(llvm_type_name(v_t->getElementType()), v_t->getNumElements());
166 } else {
167 // Otherwise just return the type name.
168 return llvm_type_name(t);
169 }
170 }
171
172 // Helper to load into a vector of size vector_size the sequential scalar data starting at ptr.
173 // If vector_size is 1, a scalar is loaded instead.
load_vector_from_memory(ir_builder & builder,llvm::Value * ptr,std::uint32_t vector_size)174 llvm::Value *load_vector_from_memory(ir_builder &builder, llvm::Value *ptr, std::uint32_t vector_size)
175 {
176 assert(vector_size > 0u);
177
178 if (vector_size == 1u) {
179 // Scalar case.
180 return builder.CreateLoad(ptr);
181 }
182
183 // Fetch the pointer type (this will result in an assertion
184 // failure if ptr is not a pointer).
185 auto ptr_t = llvm::cast<llvm::PointerType>(ptr->getType());
186
187 // Create the vector type.
188 auto vector_t = make_vector_type(ptr_t->getElementType(), vector_size);
189 assert(vector_t != nullptr);
190
191 // Create the output vector.
192 auto ret = static_cast<llvm::Value *>(llvm::UndefValue::get(vector_t));
193
194 // Fill it.
195 for (std::uint32_t i = 0; i < vector_size; ++i) {
196 ret = builder.CreateInsertElement(ret,
197 builder.CreateLoad(builder.CreateInBoundsGEP(ptr, {builder.getInt32(i)})), i);
198 }
199
200 return ret;
201 }
202
203 // Helper to store the content of vector vec to the pointer ptr. If vec is not a vector,
204 // a plain store will be performed.
store_vector_to_memory(ir_builder & builder,llvm::Value * ptr,llvm::Value * vec)205 void store_vector_to_memory(ir_builder &builder, llvm::Value *ptr, llvm::Value *vec)
206 {
207 if (auto v_ptr_t = llvm::dyn_cast<llvm_vector_type>(vec->getType())) {
208 // Determine the vector size.
209 const auto vector_size = boost::numeric_cast<std::uint32_t>(v_ptr_t->getNumElements());
210
211 for (std::uint32_t i = 0; i < vector_size; ++i) {
212 builder.CreateStore(builder.CreateExtractElement(vec, i),
213 builder.CreateInBoundsGEP(ptr, {builder.getInt32(i)}));
214 }
215 } else {
216 // Not a vector, store vec directly.
217 builder.CreateStore(vec, ptr);
218 }
219 }
220
221 // Gather a vector of type vec_tp from the vector of pointers ptrs. align is the alignment of the
222 // scalar values stored in ptrs.
gather_vector_from_memory(ir_builder & builder,llvm::Type * vec_tp,llvm::Value * ptrs,std::size_t align)223 llvm::Value *gather_vector_from_memory(ir_builder &builder, llvm::Type *vec_tp, llvm::Value *ptrs, std::size_t align)
224 {
225 if (llvm::isa<llvm_vector_type>(vec_tp)) {
226 assert(llvm::isa<llvm_vector_type>(ptrs->getType()));
227
228 return builder.CreateMaskedGather(
229 #if LLVM_VERSION_MAJOR >= 13
230 // NOTE: new initial argument required since LLVM 13
231 // (the vector type to gather).
232 vec_tp,
233 #endif
234 ptrs,
235 #if LLVM_VERSION_MAJOR == 10
236 align
237 #else
238 llvm::Align(align)
239 #endif
240 );
241 } else {
242 assert(!llvm::isa<llvm_vector_type>(ptrs->getType()));
243
244 return builder.CreateLoad(ptrs);
245 }
246 }
247
248 // Create a SIMD vector of size vector_size filled with the value c. If vector_size is 1,
249 // c will be returned.
vector_splat(ir_builder & builder,llvm::Value * c,std::uint32_t vector_size)250 llvm::Value *vector_splat(ir_builder &builder, llvm::Value *c, std::uint32_t vector_size)
251 {
252 assert(vector_size > 0u);
253
254 if (vector_size == 1u) {
255 return c;
256 }
257
258 return builder.CreateVectorSplat(boost::numeric_cast<unsigned>(vector_size), c);
259 }
260
make_vector_type(llvm::Type * t,std::uint32_t vector_size)261 llvm::Type *make_vector_type(llvm::Type *t, std::uint32_t vector_size)
262 {
263 assert(t != nullptr);
264 assert(vector_size > 0u);
265
266 if (vector_size == 1u) {
267 return t;
268 } else {
269 auto retval = llvm_vector_type::get(t, boost::numeric_cast<unsigned>(vector_size));
270
271 assert(retval != nullptr);
272
273 return retval;
274 }
275 }
276
277 // Convert the input LLVM vector to a std::vector of values. If vec is not a vector,
278 // return {vec}.
vector_to_scalars(ir_builder & builder,llvm::Value * vec)279 std::vector<llvm::Value *> vector_to_scalars(ir_builder &builder, llvm::Value *vec)
280 {
281 if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(vec->getType())) {
282 // Fetch the vector width.
283 auto vector_size = vec_t->getNumElements();
284
285 // Extract the vector elements one by one.
286 std::vector<llvm::Value *> ret;
287 for (decltype(vector_size) i = 0; i < vector_size; ++i) {
288 ret.push_back(builder.CreateExtractElement(vec, boost::numeric_cast<std::uint64_t>(i)));
289 assert(ret.back() != nullptr);
290 }
291
292 return ret;
293 } else {
294 return {vec};
295 }
296 }
297
298 // Convert a std::vector of values into an LLVM vector of the corresponding size.
299 // If scalars contains only 1 value, return that value.
scalars_to_vector(ir_builder & builder,const std::vector<llvm::Value * > & scalars)300 llvm::Value *scalars_to_vector(ir_builder &builder, const std::vector<llvm::Value *> &scalars)
301 {
302 assert(!scalars.empty());
303
304 // Fetch the vector size.
305 const auto vector_size = scalars.size();
306
307 if (vector_size == 1u) {
308 return scalars[0];
309 }
310
311 // Fetch the scalar type.
312 auto scalar_t = scalars[0]->getType();
313
314 // Create the corresponding vector type.
315 auto vector_t = make_vector_type(scalar_t, boost::numeric_cast<std::uint32_t>(vector_size));
316 assert(vector_t != nullptr);
317
318 // Create an empty vector.
319 llvm::Value *vec = llvm::UndefValue::get(vector_t);
320 assert(vec != nullptr);
321
322 // Fill it up.
323 for (auto i = 0u; i < vector_size; ++i) {
324 assert(scalars[i]->getType() == scalar_t);
325
326 vec = builder.CreateInsertElement(vec, scalars[i], i);
327 }
328
329 return vec;
330 }
331
332 // Pairwise reduction of a vector of LLVM values.
pairwise_reduce(std::vector<llvm::Value * > & vals,const std::function<llvm::Value * (llvm::Value *,llvm::Value *)> & f)333 llvm::Value *pairwise_reduce(std::vector<llvm::Value *> &vals,
334 const std::function<llvm::Value *(llvm::Value *, llvm::Value *)> &f)
335 {
336 assert(!vals.empty());
337 assert(f);
338
339 // LCOV_EXCL_START
340 if (vals.size() == std::numeric_limits<decltype(vals.size())>::max()) {
341 throw std::overflow_error("Overflow detected in pairwise_reduce()");
342 }
343 // LCOV_EXCL_STOP
344
345 while (vals.size() != 1u) {
346 std::vector<llvm::Value *> new_vals;
347
348 for (decltype(vals.size()) i = 0; i < vals.size(); i += 2u) {
349 if (i + 1u == vals.size()) {
350 // We are at the last element of the vector
351 // and the size of the vector is odd. Just append
352 // the existing value.
353 new_vals.push_back(vals[i]);
354 } else {
355 new_vals.push_back(f(vals[i], vals[i + 1u]));
356 }
357 }
358
359 new_vals.swap(vals);
360 }
361
362 return vals[0];
363 }
364
365 // Pairwise summation of a vector of LLVM values.
366 // https://en.wikipedia.org/wiki/Pairwise_summation
pairwise_sum(ir_builder & builder,std::vector<llvm::Value * > & sum)367 llvm::Value *pairwise_sum(ir_builder &builder, std::vector<llvm::Value *> &sum)
368 {
369 return pairwise_reduce(
370 sum, [&builder](llvm::Value *a, llvm::Value *b) -> llvm::Value * { return builder.CreateFAdd(a, b); });
371 }
372
373 // Helper to invoke an intrinsic function with arguments 'args'. 'types' are the argument type(s) for
374 // overloaded intrinsics.
llvm_invoke_intrinsic(llvm_state & s,const std::string & name,const std::vector<llvm::Type * > & types,const std::vector<llvm::Value * > & args)375 llvm::Value *llvm_invoke_intrinsic(llvm_state &s, const std::string &name, const std::vector<llvm::Type *> &types,
376 const std::vector<llvm::Value *> &args)
377 {
378 // Fetch the intrinsic ID from the name.
379 const auto intrinsic_ID = llvm::Function::lookupIntrinsicID(name);
380 if (intrinsic_ID == 0) {
381 throw std::invalid_argument("Cannot fetch the ID of the intrinsic '{}'"_format(name));
382 }
383
384 // Fetch the declaration.
385 // NOTE: for generic intrinsics to work, we need to specify
386 // the desired argument type(s). See:
387 // https://stackoverflow.com/questions/11985247/llvm-insert-intrinsic-function-cos
388 // And the docs of the getDeclaration() function.
389 auto callee_f = llvm::Intrinsic::getDeclaration(&s.module(), intrinsic_ID, types);
390 if (callee_f == nullptr) {
391 throw std::invalid_argument("Error getting the declaration of the intrinsic '{}'"_format(name));
392 }
393 if (!callee_f->isDeclaration()) {
394 // It does not make sense to have a definition of a builtin.
395 throw std::invalid_argument("The intrinsic '{}' must be only declared, not defined"_format(name));
396 }
397
398 // Check the number of arguments.
399 if (callee_f->arg_size() != args.size()) {
400 throw std::invalid_argument(
401 "Incorrect # of arguments passed while calling the intrinsic '{}': {} are "
402 "expected, but {} were provided instead"_format(name, callee_f->arg_size(), args.size()));
403 }
404
405 // Create the function call.
406 auto r = s.builder().CreateCall(callee_f, args);
407 assert(r != nullptr);
408
409 return r;
410 }
411
412 // Helper to invoke an external function called 'name' with arguments args and return type ret_type.
llvm_invoke_external(llvm_state & s,const std::string & name,llvm::Type * ret_type,const std::vector<llvm::Value * > & args,const std::vector<int> & attrs)413 llvm::Value *llvm_invoke_external(llvm_state &s, const std::string &name, llvm::Type *ret_type,
414 const std::vector<llvm::Value *> &args, const std::vector<int> &attrs)
415 {
416 // Look up the name in the global module table.
417 auto callee_f = s.module().getFunction(name);
418
419 if (callee_f == nullptr) {
420 // The function does not exist yet, make the prototype.
421 std::vector<llvm::Type *> arg_types;
422 for (auto a : args) {
423 arg_types.push_back(a->getType());
424 }
425 auto *ft = llvm::FunctionType::get(ret_type, arg_types, false);
426 callee_f = llvm::Function::Create(ft, llvm::Function::ExternalLinkage, name, &s.module());
427 if (callee_f == nullptr) {
428 throw std::invalid_argument("Unable to create the prototype for the external function '{}'"_format(name));
429 }
430
431 // Add the function attributes.
432 for (const auto &att : attrs) {
433 // NOTE: convert back to the LLVM attribute enum.
434 callee_f->addFnAttr(boost::numeric_cast<llvm::Attribute::AttrKind>(att));
435 }
436 } else {
437 // The function declaration exists already. Check that it is only a
438 // declaration and not a definition.
439 if (!callee_f->isDeclaration()) {
440 throw std::invalid_argument("Cannot call the function '{}' as an external function, because "
441 "it is defined as an internal module function"_format(name));
442 }
443 // Check the number of arguments.
444 if (callee_f->arg_size() != args.size()) {
445 throw std::invalid_argument(
446 "Incorrect # of arguments passed while calling the external function '{}': {} "
447 "are expected, but {} were provided instead"_format(name, callee_f->arg_size(), args.size()));
448 }
449 // NOTE: perhaps in the future we should consider adding more checks here
450 // (e.g., argument types, return type).
451 }
452
453 // Create the function call.
454 auto r = s.builder().CreateCall(callee_f, args);
455 assert(r != nullptr);
456 // NOTE: we used to have r->setTailCall(true) here, but:
457 // - when optimising, the tail call attribute is automatically
458 // added,
459 // - it is not 100% clear to me whether it is always safe to enable it:
460 // https://llvm.org/docs/CodeGenerator.html#tail-calls
461
462 return r;
463 }
464
465 // Helper to invoke an internal module function called 'name' with arguments 'args'.
llvm_invoke_internal(llvm_state & s,const std::string & name,const std::vector<llvm::Value * > & args)466 llvm::Value *llvm_invoke_internal(llvm_state &s, const std::string &name, const std::vector<llvm::Value *> &args)
467 {
468 auto callee_f = s.module().getFunction(name);
469
470 if (callee_f == nullptr) {
471 throw std::invalid_argument("Unknown internal function: '{}'"_format(name));
472 }
473
474 if (callee_f->isDeclaration()) {
475 throw std::invalid_argument("The internal function '{}' cannot be just a "
476 "declaration, a definition is needed"_format(name));
477 }
478
479 // Check the number of arguments.
480 if (callee_f->arg_size() != args.size()) {
481 throw std::invalid_argument(
482 "Incorrect # of arguments passed while calling the internal function '{}': {} are "
483 "expected, but {} were provided instead"_format(name, callee_f->arg_size(), args.size()));
484 }
485 // NOTE: perhaps in the future we should consider adding more checks here
486 // (e.g., argument types).
487
488 // Create the function call.
489 auto r = s.builder().CreateCall(callee_f, args);
490 assert(r != nullptr);
491 // NOTE: we used to have r->setTailCall(true) here, but:
492 // - when optimising, the tail call attribute is automatically
493 // added,
494 // - it is not 100% clear to me whether it is always safe to enable it:
495 // https://llvm.org/docs/CodeGenerator.html#tail-calls
496
497 return r;
498 }
499
500 // Create an LLVM for loop in the form:
501 //
502 // for (auto i = begin; i < end; i = next_cur(i)) {
503 // body(i);
504 // }
505 //
506 // The default implementation of i = next_cur(i),
507 // if next_cur is not provided, is ++i.
508 //
509 // begin/end must be 32-bit unsigned integer values.
llvm_loop_u32(llvm_state & s,llvm::Value * begin,llvm::Value * end,const std::function<void (llvm::Value *)> & body,const std::function<llvm::Value * (llvm::Value *)> & next_cur)510 void llvm_loop_u32(llvm_state &s, llvm::Value *begin, llvm::Value *end, const std::function<void(llvm::Value *)> &body,
511 const std::function<llvm::Value *(llvm::Value *)> &next_cur)
512 {
513 assert(body);
514 assert(begin->getType() == end->getType());
515 assert(begin->getType() == s.builder().getInt32Ty());
516
517 auto &context = s.context();
518 auto &builder = s.builder();
519
520 // Fetch the current function.
521 assert(builder.GetInsertBlock() != nullptr);
522 auto f = builder.GetInsertBlock()->getParent();
523 assert(f != nullptr);
524
525 // Pre-create loop and afterloop blocks. Note that these have just
526 // been created, they have not been inserted yet in the IR.
527 auto *loop_bb = llvm::BasicBlock::Create(context);
528 auto *after_bb = llvm::BasicBlock::Create(context);
529
530 // NOTE: we need a special case if the body of the loop is
531 // never to be executed (that is, begin >= end).
532 // In such a case, we will jump directly to after_bb.
533 // NOTE: unsigned integral comparison.
534 auto skip_cond = builder.CreateICmp(llvm::CmpInst::ICMP_UGE, begin, end);
535 builder.CreateCondBr(skip_cond, after_bb, loop_bb);
536
537 // Get a reference to the current block for
538 // later usage in the phi node.
539 auto preheader_bb = builder.GetInsertBlock();
540
541 // Add the loop block and start insertion into it.
542 f->getBasicBlockList().push_back(loop_bb);
543 builder.SetInsertPoint(loop_bb);
544
545 // Create the phi node and add the first pair of arguments.
546 auto cur = builder.CreatePHI(builder.getInt32Ty(), 2);
547 cur->addIncoming(begin, preheader_bb);
548
549 // Execute the loop body and the post-body code.
550 llvm::Value *next;
551 try {
552 body(cur);
553
554 // Compute the next value of the iteration. Use the next_cur
555 // function if provided, otherwise, by default, increase cur by 1.
556 // NOTE: addition works regardless of integral signedness.
557 next = next_cur ? next_cur(cur) : builder.CreateAdd(cur, builder.getInt32(1));
558 } catch (...) {
559 // NOTE: at this point after_bb has not been
560 // inserted into any parent, and thus it will not
561 // be cleaned up automatically. Do it manually.
562 after_bb->deleteValue();
563
564 throw;
565 }
566
567 // Compute the end condition.
568 // NOTE: we use the unsigned less-than predicate.
569 auto end_cond = builder.CreateICmp(llvm::CmpInst::ICMP_ULT, next, end);
570
571 // Get a reference to the current block for later use,
572 // and insert the "after loop" block.
573 auto loop_end_bb = builder.GetInsertBlock();
574 f->getBasicBlockList().push_back(after_bb);
575
576 // Insert the conditional branch into the end of loop_end_bb.
577 builder.CreateCondBr(end_cond, loop_bb, after_bb);
578
579 // Any new code will be inserted in after_bb.
580 builder.SetInsertPoint(after_bb);
581
582 // Add a new entry to the PHI node for the backedge.
583 cur->addIncoming(next, loop_end_bb);
584 }
585
586 // Given an input pointer value, return the
587 // pointed-to type.
pointee_type(llvm::Value * ptr)588 llvm::Type *pointee_type(llvm::Value *ptr)
589 {
590 assert(llvm::isa<llvm::PointerType>(ptr->getType())); // LCOV_EXCL_LINE
591
592 return ptr->getType()->getPointerElementType();
593 }
594
595 // Small helper to fetch a string representation
596 // of an LLVM type.
llvm_type_name(llvm::Type * t)597 std::string llvm_type_name(llvm::Type *t)
598 {
599 assert(t != nullptr);
600
601 std::string retval;
602 llvm::raw_string_ostream ostr(retval);
603
604 t->print(ostr, false, true);
605
606 return ostr.str();
607 }
608
609 // This function will return true if:
610 //
611 // - the return type of f is ret, and
612 // - the argument types of f are the same as in 'args'.
613 //
614 // Otherwise, the function will return false.
compare_function_signature(llvm::Function * f,llvm::Type * ret,const std::vector<llvm::Type * > & args)615 bool compare_function_signature(llvm::Function *f, llvm::Type *ret, const std::vector<llvm::Type *> &args)
616 {
617 assert(f != nullptr);
618 assert(ret != nullptr);
619
620 if (ret != f->getReturnType()) {
621 // Mismatched return types.
622 return false;
623 }
624
625 auto it = f->arg_begin();
626 for (auto arg_type : args) {
627 if (it == f->arg_end() || it->getType() != arg_type) {
628 // f has fewer arguments than args, or the current
629 // arguments' types do not match.
630 return false;
631 }
632 ++it;
633 }
634
635 // In order for the signatures to match,
636 // we must be at the end of f's arguments list
637 // (otherwise f has more arguments than args).
638 return it == f->arg_end();
639 }
640
641 // Create an LLVM if statement in the form:
642 // if (cond) {
643 // then_f();
644 // } else {
645 // else_f();
646 // }
llvm_if_then_else(llvm_state & s,llvm::Value * cond,const std::function<void ()> & then_f,const std::function<void ()> & else_f)647 void llvm_if_then_else(llvm_state &s, llvm::Value *cond, const std::function<void()> &then_f,
648 const std::function<void()> &else_f)
649 {
650 auto &context = s.context();
651 auto &builder = s.builder();
652
653 assert(cond->getType() == builder.getInt1Ty());
654
655 // Fetch the current function.
656 assert(builder.GetInsertBlock() != nullptr);
657 auto f = builder.GetInsertBlock()->getParent();
658 assert(f != nullptr);
659
660 // Create and insert the "then" block.
661 auto *then_bb = llvm::BasicBlock::Create(context, "", f);
662
663 // Create but do not insert the "else" and merge blocks.
664 auto *else_bb = llvm::BasicBlock::Create(context);
665 auto *merge_bb = llvm::BasicBlock::Create(context);
666
667 // Create the conditional jump.
668 builder.CreateCondBr(cond, then_bb, else_bb);
669
670 // Emit the code for the "then" branch.
671 builder.SetInsertPoint(then_bb);
672 try {
673 then_f();
674 } catch (...) {
675 // NOTE: else_bb and merge_bb have not been
676 // inserted into any parent yet, clean them
677 // up manually.
678 else_bb->deleteValue();
679 merge_bb->deleteValue();
680
681 throw;
682 }
683
684 // Jump to the merge block.
685 builder.CreateBr(merge_bb);
686
687 // Emit the "else" block.
688 f->getBasicBlockList().push_back(else_bb);
689 builder.SetInsertPoint(else_bb);
690 try {
691 else_f();
692 } catch (...) {
693 // NOTE: merge_bb has not been
694 // inserted into any parent yet, clean it
695 // up manually.
696 merge_bb->deleteValue();
697
698 throw;
699 }
700
701 // Jump to the merge block.
702 builder.CreateBr(merge_bb);
703
704 // Emit the merge block.
705 f->getBasicBlockList().push_back(merge_bb);
706 builder.SetInsertPoint(merge_bb);
707 }
708
709 // Helper to create a global zero-inited array variable in the module m
710 // with type t. The array is mutable and with internal linkage.
make_global_zero_array(llvm::Module & m,llvm::ArrayType * t)711 llvm::Value *make_global_zero_array(llvm::Module &m, llvm::ArrayType *t)
712 {
713 assert(t != nullptr);
714
715 // Make the global array.
716 auto gl_arr = new llvm::GlobalVariable(m, t, false, llvm::GlobalVariable::InternalLinkage,
717 llvm::ConstantAggregateZero::get(t));
718
719 // Return it.
720 return gl_arr;
721 }
722
723 // Helper to invoke an external function on a vector argument.
724 // The function will be called on each element of the vector separately,
725 // and the return will be re-assembled as a vector.
call_extern_vec(llvm_state & s,llvm::Value * arg,const std::string & fname)726 llvm::Value *call_extern_vec(llvm_state &s, llvm::Value *arg, const std::string &fname)
727 {
728 assert(arg != nullptr);
729
730 auto &builder = s.builder();
731
732 // Decompose the argument into scalars.
733 auto scalars = vector_to_scalars(builder, arg);
734
735 // Invoke the function on each scalar.
736 std::vector<llvm::Value *> retvals;
737 for (auto scal : scalars) {
738 retvals.push_back(llvm_invoke_external(
739 s, fname, scal->getType(), {scal},
740 // NOTE: in theory we may add ReadNone here as well,
741 // but for some reason, at least up to LLVM 10,
742 // this causes strange codegen issues. Revisit
743 // in the future.
744 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn}));
745 }
746
747 // Build a vector with the results.
748 return scalars_to_vector(builder, retvals);
749 }
750
751 // Helper to invoke an external function on 2 vector arguments.
752 // The function will be called on each elements of the vectors separately,
753 // and the return will be re-assembled as a vector.
call_extern_vec(llvm_state & s,llvm::Value * arg0,llvm::Value * arg1,const std::string & fname)754 llvm::Value *call_extern_vec(llvm_state &s, llvm::Value *arg0, llvm::Value *arg1, const std::string &fname)
755 {
756 assert(arg0 != nullptr);
757 assert(arg1 != nullptr);
758 assert(arg0->getType() == arg1->getType());
759
760 auto &builder = s.builder();
761
762 // Decompose the arguments into scalars.
763 auto scalars0 = vector_to_scalars(builder, arg0);
764 auto scalars1 = vector_to_scalars(builder, arg1);
765
766 // Invoke the function on each pair of scalars.
767 std::vector<llvm::Value *> retvals;
768 for (decltype(scalars0.size()) i = 0; i < scalars0.size(); ++i) {
769 retvals.push_back(llvm_invoke_external(
770 s, fname, scalars0[i]->getType(), {scalars0[i], scalars1[i]},
771 // NOTE: in theory we may add ReadNone here as well,
772 // but for some reason, at least up to LLVM 10,
773 // this causes strange codegen issues. Revisit
774 // in the future.
775 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn}));
776 }
777
778 // Build a vector with the results.
779 return scalars_to_vector(builder, retvals);
780 }
781
782 // Create an LLVM for loop in the form:
783 //
784 // while (cond()) {
785 // body();
786 // }
llvm_while_loop(llvm_state & s,const std::function<llvm::Value * ()> & cond,const std::function<void ()> & body)787 void llvm_while_loop(llvm_state &s, const std::function<llvm::Value *()> &cond, const std::function<void()> &body)
788 {
789 assert(body);
790 assert(cond);
791
792 auto &context = s.context();
793 auto &builder = s.builder();
794
795 // Fetch the current function.
796 assert(builder.GetInsertBlock() != nullptr);
797 auto f = builder.GetInsertBlock()->getParent();
798 assert(f != nullptr);
799
800 // Do a first evaluation of cond.
801 // NOTE: if this throws, we have not created any block
802 // yet, no need for manual cleanup.
803 auto cmp = cond();
804 assert(cmp != nullptr);
805 assert(cmp->getType() == builder.getInt1Ty());
806
807 // Pre-create loop and afterloop blocks. Note that these have just
808 // been created, they have not been inserted yet in the IR.
809 auto *loop_bb = llvm::BasicBlock::Create(context);
810 auto *after_bb = llvm::BasicBlock::Create(context);
811
812 // NOTE: we need a special case if the body of the loop is
813 // never to be executed (that is, cond returns false).
814 // In such a case, we will jump directly to after_bb.
815 builder.CreateCondBr(builder.CreateNot(cmp), after_bb, loop_bb);
816
817 // Get a reference to the current block for
818 // later usage in the phi node.
819 auto preheader_bb = builder.GetInsertBlock();
820
821 // Add the loop block and start insertion into it.
822 f->getBasicBlockList().push_back(loop_bb);
823 builder.SetInsertPoint(loop_bb);
824
825 // Create the phi node and add the first pair of arguments.
826 auto cur = builder.CreatePHI(builder.getInt1Ty(), 2);
827 cur->addIncoming(cmp, preheader_bb);
828
829 // Execute the loop body and the post-body code.
830 try {
831 body();
832
833 // Compute the end condition.
834 cmp = cond();
835 assert(cmp != nullptr);
836 assert(cmp->getType() == builder.getInt1Ty());
837 } catch (...) {
838 // NOTE: at this point after_bb has not been
839 // inserted into any parent, and thus it will not
840 // be cleaned up automatically. Do it manually.
841 after_bb->deleteValue();
842
843 throw;
844 }
845
846 // Get a reference to the current block for later use,
847 // and insert the "after loop" block.
848 auto loop_end_bb = builder.GetInsertBlock();
849 f->getBasicBlockList().push_back(after_bb);
850
851 // Insert the conditional branch into the end of loop_end_bb.
852 builder.CreateCondBr(cmp, loop_bb, after_bb);
853
854 // Any new code will be inserted in after_bb.
855 builder.SetInsertPoint(after_bb);
856
857 // Add a new entry to the PHI node for the backedge.
858 cur->addIncoming(cmp, loop_end_bb);
859 }
860
861 // Helper to compute sin and cos simultaneously.
llvm_sincos(llvm_state & s,llvm::Value * x)862 std::pair<llvm::Value *, llvm::Value *> llvm_sincos(llvm_state &s, llvm::Value *x)
863 {
864 auto &context = s.context();
865
866 #if defined(HEYOKA_HAVE_REAL128)
867 // Determine the scalar type of the vector arguments.
868 auto *x_t = x->getType()->getScalarType();
869
870 if (x_t == llvm::Type::getFP128Ty(context)) {
871 // NOTE: for __float128 we cannot use the intrinsics, we need
872 // to call an external function.
873 auto &builder = s.builder();
874
875 // Convert the vector argument to scalars.
876 auto x_scalars = vector_to_scalars(builder, x);
877
878 // Execute the sincosq() function on the scalar values and store
879 // the results in res_scalars.
880 // NOTE: need temp storage because sincosq uses pointers
881 // for output values.
882 auto s_all = builder.CreateAlloca(x_t);
883 auto c_all = builder.CreateAlloca(x_t);
884 std::vector<llvm::Value *> res_sin, res_cos;
885 for (decltype(x_scalars.size()) i = 0; i < x_scalars.size(); ++i) {
886 llvm_invoke_external(
887 s, "sincosq", builder.getVoidTy(), {x_scalars[i], s_all, c_all},
888 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
889
890 res_sin.emplace_back(builder.CreateLoad(s_all));
891 res_cos.emplace_back(builder.CreateLoad(c_all));
892 }
893
894 // Reconstruct the return value as a vector.
895 return {scalars_to_vector(builder, res_sin), scalars_to_vector(builder, res_cos)};
896 } else {
897 #endif
898 if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(x->getType())) {
899 // NOTE: although there exists a SLEEF function for computing sin/cos
900 // at the same time, we cannot use it directly because it returns a pair
901 // of SIMD vectors rather than a single one and that does not play
902 // well with the calling conventions. In theory we could write a wrapper
903 // for these sincos functions using pointers for output values,
904 // but compiling such a wrapper requires correctly
905 // setting up the SIMD compilation flags. Perhaps we can consider this in the
906 // future to improve performance.
907 const auto sfn_sin = sleef_function_name(context, "sin", vec_t->getElementType(),
908 boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
909 const auto sfn_cos = sleef_function_name(context, "cos", vec_t->getElementType(),
910 boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
911
912 if (!sfn_sin.empty() && !sfn_cos.empty()) {
913 auto ret_sin = llvm_invoke_external(
914 s, sfn_sin, vec_t, {x},
915 // NOTE: in theory we may add ReadNone here as well,
916 // but for some reason, at least up to LLVM 10,
917 // this causes strange codegen issues. Revisit
918 // in the future.
919 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
920
921 auto ret_cos = llvm_invoke_external(
922 s, sfn_cos, vec_t, {x},
923 // NOTE: in theory we may add ReadNone here as well,
924 // but for some reason, at least up to LLVM 10,
925 // this causes strange codegen issues. Revisit
926 // in the future.
927 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
928
929 return {ret_sin, ret_cos};
930 }
931 }
932
933 // Compute sin and cos via intrinsics.
934 auto *sin_x = llvm_invoke_intrinsic(s, "llvm.sin", {x->getType()}, {x});
935 auto *cos_x = llvm_invoke_intrinsic(s, "llvm.cos", {x->getType()}, {x});
936
937 return {sin_x, cos_x};
938 #if defined(HEYOKA_HAVE_REAL128)
939 }
940 #endif
941 }
942
943 // Helper to compute abs(x_v).
llvm_abs(llvm_state & s,llvm::Value * x_v)944 llvm::Value *llvm_abs(llvm_state &s, llvm::Value *x_v)
945 {
946 #if defined(HEYOKA_HAVE_REAL128)
947 // Determine the scalar type of the vector argument.
948 auto *x_t = x_v->getType()->getScalarType();
949
950 if (x_t == llvm::Type::getFP128Ty(s.context())) {
951 return call_extern_vec(s, x_v, "fabsq");
952 } else {
953 #endif
954 return llvm_invoke_intrinsic(s, "llvm.fabs", {x_v->getType()}, {x_v});
955 #if defined(HEYOKA_HAVE_REAL128)
956 }
957 #endif
958 }
959
960 // Helper to reduce x modulo y, that is, to compute:
961 // x - y * floor(x / y).
llvm_modulus(llvm_state & s,llvm::Value * x,llvm::Value * y)962 llvm::Value *llvm_modulus(llvm_state &s, llvm::Value *x, llvm::Value *y)
963 {
964 auto &builder = s.builder();
965
966 #if defined(HEYOKA_HAVE_REAL128)
967 // Determine the scalar type of the vector arguments.
968 auto *x_t = x->getType()->getScalarType();
969
970 auto &context = s.context();
971
972 if (x_t == llvm::Type::getFP128Ty(context)) {
973 return call_extern_vec(s, x, y, "heyoka_modulus128");
974 } else {
975 #endif
976 auto quo = builder.CreateFDiv(x, y);
977 auto fl_quo = llvm_invoke_intrinsic(s, "llvm.floor", {quo->getType()}, {quo});
978
979 return builder.CreateFSub(x, builder.CreateFMul(y, fl_quo));
980 #if defined(HEYOKA_HAVE_REAL128)
981 }
982 #endif
983 }
984
985 // Minimum value, floating-point arguments. Implemented as std::min():
986 // return (b < a) ? b : a;
llvm_min(llvm_state & s,llvm::Value * a,llvm::Value * b)987 llvm::Value *llvm_min(llvm_state &s, llvm::Value *a, llvm::Value *b)
988 {
989 auto &builder = s.builder();
990
991 return builder.CreateSelect(builder.CreateFCmpOLT(b, a), b, a);
992 }
993
994 // Maximum value, floating-point arguments. Implemented as std::max():
995 // return (a < b) ? b : a;
llvm_max(llvm_state & s,llvm::Value * a,llvm::Value * b)996 llvm::Value *llvm_max(llvm_state &s, llvm::Value *a, llvm::Value *b)
997 {
998 auto &builder = s.builder();
999
1000 return builder.CreateSelect(builder.CreateFCmpOLT(a, b), b, a);
1001 }
1002
1003 // Branchless sign function.
1004 // NOTE: requires FP value.
llvm_sgn(llvm_state & s,llvm::Value * val)1005 llvm::Value *llvm_sgn(llvm_state &s, llvm::Value *val)
1006 {
1007 assert(val != nullptr);
1008 assert(val->getType()->getScalarType()->isFloatingPointTy());
1009
1010 auto &builder = s.builder();
1011
1012 // Build the zero constant.
1013 auto zero = llvm::Constant::getNullValue(val->getType());
1014
1015 // Run the comparisons.
1016 auto cmp0 = builder.CreateFCmpOLT(zero, val);
1017 auto cmp1 = builder.CreateFCmpOLT(val, zero);
1018
1019 // Convert to int32.
1020 llvm::Type *int_type;
1021 if (auto *v_t = llvm::dyn_cast<llvm_vector_type>(cmp0->getType())) {
1022 int_type = make_vector_type(builder.getInt32Ty(), boost::numeric_cast<std::uint32_t>(v_t->getNumElements()));
1023 } else {
1024 int_type = builder.getInt32Ty();
1025 }
1026 auto icmp0 = builder.CreateZExt(cmp0, int_type);
1027 auto icmp1 = builder.CreateZExt(cmp1, int_type);
1028
1029 // Compute and return the result.
1030 return builder.CreateSub(icmp0, icmp1);
1031 }
1032
1033 // Two-argument arctan.
1034 // NOTE: requires FP values of the same type.
llvm_atan2(llvm_state & s,llvm::Value * y,llvm::Value * x)1035 llvm::Value *llvm_atan2(llvm_state &s, llvm::Value *y, llvm::Value *x)
1036 {
1037 assert(y != nullptr);
1038 assert(x != nullptr);
1039 assert(y->getType() == x->getType());
1040 assert(y->getType()->getScalarType()->isFloatingPointTy());
1041
1042 auto &context = s.context();
1043
1044 // Determine the scalar type of the vector arguments.
1045 auto *x_t = x->getType()->getScalarType();
1046
1047 #if defined(HEYOKA_HAVE_REAL128)
1048 if (x_t == llvm::Type::getFP128Ty(context)) {
1049 return call_extern_vec(s, y, x, "atan2q");
1050 } else {
1051 #endif
1052 if (x_t == to_llvm_type<double>(context)) {
1053 if (auto vec_t = llvm::dyn_cast<llvm_vector_type>(x->getType())) {
1054 if (const auto sfn = sleef_function_name(context, "atan2", vec_t->getElementType(),
1055 boost::numeric_cast<std::uint32_t>(vec_t->getNumElements()));
1056 !sfn.empty()) {
1057 return llvm_invoke_external(
1058 s, sfn, vec_t, {y, x},
1059 // NOTE: in theory we may add ReadNone here as well,
1060 // but for some reason, at least up to LLVM 10,
1061 // this causes strange codegen issues. Revisit
1062 // in the future.
1063 {llvm::Attribute::NoUnwind, llvm::Attribute::Speculatable, llvm::Attribute::WillReturn});
1064 }
1065 }
1066
1067 return call_extern_vec(s, y, x, "atan2");
1068 } else if (x_t == to_llvm_type<long double>(context)) {
1069 return call_extern_vec(s, y, x,
1070 #if defined(_MSC_VER)
1071 // NOTE: it seems like the MSVC stdlib does not have an atan2l function,
1072 // because LLVM complains about the symbol "atan2l" not being
1073 // defined. Hence, use our own wrapper instead.
1074 "heyoka_atan2l"
1075 #else
1076 "atan2l"
1077 #endif
1078 );
1079 // LCOV_EXCL_START
1080 } else {
1081 throw std::invalid_argument(
1082 "Invalid floating-point type encountered in the LLVM implementation of atan2()");
1083 }
1084 // LCOV_EXCL_STOP
1085 #if defined(HEYOKA_HAVE_REAL128)
1086 }
1087 #endif
1088 }
1089
1090 namespace
1091 {
1092
1093 // Add a function to count the number of sign changes in the coefficients
1094 // of a polynomial of degree n. The coefficients are SIMD vectors of size batch_size
1095 // and scalar type T.
1096 template <typename T>
llvm_add_csc_impl(llvm_state & s,std::uint32_t n,std::uint32_t batch_size)1097 llvm::Function *llvm_add_csc_impl(llvm_state &s, std::uint32_t n, std::uint32_t batch_size)
1098 {
1099 assert(batch_size > 0u);
1100
1101 // Overflow check: we need to be able to index
1102 // into the array of coefficients.
1103 // LCOV_EXCL_START
1104 if (n == std::numeric_limits<std::uint32_t>::max()
1105 || batch_size > std::numeric_limits<std::uint32_t>::max() / (n + 1u)) {
1106 throw std::overflow_error("Overflow detected while adding a sign changes counter function");
1107 }
1108 // LCOV_EXCL_STOP
1109
1110 auto &md = s.module();
1111 auto &builder = s.builder();
1112 auto &context = s.context();
1113
1114 // Fetch the floating-point type.
1115 auto tp = to_llvm_vector_type<T>(context, batch_size);
1116
1117 // Fetch the function name.
1118 const auto fname = "heyoka_csc_degree_{}_{}"_format(n, llvm_mangle_type(tp));
1119
1120 // The function arguments:
1121 // - pointer to the return value,
1122 // - pointer to the array of coefficients.
1123 // NOTE: both pointers are to the scalar counterparts
1124 // of the vector types, so that we can call this from regular
1125 // C++ code.
1126 std::vector<llvm::Type *> fargs{llvm::PointerType::getUnqual(builder.getInt32Ty()),
1127 llvm::PointerType::getUnqual(to_llvm_type<T>(context))};
1128
1129 // Try to see if we already created the function.
1130 auto f = md.getFunction(fname);
1131
1132 if (f == nullptr) {
1133 // The function was not created before, do it now.
1134
1135 // Fetch the current insertion block.
1136 auto orig_bb = builder.GetInsertBlock();
1137
1138 // The return type is void.
1139 auto *ft = llvm::FunctionType::get(builder.getVoidTy(), fargs, false);
1140 // Create the function
1141 f = llvm::Function::Create(ft, llvm::Function::ExternalLinkage, fname, &md);
1142 assert(f != nullptr);
1143
1144 // Fetch the necessary function arguments.
1145 auto out_ptr = f->args().begin();
1146 out_ptr->setName("out_ptr");
1147 out_ptr->addAttr(llvm::Attribute::NoCapture);
1148 out_ptr->addAttr(llvm::Attribute::NoAlias);
1149 out_ptr->addAttr(llvm::Attribute::WriteOnly);
1150
1151 auto cf_ptr = f->args().begin() + 1;
1152 cf_ptr->setName("cf_ptr");
1153 cf_ptr->addAttr(llvm::Attribute::NoCapture);
1154 cf_ptr->addAttr(llvm::Attribute::NoAlias);
1155 cf_ptr->addAttr(llvm::Attribute::ReadOnly);
1156
1157 // Create a new basic block to start insertion into.
1158 builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
1159
1160 // Fetch the type for storing the last_nz_idx variable.
1161 auto last_nz_idx_t = make_vector_type(builder.getInt32Ty(), batch_size);
1162
1163 // The initial last nz idx is zero for all batch elements.
1164 auto last_nz_idx = builder.CreateAlloca(last_nz_idx_t);
1165 builder.CreateStore(llvm::Constant::getNullValue(last_nz_idx_t), last_nz_idx);
1166
1167 // NOTE: last_nz_idx is an index into the poly coefficient vector. Thus, in batch
1168 // mode, when loading from a vector of indices, we will have to apply an offset.
1169 // For instance, for batch_size = 4 and last_nz_idx = [0, 1, 1, 2], the actual
1170 // memory indices to load the scalar coefficients from are:
1171 // - 0 * 4 + 0 = 0
1172 // - 1 * 4 + 1 = 5
1173 // - 1 * 4 + 2 = 6
1174 // - 2 * 4 + 3 = 11.
1175 // That is, last_nz_idx * batch_size + offset, where offset is [0, 1, 2, 3].
1176 llvm::Value *offset;
1177 if (batch_size == 1u) {
1178 // In scalar mode the offset is simply zero.
1179 offset = builder.getInt32(0);
1180 } else {
1181 offset = llvm::UndefValue::get(make_vector_type(builder.getInt32Ty(), batch_size));
1182 for (std::uint32_t i = 0; i < batch_size; ++i) {
1183 offset = builder.CreateInsertElement(offset, builder.getInt32(i), i);
1184 }
1185 }
1186
1187 // Init the vector of coefficient pointers with the base pointer value.
1188 auto cf_ptr_v = vector_splat(builder, cf_ptr, batch_size);
1189
1190 // Init the return value with zero.
1191 auto retval = builder.CreateAlloca(last_nz_idx_t);
1192 builder.CreateStore(llvm::Constant::getNullValue(last_nz_idx_t), retval);
1193
1194 // The iteration range is [1, n].
1195 llvm_loop_u32(s, builder.getInt32(1), builder.getInt32(n + 1u), [&](llvm::Value *cur_n) {
1196 // Load the current poly coefficient(s).
1197 auto cur_cf = load_vector_from_memory(
1198 builder, builder.CreateInBoundsGEP(cf_ptr, {builder.CreateMul(cur_n, builder.getInt32(batch_size))}),
1199 batch_size);
1200
1201 // Load the last nonzero coefficient(s).
1202 auto last_nz_ptr_idx = builder.CreateAdd(
1203 offset, builder.CreateMul(builder.CreateLoad(last_nz_idx),
1204 vector_splat(builder, builder.getInt32(batch_size), batch_size)));
1205 auto last_nz_ptr = builder.CreateInBoundsGEP(cf_ptr_v, {last_nz_ptr_idx});
1206 auto last_nz_cf = batch_size > 1u
1207 ? gather_vector_from_memory(builder, cur_cf->getType(), last_nz_ptr, alignof(T))
1208 : static_cast<llvm::Value *>(builder.CreateLoad(last_nz_ptr));
1209
1210 // Compute the sign of the current coefficient(s).
1211 auto cur_sgn = llvm_sgn(s, cur_cf);
1212
1213 // Compute the sign of the last nonzero coefficient(s).
1214 auto last_nz_sgn = llvm_sgn(s, last_nz_cf);
1215
1216 // Add them and check if the result is zero (this indicates a sign change).
1217 auto cmp = builder.CreateICmpEQ(builder.CreateAdd(cur_sgn, last_nz_sgn),
1218 llvm::Constant::getNullValue(cur_sgn->getType()));
1219
1220 // We also need to check if last_nz_sgn is zero. If that is the case, it means
1221 // we haven't found any nonzero coefficient yet for the polynomial and we must
1222 // not modify retval yet.
1223 auto zero_cmp = builder.CreateICmpEQ(last_nz_sgn, llvm::Constant::getNullValue(last_nz_sgn->getType()));
1224 cmp = builder.CreateSelect(zero_cmp, llvm::Constant::getNullValue(cmp->getType()), cmp);
1225
1226 // Update retval.
1227 builder.CreateStore(builder.CreateAdd(builder.CreateLoad(retval), builder.CreateZExt(cmp, last_nz_idx_t)),
1228 retval);
1229
1230 // Update last_nz_idx.
1231 builder.CreateStore(
1232 builder.CreateSelect(builder.CreateICmpEQ(cur_sgn, llvm::Constant::getNullValue(cur_sgn->getType())),
1233 builder.CreateLoad(last_nz_idx), vector_splat(builder, cur_n, batch_size)),
1234 last_nz_idx);
1235 });
1236
1237 // Store the result.
1238 store_vector_to_memory(builder, out_ptr, builder.CreateLoad(retval));
1239
1240 // Return.
1241 builder.CreateRetVoid();
1242
1243 // Verify.
1244 s.verify_function(f);
1245
1246 // Restore the original insertion block.
1247 builder.SetInsertPoint(orig_bb);
1248 } else {
1249 // LCOV_EXCL_START
1250 // The function was created before. Check if the signatures match.
1251 if (!compare_function_signature(f, builder.getVoidTy(), fargs)) {
1252 throw std::invalid_argument(
1253 "Inconsistent function signature for the sign changes counter function detected");
1254 }
1255 // LCOV_EXCL_STOP
1256 }
1257
1258 return f;
1259 }
1260
1261 } // namespace
1262
llvm_add_csc_dbl(llvm_state & s,std::uint32_t n,std::uint32_t batch_size)1263 llvm::Function *llvm_add_csc_dbl(llvm_state &s, std::uint32_t n, std::uint32_t batch_size)
1264 {
1265 return llvm_add_csc_impl<double>(s, n, batch_size);
1266 }
1267
llvm_add_csc_ldbl(llvm_state & s,std::uint32_t n,std::uint32_t batch_size)1268 llvm::Function *llvm_add_csc_ldbl(llvm_state &s, std::uint32_t n, std::uint32_t batch_size)
1269 {
1270 return llvm_add_csc_impl<long double>(s, n, batch_size);
1271 }
1272
1273 #if defined(HEYOKA_HAVE_REAL128)
1274
llvm_add_csc_f128(llvm_state & s,std::uint32_t n,std::uint32_t batch_size)1275 llvm::Function *llvm_add_csc_f128(llvm_state &s, std::uint32_t n, std::uint32_t batch_size)
1276 {
1277 return llvm_add_csc_impl<mppp::real128>(s, n, batch_size);
1278 }
1279
1280 #endif
1281
1282 namespace
1283 {
1284
1285 // Variable template for the constant pi at different levels of precision.
1286 template <typename T>
1287 const auto inv_kep_E_pi = boost::math::constants::pi<T>();
1288
1289 #if defined(HEYOKA_HAVE_REAL128)
1290
1291 template <>
1292 const mppp::real128 inv_kep_E_pi<mppp::real128> = mppp::pi_128;
1293
1294 #endif
1295
1296 // Implementation of the inverse Kepler equation.
1297 template <typename T>
llvm_add_inv_kep_E_impl(llvm_state & s,std::uint32_t batch_size)1298 llvm::Function *llvm_add_inv_kep_E_impl(llvm_state &s, std::uint32_t batch_size)
1299 {
1300 using std::nextafter;
1301
1302 assert(batch_size > 0u);
1303
1304 auto &md = s.module();
1305 auto &builder = s.builder();
1306 auto &context = s.context();
1307
1308 // Fetch the floating-point type.
1309 auto tp = to_llvm_vector_type<T>(context, batch_size);
1310
1311 // Fetch the function name.
1312 const auto fname = "heyoka_inv_kep_E_{}"_format(llvm_mangle_type(tp));
1313
1314 // The function arguments:
1315 // - eccentricity,
1316 // - mean anomaly.
1317 std::vector<llvm::Type *> fargs{tp, tp};
1318
1319 // Try to see if we already created the function.
1320 auto f = md.getFunction(fname);
1321
1322 if (f == nullptr) {
1323 // The function was not created before, do it now.
1324
1325 // Fetch the current insertion block.
1326 auto orig_bb = builder.GetInsertBlock();
1327
1328 // The return type is tp.
1329 auto *ft = llvm::FunctionType::get(tp, fargs, false);
1330 // Create the function
1331 f = llvm::Function::Create(ft, llvm::Function::InternalLinkage, fname, &md);
1332 assert(f != nullptr);
1333
1334 // Fetch the necessary function arguments.
1335 auto ecc = f->args().begin();
1336 auto M_arg = f->args().begin() + 1;
1337
1338 // Create a new basic block to start insertion into.
1339 builder.SetInsertPoint(llvm::BasicBlock::Create(context, "entry", f));
1340
1341 // Create the return value.
1342 auto retval = builder.CreateAlloca(tp);
1343
1344 // Reduce M modulo 2*pi.
1345 auto M = llvm_modulus(s, M_arg, vector_splat(builder, codegen<T>(s, number{2 * inv_kep_E_pi<T>}), batch_size));
1346
1347 // Compute the initial guess from the usual elliptic expansion
1348 // to the third order in eccentricities:
1349 // E = M + e*sin(M) + e**2*sin(M)*cos(M) + e**3*sin(M)*(3/2*cos(M)**2 - 1/2) + ...
1350 auto [sin_M, cos_M] = llvm_sincos(s, M);
1351 // e*sin(M).
1352 auto e_sin_M = builder.CreateFMul(ecc, sin_M);
1353 // e*cos(M).
1354 auto e_cos_M = builder.CreateFMul(ecc, cos_M);
1355 // e**2.
1356 auto e2 = builder.CreateFMul(ecc, ecc);
1357 // cos(M)**2.
1358 auto cos_M_2 = builder.CreateFMul(cos_M, cos_M);
1359
1360 // 3/2 and 1/2 constants.
1361 auto c_3_2 = vector_splat(builder, codegen<T>(s, number{T(3) / 2}), batch_size);
1362 auto c_1_2 = vector_splat(builder, codegen<T>(s, number{T(1) / 2}), batch_size);
1363
1364 // M + e*sin(M).
1365 auto tmp1 = builder.CreateFAdd(M, e_sin_M);
1366 // e**2*sin(M)*cos(M).
1367 auto tmp2 = builder.CreateFMul(e_sin_M, e_cos_M);
1368 // e**3*sin(M).
1369 auto tmp3 = builder.CreateFMul(e2, e_sin_M);
1370 // 3/2*cos(M)**2 - 1/2.
1371 auto tmp4 = builder.CreateFSub(builder.CreateFMul(c_3_2, cos_M_2), c_1_2);
1372
1373 // Put it together.
1374 auto ig1 = builder.CreateFAdd(tmp1, tmp2);
1375 auto ig2 = builder.CreateFMul(tmp3, tmp4);
1376 auto ig = builder.CreateFAdd(ig1, ig2);
1377
1378 // Make extra sure the initial guess is in the [0, 2*pi) range.
1379 auto lb = vector_splat(builder, codegen<T>(s, number{0.}), batch_size);
1380 auto ub = vector_splat(builder, codegen<T>(s, number{nextafter(2 * inv_kep_E_pi<T>, T(0))}), batch_size);
1381 ig = llvm_max(s, ig, lb);
1382 ig = llvm_min(s, ig, ub);
1383
1384 // Store it.
1385 builder.CreateStore(ig, retval);
1386
1387 // Create the counter.
1388 auto *counter = builder.CreateAlloca(builder.getInt32Ty());
1389 builder.CreateStore(builder.getInt32(0), counter);
1390
1391 // Variables to store sin(E) and cos(E).
1392 auto sin_E = builder.CreateAlloca(tp);
1393 auto cos_E = builder.CreateAlloca(tp);
1394
1395 // Write the initial values for sin_E and cos_E.
1396 auto sin_cos_E = llvm_sincos(s, builder.CreateLoad(retval));
1397 builder.CreateStore(sin_cos_E.first, sin_E);
1398 builder.CreateStore(sin_cos_E.second, cos_E);
1399
1400 // Variable to hold the value of f(E) = E - e*sin(E) - M.
1401 auto fE = builder.CreateAlloca(tp);
1402 // Helper to compute f(E).
1403 auto fE_compute = [&]() {
1404 auto ret = builder.CreateFMul(ecc, builder.CreateLoad(sin_E));
1405 ret = builder.CreateFSub(builder.CreateLoad(retval), ret);
1406 return builder.CreateFSub(ret, M);
1407 };
1408 // Compute and store the initial value of f(E).
1409 builder.CreateStore(fE_compute(), fE);
1410
1411 // Define the stopping condition functor.
1412 // NOTE: hard-code this for the time being.
1413 auto max_iter = builder.getInt32(50);
1414 auto loop_cond = [&,
1415 // NOTE: tolerance is 4 * eps.
1416 tol = vector_splat(builder, codegen<T>(s, number{std::numeric_limits<T>::epsilon() * 4}),
1417 batch_size)]() -> llvm::Value * {
1418 auto c_cond = builder.CreateICmpULT(builder.CreateLoad(counter), max_iter);
1419
1420 // Keep on iterating as long as abs(f(E)) > tol.
1421 // NOTE: need reduction only in batch mode.
1422 auto tol_check = builder.CreateFCmpOGT(llvm_abs(s, builder.CreateLoad(fE)), tol);
1423 auto tol_cond = (batch_size == 1u) ? tol_check : builder.CreateOrReduce(tol_check);
1424
1425 // NOTE: this is a way of creating a logical AND.
1426 return builder.CreateSelect(c_cond, tol_cond, llvm::Constant::getNullValue(tol_cond->getType()));
1427 };
1428
1429 // Run the loop.
1430 llvm_while_loop(s, loop_cond, [&, one_c = vector_splat(builder, codegen<T>(s, number{1.}), batch_size)]() {
1431 // Compute the new value.
1432 auto old_val = builder.CreateLoad(retval);
1433 auto new_val = builder.CreateFDiv(
1434 builder.CreateLoad(fE), builder.CreateFSub(one_c, builder.CreateFMul(ecc, builder.CreateLoad(cos_E))));
1435 new_val = builder.CreateFSub(old_val, new_val);
1436
1437 // Bisect if new_val > ub.
1438 // NOTE: '>' is fine here, ub is the maximum allowed value.
1439 auto bcheck = builder.CreateFCmpOGT(new_val, ub);
1440 new_val = builder.CreateSelect(
1441 bcheck,
1442 builder.CreateFMul(vector_splat(builder, codegen<T>(s, number{T(1) / 2}), batch_size),
1443 builder.CreateFAdd(old_val, ub)),
1444 new_val);
1445
1446 // Bisect if new_val < lb.
1447 bcheck = builder.CreateFCmpOLT(new_val, lb);
1448 new_val = builder.CreateSelect(
1449 bcheck,
1450 builder.CreateFMul(vector_splat(builder, codegen<T>(s, number{T(1) / 2}), batch_size),
1451 builder.CreateFAdd(old_val, lb)),
1452 new_val);
1453
1454 // Store the new value.
1455 builder.CreateStore(new_val, retval);
1456
1457 // Update sin_E/cos_E.
1458 sin_cos_E = llvm_sincos(s, new_val);
1459 builder.CreateStore(sin_cos_E.first, sin_E);
1460 builder.CreateStore(sin_cos_E.second, cos_E);
1461
1462 // Update f(E).
1463 builder.CreateStore(fE_compute(), fE);
1464
1465 // Update the counter.
1466 builder.CreateStore(builder.CreateAdd(builder.CreateLoad(counter), builder.getInt32(1)), counter);
1467 });
1468
1469 // Check the counter.
1470 llvm_if_then_else(
1471 s, builder.CreateICmpEQ(builder.CreateLoad(counter), max_iter),
1472 [&]() {
1473 llvm_invoke_external(s, "heyoka_inv_kep_E_max_iter", builder.getVoidTy(), {},
1474 {llvm::Attribute::NoUnwind, llvm::Attribute::WillReturn});
1475 },
1476 []() {});
1477
1478 // Return the result.
1479 builder.CreateRet(builder.CreateLoad(retval));
1480
1481 // Verify.
1482 s.verify_function(f);
1483
1484 // Restore the original insertion block.
1485 builder.SetInsertPoint(orig_bb);
1486 } else {
1487 // The function was created before. Check if the signatures match.
1488 if (!compare_function_signature(f, tp, fargs)) {
1489 throw std::invalid_argument("Inconsistent function signature for the inverse Kepler equation detected");
1490 }
1491 }
1492
1493 return f;
1494 }
1495
1496 } // namespace
1497
llvm_add_inv_kep_E_dbl(llvm_state & s,std::uint32_t batch_size)1498 llvm::Function *llvm_add_inv_kep_E_dbl(llvm_state &s, std::uint32_t batch_size)
1499 {
1500 return llvm_add_inv_kep_E_impl<double>(s, batch_size);
1501 }
1502
llvm_add_inv_kep_E_ldbl(llvm_state & s,std::uint32_t batch_size)1503 llvm::Function *llvm_add_inv_kep_E_ldbl(llvm_state &s, std::uint32_t batch_size)
1504 {
1505 return llvm_add_inv_kep_E_impl<long double>(s, batch_size);
1506 }
1507
1508 #if defined(HEYOKA_HAVE_REAL128)
1509
llvm_add_inv_kep_E_f128(llvm_state & s,std::uint32_t batch_size)1510 llvm::Function *llvm_add_inv_kep_E_f128(llvm_state &s, std::uint32_t batch_size)
1511 {
1512 return llvm_add_inv_kep_E_impl<mppp::real128>(s, batch_size);
1513 }
1514
1515 #endif
1516
1517 namespace
1518 {
1519
1520 // Helper to create a global const array containing
1521 // all binomial coefficients up to (n, n). The coefficients are stored
1522 // as scalars and the return value is a pointer to the first coefficient.
1523 // The array has shape (n + 1, n + 1) and it is stored in row-major format.
1524 template <typename T>
llvm_add_bc_array_impl(llvm_state & s,std::uint32_t n)1525 llvm::Value *llvm_add_bc_array_impl(llvm_state &s, std::uint32_t n)
1526 {
1527 // Overflow check.
1528 // LCOV_EXCL_START
1529 if (n == std::numeric_limits<std::uint32_t>::max()
1530 || (n + 1u) > std::numeric_limits<std::uint32_t>::max() / (n + 1u)) {
1531 throw std::overflow_error("Overflow detected while adding an array of binomial coefficients");
1532 }
1533 // LCOV_EXCL_STOP
1534
1535 auto &md = s.module();
1536 auto &builder = s.builder();
1537 auto &context = s.context();
1538
1539 // Fetch the array type.
1540 auto *arr_type
1541 = llvm::ArrayType::get(to_llvm_type<T>(context), boost::numeric_cast<std::uint64_t>((n + 1u) * (n + 1u)));
1542
1543 // Generate the binomials as constants.
1544 std::vector<llvm::Constant *> bc_const;
1545 for (std::uint32_t i = 0; i <= n; ++i) {
1546 for (std::uint32_t j = 0; j <= n; ++j) {
1547 // NOTE: the Boost implementation requires j <= i. We don't care about
1548 // j > i anyway.
1549 const auto val = (j <= i) ? binomial<T>(i, j) : T(0);
1550 bc_const.push_back(llvm::cast<llvm::Constant>(codegen<T>(s, number{val})));
1551 }
1552 }
1553
1554 // Create the global array.
1555 auto *bc_const_arr = llvm::ConstantArray::get(arr_type, bc_const);
1556 auto *g_bc_const_arr = new llvm::GlobalVariable(md, bc_const_arr->getType(), true,
1557 llvm::GlobalVariable::InternalLinkage, bc_const_arr);
1558
1559 // Get out a pointer to the beginning of the array.
1560 return builder.CreateInBoundsGEP(g_bc_const_arr, {builder.getInt32(0), builder.getInt32(0)});
1561 }
1562
1563 } // namespace
1564
llvm_add_bc_array_dbl(llvm_state & s,std::uint32_t n)1565 llvm::Value *llvm_add_bc_array_dbl(llvm_state &s, std::uint32_t n)
1566 {
1567 return llvm_add_bc_array_impl<double>(s, n);
1568 }
1569
llvm_add_bc_array_ldbl(llvm_state & s,std::uint32_t n)1570 llvm::Value *llvm_add_bc_array_ldbl(llvm_state &s, std::uint32_t n)
1571 {
1572 return llvm_add_bc_array_impl<long double>(s, n);
1573 }
1574
1575 #if defined(HEYOKA_HAVE_REAL128)
1576
llvm_add_bc_array_f128(llvm_state & s,std::uint32_t n)1577 llvm::Value *llvm_add_bc_array_f128(llvm_state &s, std::uint32_t n)
1578 {
1579 return llvm_add_bc_array_impl<mppp::real128>(s, n);
1580 }
1581
1582 #endif
1583
1584 namespace
1585 {
1586
1587 // RAII helper to temporarily disable fast
1588 // math flags in a builder.
1589 class fmf_disabler
1590 {
1591 ir_builder &m_builder;
1592 llvm::FastMathFlags m_orig_fmf;
1593
1594 public:
fmf_disabler(ir_builder & b)1595 explicit fmf_disabler(ir_builder &b) : m_builder(b), m_orig_fmf(m_builder.getFastMathFlags())
1596 {
1597 // Reset the fast math flags.
1598 m_builder.setFastMathFlags(llvm::FastMathFlags{});
1599 }
~fmf_disabler()1600 ~fmf_disabler()
1601 {
1602 // Restore the original fast math flags.
1603 m_builder.setFastMathFlags(m_orig_fmf);
1604 }
1605
1606 fmf_disabler(const fmf_disabler &) = delete;
1607 fmf_disabler(fmf_disabler &&) = delete;
1608
1609 fmf_disabler &operator=(const fmf_disabler &) = delete;
1610 fmf_disabler &operator=(fmf_disabler &&) = delete;
1611 };
1612
1613 } // namespace
1614
1615 // NOTE: see the code in dfloat.hpp for the double-length primitives.
1616
1617 // Addition.
llvm_dl_add(llvm_state & state,llvm::Value * x_hi,llvm::Value * x_lo,llvm::Value * y_hi,llvm::Value * y_lo)1618 std::pair<llvm::Value *, llvm::Value *> llvm_dl_add(llvm_state &state, llvm::Value *x_hi, llvm::Value *x_lo,
1619 llvm::Value *y_hi, llvm::Value *y_lo)
1620 {
1621 auto &builder = state.builder();
1622
1623 // Temporarily disable the fast math flags.
1624 fmf_disabler fd(builder);
1625
1626 auto S = builder.CreateFAdd(x_hi, y_hi);
1627 auto T = builder.CreateFAdd(x_lo, y_lo);
1628 auto e = builder.CreateFSub(S, x_hi);
1629 auto f = builder.CreateFSub(T, x_lo);
1630
1631 auto t1 = builder.CreateFSub(S, e);
1632 t1 = builder.CreateFSub(x_hi, t1);
1633 auto s = builder.CreateFSub(y_hi, e);
1634 s = builder.CreateFAdd(s, t1);
1635
1636 t1 = builder.CreateFSub(T, f);
1637 t1 = builder.CreateFSub(x_lo, t1);
1638 auto t = builder.CreateFSub(y_lo, f);
1639 t = builder.CreateFAdd(t, t1);
1640
1641 s = builder.CreateFAdd(s, T);
1642 auto H = builder.CreateFAdd(S, s);
1643 auto h = builder.CreateFSub(S, H);
1644 h = builder.CreateFAdd(h, s);
1645
1646 h = builder.CreateFAdd(h, t);
1647 e = builder.CreateFAdd(H, h);
1648 f = builder.CreateFSub(H, e);
1649 f = builder.CreateFAdd(f, h);
1650
1651 return {e, f};
1652 }
1653
1654 // Less-than.
llvm_dl_lt(llvm_state & state,llvm::Value * x_hi,llvm::Value * x_lo,llvm::Value * y_hi,llvm::Value * y_lo)1655 llvm::Value *llvm_dl_lt(llvm_state &state, llvm::Value *x_hi, llvm::Value *x_lo, llvm::Value *y_hi, llvm::Value *y_lo)
1656 {
1657 auto &builder = state.builder();
1658
1659 // Temporarily disable the fast math flags.
1660 fmf_disabler fd(builder);
1661
1662 auto cond1 = builder.CreateFCmpOLT(x_hi, y_hi);
1663 auto cond2 = builder.CreateFCmpOEQ(x_hi, y_hi);
1664 auto cond3 = builder.CreateFCmpOLT(x_lo, y_lo);
1665 // NOTE: this is a logical AND.
1666 auto cond4 = builder.CreateSelect(cond2, cond3, llvm::ConstantInt::getNullValue(cond3->getType()));
1667 // NOTE: this is a logical OR.
1668 auto cond = builder.CreateSelect(cond1, llvm::ConstantInt::getAllOnesValue(cond4->getType()), cond4);
1669
1670 return cond;
1671 }
1672
1673 // Greater-than.
llvm_dl_gt(llvm_state & state,llvm::Value * x_hi,llvm::Value * x_lo,llvm::Value * y_hi,llvm::Value * y_lo)1674 llvm::Value *llvm_dl_gt(llvm_state &state, llvm::Value *x_hi, llvm::Value *x_lo, llvm::Value *y_hi, llvm::Value *y_lo)
1675 {
1676 auto &builder = state.builder();
1677
1678 // Temporarily disable the fast math flags.
1679 fmf_disabler fd(builder);
1680
1681 auto cond1 = builder.CreateFCmpOGT(x_hi, y_hi);
1682 auto cond2 = builder.CreateFCmpOEQ(x_hi, y_hi);
1683 auto cond3 = builder.CreateFCmpOGT(x_lo, y_lo);
1684 // NOTE: this is a logical AND.
1685 auto cond4 = builder.CreateSelect(cond2, cond3, llvm::ConstantInt::getNullValue(cond3->getType()));
1686 // NOTE: this is a logical OR.
1687 auto cond = builder.CreateSelect(cond1, llvm::ConstantInt::getAllOnesValue(cond4->getType()), cond4);
1688
1689 return cond;
1690 }
1691
1692 // NOTE: this will check that a pointer ptr passed to
1693 // a GEP instruction points, after the removal of vector,
1694 // to a value of type tp. This how the deprecated CreateInBoundsGEP()
1695 // function is implemented.
llvm_depr_GEP_type_check(llvm::Value * ptr,llvm::Type * tp)1696 bool llvm_depr_GEP_type_check(llvm::Value *ptr, llvm::Type *tp)
1697 {
1698 assert(llvm::isa<llvm::PointerType>(ptr->getType())); // LCOV_EXCL_LINE
1699
1700 return ptr->getType()->getScalarType()->getPointerElementType() == tp;
1701 }
1702
1703 } // namespace heyoka::detail
1704
1705 // NOTE: this function will be called by the LLVM implementation
1706 // of the inverse Kepler function when the maximum number of iterations
1707 // is exceeded.
heyoka_inv_kep_E_max_iter()1708 extern "C" HEYOKA_DLL_PUBLIC void heyoka_inv_kep_E_max_iter() noexcept
1709 {
1710 heyoka::detail::get_logger()->warn("iteration limit exceeded while solving the elliptic inverse Kepler equation");
1711 }
1712