1 /* Copyright 2009-2016 Francesco Biscani (bluescarni@gmail.com)
2 
3 This file is part of the Piranha library.
4 
5 The Piranha library is free software; you can redistribute it and/or modify
6 it under the terms of either:
7 
8   * the GNU Lesser General Public License as published by the Free
9     Software Foundation; either version 3 of the License, or (at your
10     option) any later version.
11 
12 or
13 
14   * the GNU General Public License as published by the Free Software
15     Foundation; either version 3 of the License, or (at your option) any
16     later version.
17 
18 or both in parallel, as here.
19 
20 The Piranha library is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
22 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
23 for more details.
24 
25 You should have received copies of the GNU General Public License and the
26 GNU Lesser General Public License along with the Piranha library.  If not,
27 see https://www.gnu.org/licenses/. */
28 
29 #ifndef PIRANHA_POLYNOMIAL_HPP
30 #define PIRANHA_POLYNOMIAL_HPP
31 
32 #include <algorithm>
33 #include <boost/numeric/conversion/cast.hpp>
34 #include <cmath> // For std::ceil.
35 #include <cstddef>
36 #include <functional>
37 #include <initializer_list>
38 #include <iterator>
39 #include <limits>
40 #include <map>
41 #include <mutex>
42 #include <numeric>
43 #include <stdexcept>
44 #include <string>
45 #include <tuple>
46 #include <type_traits>
47 #include <utility>
48 #include <vector>
49 
50 #include "base_series_multiplier.hpp"
51 #include "config.hpp"
52 #include "debug_access.hpp"
53 #include "detail/atomic_flag_array.hpp"
54 #include "detail/cf_mult_impl.hpp"
55 #include "detail/divisor_series_fwd.hpp"
56 #include "detail/parallel_vector_transform.hpp"
57 #include "detail/poisson_series_fwd.hpp"
58 #include "detail/polynomial_fwd.hpp"
59 #include "detail/safe_integral_adder.hpp"
60 #include "detail/sfinae_types.hpp"
61 #include "exceptions.hpp"
62 #include "forwarding.hpp"
63 #include "ipow_substitutable_series.hpp"
64 #include "is_cf.hpp"
65 #include "key_is_multipliable.hpp"
66 #include "kronecker_array.hpp"
67 #include "kronecker_monomial.hpp"
68 #include "math.hpp"
69 #include "monomial.hpp"
70 #include "mp_integer.hpp"
71 #include "pow.hpp"
72 #include "power_series.hpp"
73 #include "safe_cast.hpp"
74 #include "series.hpp"
75 #include "series_multiplier.hpp"
76 #include "settings.hpp"
77 #include "substitutable_series.hpp"
78 #include "symbol.hpp"
79 #include "symbol_set.hpp"
80 #include "t_substitutable_series.hpp"
81 #include "thread_pool.hpp"
82 #include "trigonometric_series.hpp"
83 #include "tuning.hpp"
84 #include "type_traits.hpp"
85 
86 namespace piranha
87 {
88 
89 namespace detail
90 {
91 
92 // Tag for polynomial instances.
93 struct polynomial_tag {
94 };
95 
96 // Type trait to check the key type in polynomial.
97 template <typename T>
98 struct is_polynomial_key {
99     static const bool value = false;
100 };
101 
102 template <typename T>
103 struct is_polynomial_key<kronecker_monomial<T>> {
104     static const bool value = true;
105 };
106 
107 template <typename T, typename U>
108 struct is_polynomial_key<monomial<T, U>> {
109     static const bool value = true;
110 };
111 
112 // Implementation detail to check if the monomial key supports the linear_argument() method.
113 template <typename Key>
114 struct key_has_linarg : detail::sfinae_types {
115     template <typename U>
116     static auto test(const U &u) -> decltype(u.linear_argument(std::declval<const symbol_set &>()));
117     static no test(...);
118     static const bool value = std::is_same<std::string, decltype(test(std::declval<Key>()))>::value;
119 };
120 
121 // Division utilities.
122 // Return iterator to the leading term of a polynomial, as defined by the operator<()
123 // of its key type. This is always available for any monomial type.
124 template <typename PType>
poly_lterm(const PType & p)125 inline auto poly_lterm(const PType &p) -> decltype(p._container().begin())
126 {
127     piranha_assert(!p.empty());
128     using term_type = typename PType::term_type;
129     return std::max_element(p._container().begin(), p._container().end(),
130                             [](const term_type &t1, const term_type &t2) { return t1.m_key < t2.m_key; });
131 }
132 
133 // Check that there are no negative exponents
134 // in a polynomial.
135 template <typename PType>
poly_expo_checker(const PType & p)136 inline void poly_expo_checker(const PType &p)
137 {
138     using term_type = typename PType::term_type;
139     using expo_type = typename term_type::key_type::value_type;
140     // Don't run any check if the exponent is unsigned.
141     if (std::is_integral<expo_type>::value && std::is_unsigned<expo_type>::value) {
142         return;
143     }
144     const auto &args = p.get_symbol_set();
145     if (std::any_of(p._container().begin(), p._container().end(),
146                     [&args](const term_type &t) { return t.m_key.has_negative_exponent(args); })) {
147         piranha_throw(std::invalid_argument, "negative exponents are not allowed");
148     }
149 }
150 
151 // Multiply polynomial by non-zero cf in place. Preconditions:
152 // - a is not zero.
153 // Type requirements:
154 // - cf type supports in-place multiplication.
155 template <typename PType>
poly_cf_mult(const typename PType::term_type::cf_type & a,PType & p)156 inline void poly_cf_mult(const typename PType::term_type::cf_type &a, PType &p)
157 {
158     piranha_assert(!math::is_zero(a));
159     const auto it_f = p._container().end();
160     for (auto it = p._container().begin(); it != it_f; ++it) {
161         it->m_cf *= a;
162         piranha_assert(!math::is_zero(it->m_cf));
163     }
164 }
165 
166 // Divide polynomial by non-zero cf in place. Preconditions:
167 // - a is not zero.
168 // Type requirements:
169 // - cf type supports divexact().
170 // NOTE: this is always used in exact divisions, we could wrap mp_integer::_divexact()
171 // to improve performance.
172 template <typename PType>
poly_exact_cf_div(PType & p,const typename PType::term_type::cf_type & a)173 inline void poly_exact_cf_div(PType &p, const typename PType::term_type::cf_type &a)
174 {
175     piranha_assert(!math::is_zero(a));
176     const auto it_f = p._container().end();
177     for (auto it = p._container().begin(); it != it_f; ++it) {
178         math::divexact(it->m_cf, it->m_cf, a);
179         piranha_assert(!math::is_zero(it->m_cf));
180     }
181 }
182 
183 // Univariate polynomial GCD via PRS-SR.
184 // Implementation based on subresultant polynomial remainder sequence:
185 // https://en.wikipedia.org/wiki/Polynomial_greatest_common_divisor
186 // See also Zippel, 8.6. This implementation is actually based on
187 // Winkler, chapter 4.
188 // NOTE: here we do not need the full sequence of remainders, consider
189 // removing the vector of polys once this is debugged and tested.
190 // Preconditions:
191 // - univariate polynomials on the same variable.
192 // Type requirements:
193 // - cf type supports exponentiation to unsigned, yielding the same type,
194 // - cf type * cf type is still cf type, and multipliable in-place,
195 // - cf type has divexact.
196 template <typename PType>
gcd_prs_sr(PType a,PType b)197 inline PType gcd_prs_sr(PType a, PType b)
198 {
199     using term_type = typename PType::term_type;
200     using cf_type = typename term_type::cf_type;
201     using key_type = typename term_type::key_type;
202     // Only univariate polynomials are allowed.
203     piranha_assert(a.get_symbol_set().size() == 1u && a.get_symbol_set() == b.get_symbol_set());
204     // If one of the two is zero, the gcd is the other.
205     if (math::is_zero(a) && !math::is_zero(b)) {
206         return b;
207     }
208     if (!math::is_zero(a) && math::is_zero(b)) {
209         return a;
210     }
211     // If both are zero, return zero.
212     if (math::is_zero(a) && math::is_zero(b)) {
213         PType retval;
214         retval.set_symbol_set(a.get_symbol_set());
215         return retval;
216     }
217     // Check exponents.
218     poly_expo_checker(a);
219     poly_expo_checker(b);
220     // Order so that deg(a) >= deg(b).
221     if (poly_lterm(a)->m_key < poly_lterm(b)->m_key) {
222         std::swap(a, b);
223     }
224     // Cache the arguments set. Make a copy as "a" will be moved below.
225     const auto args = a.get_symbol_set();
226     // NOTE: coefficients are alway ctible from ints.
227     cf_type h(1), g(1);
228     // Store the content of a and b for later use.
229     auto a_cont = a.content(), b_cont = b.content();
230     // NOTE: it seems like removing the content from the inputs
231     // can help performance. Let's revisit this after we implement
232     // the sorted data structure for polys.
233     // poly_exact_cf_div(a,a_cont);
234     // poly_exact_cf_div(b,b_cont);
235     std::vector<PType> F;
236     using size_type = typename std::vector<PType>::size_type;
237     F.push_back(std::move(a));
238     F.push_back(std::move(b));
239     PType fprime(F.back());
240     size_type i = 2u;
241     // A key representing the constant univariate monomial.
242     // NOTE: all monomials support construction from init list.
243     key_type zero_key{typename key_type::value_type(0)};
244     while (fprime.size() != 0u && poly_lterm(fprime)->m_key != zero_key) {
245         auto l2 = poly_lterm(F[static_cast<size_type>(i - 2u)]), l1 = poly_lterm(F[static_cast<size_type>(i - 1u)]);
246         // NOTE: we are using the degree here in order to maintain compatibility with
247         // all monomials. There is some small overhead in this, but it should not
248         // matter too much.
249         // NOTE: this will be used only on monomials with integral exponents, so it is always valid.
250         integer delta(l2->m_key.degree(args)), l1d(l1->m_key.degree(args));
251         piranha_assert(delta >= l1d);
252         // NOTE: this is a pseudo-remainder operation here, we don't use the poly method as we need the delta
253         // information below.
254         delta -= l1d;
255         poly_cf_mult(math::pow(l1->m_cf, static_cast<unsigned>(delta + 1)), F[static_cast<size_type>(i - 2u)]);
256         fprime = PType::udivrem(F[static_cast<size_type>(i - 2u)], F[static_cast<size_type>(i - 1u)]).second;
257         if (fprime.size() != 0u) {
258             const cf_type h_delta = math::pow(h, static_cast<unsigned>(delta));
259             auto tmp(fprime);
260             poly_exact_cf_div(tmp, g * h_delta);
261             g = l1->m_cf;
262             h = (math::pow(g, static_cast<unsigned>(delta)) * h);
263             math::divexact(h, h, h_delta);
264             F.push_back(std::move(tmp));
265             safe_integral_adder(i, size_type(1u));
266         }
267     }
268     auto retval = std::move(F.back());
269     auto content = retval.content();
270     poly_exact_cf_div(retval, content);
271     poly_cf_mult(math::gcd(a_cont, b_cont), retval);
272     return retval;
273 }
274 
275 // Establish the limits of the exponents of two polynomials. Will throw if a negative exponent is encountered.
276 // Preconditions:
277 // - equal symbol sets.
278 // - non-empty polys.
279 template <typename PType>
280 inline std::vector<std::pair<typename PType::term_type::key_type::value_type,
281                              typename PType::term_type::key_type::value_type>>
poly_establish_limits(const PType & n,const PType & d)282 poly_establish_limits(const PType &n, const PType &d)
283 {
284     using expo_type = typename PType::term_type::key_type::value_type;
285     using v_type = std::vector<expo_type>;
286     using vp_type = std::vector<std::pair<expo_type, expo_type>>;
287     using vp_size_type = typename vp_type::size_type;
288     piranha_assert(n.get_symbol_set() == d.get_symbol_set());
289     piranha_assert(n.size() != 0u && d.size() != 0u);
290     // Return value, init with the first element from the numerator.
291     vp_type retval;
292     v_type tmp;
293     auto it = n._container().begin(), it_f = n._container().end();
294     it->m_key.extract_exponents(tmp, n.get_symbol_set());
295     std::transform(tmp.begin(), tmp.end(), std::back_inserter(retval),
296                    [](const expo_type &e) { return std::make_pair(e, e); });
297     for (; it != it_f; ++it) {
298         it->m_key.extract_exponents(tmp, n.get_symbol_set());
299         for (decltype(tmp.size()) i = 0u; i < tmp.size(); ++i) {
300             // NOTE: greater-than comparability is ensured for all exponent types on which this
301             // method will be used.
302             if (tmp[i] < retval[static_cast<vp_size_type>(i)].first) {
303                 retval[static_cast<vp_size_type>(i)].first = tmp[i];
304             } else if (tmp[i] > retval[static_cast<vp_size_type>(i)].second) {
305                 retval[static_cast<vp_size_type>(i)].second = tmp[i];
306             }
307         }
308     }
309     // Denominator.
310     it_f = d._container().end();
311     for (it = d._container().begin(); it != it_f; ++it) {
312         it->m_key.extract_exponents(tmp, n.get_symbol_set());
313         for (decltype(tmp.size()) i = 0u; i < tmp.size(); ++i) {
314             if (tmp[i] < retval[static_cast<vp_size_type>(i)].first) {
315                 retval[static_cast<vp_size_type>(i)].first = tmp[i];
316             } else if (tmp[i] > retval[static_cast<vp_size_type>(i)].second) {
317                 retval[static_cast<vp_size_type>(i)].second = tmp[i];
318             }
319         }
320     }
321     // Check for negative expos.
322     if (std::any_of(retval.begin(), retval.end(),
323                     [](const std::pair<expo_type, expo_type> &p) { return p.first < expo_type(0); })) {
324         piranha_throw(std::invalid_argument, "negative exponents are not allowed");
325     }
326     return retval;
327 }
328 
329 // Simple dot product.
330 template <typename It1, typename It2>
poly_dot_product(It1 begin1,It1 end1,It2 begin2)331 inline auto poly_dot_product(It1 begin1, It1 end1, It2 begin2) -> decltype((*begin1) * (*begin2))
332 {
333     using ret_type = decltype((*begin1) * (*begin2));
334     return std::inner_product(begin1, end1, begin2, ret_type(0));
335 }
336 
337 // Convert multivariate polynomial to univariate. Preconditions:
338 // - at least 1 symbolic argument,
339 // - identical symbol set for n and d,
340 // - non-null input polys.
341 template <typename Cf, typename Key>
342 inline std::tuple<polynomial<Cf, k_monomial>, polynomial<Cf, k_monomial>, std::vector<typename k_monomial::value_type>>
poly_to_univariate(const polynomial<Cf,Key> & n,const polynomial<Cf,Key> & d)343 poly_to_univariate(const polynomial<Cf, Key> &n, const polynomial<Cf, Key> &d)
344 {
345     piranha_assert(n.get_symbol_set().size() > 0u);
346     piranha_assert(n.get_symbol_set() == d.get_symbol_set());
347     piranha_assert(n.size() != 0u && d.size() != 0u);
348     using k_expo_type = typename k_monomial::value_type;
349     using expo_type = typename Key::value_type;
350     // Cache the args.
351     const auto &args = n.get_symbol_set();
352     // Establish the limits of the two polynomials.
353     auto limits = poly_establish_limits(n, d);
354     // Extract just the vector of max exponents.
355     std::vector<expo_type> max_expos;
356     std::transform(limits.begin(), limits.end(), std::back_inserter(max_expos),
357                    [](const std::pair<expo_type, expo_type> &p) { return p.second; });
358     using me_size_type = decltype(max_expos.size());
359     // Next we need to check the limits of the codification. We do everything in multiprecision and
360     // then we will attempt to cast back.
361     std::vector<integer> mp_cv;
362     mp_cv.push_back(integer(1));
363     for (decltype(args.size()) i = 0u; i < args.size(); ++i) {
364         mp_cv.push_back(mp_cv.back() * (integer(1) + max_expos[static_cast<me_size_type>(i)]));
365     }
366     // Determine the max univariate expo.
367     integer mp_kM = poly_dot_product(max_expos.begin(), max_expos.end(), mp_cv.begin());
368     // Attempt casting everything down.
369     auto kM = static_cast<k_expo_type>(mp_kM);
370     (void)kM;
371     std::vector<k_expo_type> cv;
372     std::transform(mp_cv.begin(), mp_cv.end(), std::back_inserter(cv),
373                    [](const integer &z) { return static_cast<k_expo_type>(z); });
374     // Proceed to the codification.
375     auto encode_poly = [&args, &cv](const polynomial<Cf, Key> &p) -> polynomial<Cf, k_monomial> {
376         using term_type = typename polynomial<Cf, k_monomial>::term_type;
377         polynomial<Cf, k_monomial> retval;
378         // The only symbol will be the first symbol from the input polys.
379         symbol_set ss;
380         ss.add(*args.begin());
381         retval.set_symbol_set(ss);
382         // Prepare rehashed.
383         retval._container().rehash(p._container().bucket_count());
384         // Go with the codification.
385         std::vector<expo_type> tmp_expos;
386         for (const auto &t : p._container()) {
387             // Extract the exponents.
388             t.m_key.extract_exponents(tmp_expos, args);
389             // NOTE: unique insertion?
390             retval.insert(term_type{t.m_cf, k_monomial(static_cast<k_expo_type>(
391                                                 poly_dot_product(tmp_expos.begin(), tmp_expos.end(), cv.begin())))});
392         }
393         return retval;
394     };
395     return std::make_tuple(encode_poly(n), encode_poly(d), cv);
396 }
397 
398 // From univariate to multivariate. Preconditions:
399 // - n has only 1 symbol,
400 // - coding vector and args are consistent,
401 // - non-empty poly.
402 template <typename Key, typename Cf>
poly_from_univariate(const polynomial<Cf,k_monomial> & n,const std::vector<typename k_monomial::value_type> & cv,const symbol_set & args)403 inline polynomial<Cf, Key> poly_from_univariate(const polynomial<Cf, k_monomial> &n,
404                                                 const std::vector<typename k_monomial::value_type> &cv,
405                                                 const symbol_set &args)
406 {
407     piranha_assert(n.get_symbol_set().size() == 1u);
408     piranha_assert(cv.size() > 1u);
409     piranha_assert(args.size() == cv.size() - 1u);
410     piranha_assert(*n.get_symbol_set().begin() == *args.begin());
411     piranha_assert(n.size() != 0u);
412     using expo_type = typename Key::value_type;
413     using cv_size_type = decltype(cv.size());
414     auto decode_poly = [&args, &cv](const polynomial<Cf, k_monomial> &p) -> polynomial<Cf, Key> {
415         using term_type = typename polynomial<Cf, Key>::term_type;
416         // Init the return value.
417         polynomial<Cf, Key> retval;
418         retval.set_symbol_set(args);
419         retval._container().rehash(p._container().bucket_count());
420         // Go with the decodification.
421         std::vector<expo_type> tmp_expos;
422         using v_size_type = typename std::vector<expo_type>::size_type;
423         tmp_expos.resize(safe_cast<v_size_type>(args.size()));
424         for (const auto &t : p._container()) {
425             for (v_size_type i = 0u; i < tmp_expos.size(); ++i) {
426                 // NOTE: use construction here rather than static_cast as expo_type could be integer.
427                 tmp_expos[i] = expo_type((t.m_key.get_int() % cv[static_cast<cv_size_type>(i + 1u)])
428                                          / cv[static_cast<cv_size_type>(i)]);
429             }
430             retval.insert(term_type{t.m_cf, Key(tmp_expos.begin(), tmp_expos.end())});
431         }
432         return retval;
433     };
434     return decode_poly(n);
435 }
436 
437 // Exception to signal that heuristic GCD failed.
438 struct gcdheu_failure final : std::runtime_error {
gcdheu_failurepiranha::detail::gcdheu_failure439     explicit gcdheu_failure() : std::runtime_error("")
440     {
441     }
442 };
443 
444 // This implementation is taken straight from the Geddes book. We used to have an implementation based on the paper
445 // from Liao & Fateman, which supposedly is (or was) implemented in Maple, but that implementation had a bug which
446 // showed
447 // up very rarely. Strangely enough, the sympy implementation of heugcd (also based on the Liao paper) has exactly the
448 // same
449 // issue. Thus the motivation to use the implementation from Geddes.
450 // This function returns:
451 // - a status flag, to signal if the heuristic failed (true if failure occurred),
452 // - the GCD and the 2 cofactors (a / GCD and b / GCD).
453 // The function can also fail via throwing gcdheu_failure, which is a different type of failure from the status flag
454 // (the
455 // gcdheu_failure signals that too large integers are being generated, the status flag signals that too many iterations
456 // have been run). This difference is of no consequence for the user, as the exception is caught in try_gcdheu.
457 template <typename Poly>
gcdheu_geddes(const Poly & a,const Poly & b,symbol_set::size_type s_index=0u)458 std::pair<bool, std::tuple<Poly, Poly, Poly>> gcdheu_geddes(const Poly &a, const Poly &b,
459                                                             symbol_set::size_type s_index = 0u)
460 {
461     static_assert(is_mp_integer<typename Poly::term_type::cf_type>::value, "Invalid type.");
462     // Require identical symbol sets.
463     piranha_assert(a.get_symbol_set() == b.get_symbol_set());
464     // Helper function to compute the symmetric version of the modulo operation.
465     // See Geddes 4.2 and:
466     // http://www.maplesoft.com/support/help/maple/view.aspx?path=mod
467     auto mod_s = [](const integer &n, const integer &m) -> integer {
468         // NOTE: require that the denominator is strictly positive.
469         piranha_assert(m > 0);
470         auto retval = n % m;
471         if (retval < -((m - 1) / 2)) {
472             retval += m;
473         } else if (retval > m / 2) {
474             retval -= m;
475         }
476         return retval;
477     };
478     // Apply mod_s to all coefficients in a poly.
479     auto mod_s_poly = [mod_s](const Poly &p, const integer &m) -> Poly {
480         Poly retval(p);
481         auto it = retval._container().begin();
482         for (; it != retval._container().end();) {
483             it->m_cf = mod_s(it->m_cf, m);
484             if (math::is_zero(it->m_cf)) {
485                 it = retval._container().erase(it);
486             } else {
487                 ++it;
488             }
489         }
490         return retval;
491     };
492     // Cache it.
493     const auto &args = a.get_symbol_set();
494     // The contents of input polys.
495     const integer a_cont = a.content(), b_cont = b.content();
496     // Handle empty polys.
497     if (a.size() == 0u && b.size() == 0u) {
498         Poly retval;
499         retval.set_symbol_set(args);
500         return std::make_pair(false, std::make_tuple(retval, retval, retval));
501     }
502     if (a.size() == 0u) {
503         Poly retval;
504         retval.set_symbol_set(args);
505         // NOTE: 'retval + 1' could be improved performance-wise.
506         return std::make_pair(false, std::make_tuple(b, retval, retval + 1));
507     }
508     if (b.size() == 0u) {
509         Poly retval;
510         retval.set_symbol_set(args);
511         // NOTE: 'retval + 1' could be improved performance-wise.
512         return std::make_pair(false, std::make_tuple(a, retval + 1, retval));
513     }
514     // If we are at the first recursion, check the exponents.
515     if (s_index == 0u) {
516         poly_expo_checker(a);
517         poly_expo_checker(b);
518     }
519     // This is the case in which the polynomials are reduced to two integers (essentially, the zerovariate case).
520     if (a.is_single_coefficient() && b.is_single_coefficient()) {
521         piranha_assert(a.size() == 1u && b.size() == 1u);
522         Poly retval;
523         retval.set_symbol_set(args);
524         auto gab = math::gcd(a_cont, b_cont);
525         // NOTE: the creation of the gcd retval and of the cofactors can be improved performance-wise
526         // (e.g., exact division of coefficients).
527         return std::make_pair(false, std::make_tuple(retval + gab, a / gab, b / gab));
528     }
529     // s_index is the recursion index for the gcdheu() call.
530     piranha_assert(s_index < args.size());
531     // We need to work on primitive polys.
532     // NOTE: performance improvements.
533     const auto ap = a / a_cont;
534     const auto bp = b / b_cont;
535     // The current variable.
536     const std::string var = args[s_index].get_name();
537     integer xi{2 * std::min(ap.height(), bp.height()) + 2};
538     // Functor to update xi at each iteration.
539     auto update_xi = [&xi]() {
540         // NOTE: this is just a way of making xi bigger without too much correlation
541         // to its previous value.
542         xi = (xi * 73794) / 27011;
543     };
544     // NOTE: the values of 6 iterations is taken straight from the original implementation of the algorithm.
545     // The bits limit is larger than the original implementation, on the grounds that GMP should be rather efficient
546     // at bignum. It might as well be that other tuning params are better, but let's keep them hardcoded for now.
547     for (int i = 0; i < 6; ++i) {
548         if (integer(xi.bits_size()) * std::max(ap.degree({var}), bp.degree({var})) > 100000) {
549             piranha_throw(gcdheu_failure, );
550         }
551         auto res = gcdheu_geddes(ap.subs(var, xi), bp.subs(var, xi), static_cast<symbol_set::size_type>(s_index + 1u));
552         if (!res.first) {
553             Poly &gamma = std::get<0u>(res.second);
554             Poly G;
555             G.set_symbol_set(args);
556             unsigned j = 0;
557             while (gamma.size() != 0u) {
558                 Poly g(mod_s_poly(gamma, xi));
559                 // NOTE: term mult here could be useful, but probably it makes more
560                 // sense to just improve the series multiplication routine.
561                 G += g * math::pow(Poly{var}, j);
562                 gamma -= g;
563                 poly_exact_cf_div(gamma, xi);
564                 safe_integral_adder(j, 1u);
565             }
566             // NOTE: I don't know if this is possible at all, but it's better to be safe than
567             // sorry. This prevents divisions by zero below.
568             if (G.size() == 0u) {
569                 update_xi();
570                 continue;
571             }
572             try {
573                 // For the division test, we need to operate on the primitive
574                 // part of the candidate GCD.
575                 poly_exact_cf_div(G, G.content());
576                 // The division test.
577                 auto cf_a = ap / G;
578                 auto cf_b = bp / G;
579                 // Need to multiply by the GCD of the contents of a and b in order to produce
580                 // the GCD with the largest coefficients.
581                 const auto F = math::gcd(a_cont, b_cont);
582                 poly_cf_mult(F, G);
583                 // The cofactors above have been computed with the primitive parts of G and a,b.
584                 // We need to rescale them in order to find the true cofactors, that is, a / G
585                 // and b / G.
586                 poly_cf_mult(a_cont, cf_a);
587                 poly_exact_cf_div(cf_a, F);
588                 poly_cf_mult(b_cont, cf_b);
589                 poly_exact_cf_div(cf_b, F);
590                 return std::make_pair(false, std::make_tuple(std::move(G), std::move(cf_a), std::move(cf_b)));
591             } catch (const math::inexact_division &) {
592                 // Continue in case the division check fails.
593             }
594         }
595         update_xi();
596     }
597     return std::make_pair(true, std::make_tuple(Poly{}, Poly{}, Poly{}));
598 }
599 
600 // Namespace for generic polynomial division enabler, used to stuff
601 // in handy aliases. We put it here in order to share it with the enabler
602 // for the divexact specialisation.
603 namespace ptd
604 {
605 
606 template <typename T>
607 using cf_t = typename T::term_type::cf_type;
608 
609 template <typename T>
610 using expo_t = typename T::term_type::key_type::value_type;
611 
612 template <typename T, typename U>
613 using enabler = typename std::
614     enable_if<std::is_base_of<polynomial_tag, T>::value && std::is_same<T, U>::value
615                   && is_multipliable_in_place<cf_t<T>>::value
616                   && std::is_same<decltype(std::declval<const cf_t<T> &>() * std::declval<const cf_t<T> &>()),
617                                   cf_t<T>>::value
618                   && has_exact_division<cf_t<T>>::value && has_exact_ring_operations<cf_t<T>>::value
619                   && is_subtractable_in_place<T>::value
620                   && (std::is_integral<expo_t<T>>::value || is_mp_integer<expo_t<T>>::value),
621               int>::type;
622 }
623 }
624 
625 /// Polynomial GCD algorithms.
626 enum class polynomial_gcd_algorithm {
627     /// Automatic selection.
628     automatic,
629     /// Subresultant PRS.
630     prs_sr,
631     /// Heuristic GCD.
632     heuristic
633 };
634 
635 /// Polynomial class.
636 /**
637  * This class represents multivariate polynomials as collections of multivariate polynomial terms.
638  * \p Cf represents the ring over which the polynomial is defined, while \p Key represents the monomial type.
639  *
640  * Polynomials support an automatic degree-based truncation mechanism, disabled by default, which comes into play during
641  * polynomial multiplication. It allows to discard automatically all those terms, generated during series
642  * multiplication, whose total or partial degree is greater than a specified limit. This mechanism can be configured via
643  * a set of thread-safe static methods, and it is enabled if:
644  * - the total and partial degree of the series are represented by the same type \p D,
645  * - all the truncation-related requirements in piranha::power_series are satsified,
646  * - the type \p D is equality-comparable, subtractable and the type resulting from the subtraction is still \p D.
647  *
648  * This class satisfies the piranha::is_series and piranha::is_cf type traits.
649  *
650  * \warning
651  * The division and GCD operations are known to have poor performance, especially with large operands. Performance
652  * will be improved in future versions.
653  *
654  * ## Type requirements ##
655  *
656  * \p Cf must be suitable for use in piranha::series as first template argument,
657  * \p Key must be an instance of either piranha::monomial or piranha::kronecker_monomial.
658  *
659  * ## Exception safety guarantee ##
660  *
661  * This class provides the same guarantee as the base series type it derives from.
662  *
663  * ## Move semantics ##
664  *
665  * Move semantics is equivalent to the move semantics of the base series type it derives from.
666  */
667 template <typename Cf, typename Key>
668 class polynomial
669     : public power_series<trigonometric_series<ipow_substitutable_series<substitutable_series<t_substitutable_series<series<Cf,
670                                                                                                                             Key,
671                                                                                                                             polynomial<Cf,
672                                                                                                                                        Key>>,
673                                                                                                                      polynomial<Cf,
674                                                                                                                                 Key>>,
675                                                                                               polynomial<Cf, Key>>,
676                                                                          polynomial<Cf, Key>>>,
677                           polynomial<Cf, Key>>,
678       detail::polynomial_tag
679 {
680     // Check the key.
681     PIRANHA_TT_CHECK(detail::is_polynomial_key, Key);
682     // Make friend with debug class.
683     template <typename>
684     friend class debug_access;
685     // Make friend with all poly types.
686     template <typename, typename>
687     friend class polynomial;
688     // Make friend with Poisson series.
689     template <typename>
690     friend class poisson_series;
691     // Make friend with divisor series.
692     template <typename, typename>
693     friend class divisor_series;
694     // The base class.
695     using base
696         = power_series<trigonometric_series<ipow_substitutable_series<substitutable_series<t_substitutable_series<series<Cf,
697                                                                                                                          Key,
698                                                                                                                          polynomial<Cf,
699                                                                                                                                     Key>>,
700                                                                                                                   polynomial<Cf,
701                                                                                                                              Key>>,
702                                                                                            polynomial<Cf, Key>>,
703                                                                       polynomial<Cf, Key>>>,
704                        polynomial<Cf, Key>>;
705     // A couple of helpers from C++14.
706     template <typename T>
707     using decay_t = typename std::decay<T>::type;
708     template <bool B, typename T = void>
709     using enable_if_t = typename std::enable_if<B, T>::type;
710     // String constructor.
711     template <typename Str>
construct_from_string(Str && str)712     void construct_from_string(Str &&str)
713     {
714         typedef typename base::term_type term_type;
715         // Insert the symbol.
716         this->m_symbol_set.add(symbol(std::forward<Str>(str)));
717         // Construct and insert the term.
718         this->insert(term_type(Cf(1), typename term_type::key_type{1}));
719     }
720     template <typename T = Key,
721               typename std::enable_if<detail::key_has_linarg<T>::value && has_safe_cast<integer, Cf>::value, int>::type
722               = 0>
integral_combination() const723     std::map<std::string, integer> integral_combination() const
724     {
725         try {
726             std::map<std::string, integer> retval;
727             for (auto it = this->m_container.begin(); it != this->m_container.end(); ++it) {
728                 const std::string lin_arg = it->m_key.linear_argument(this->m_symbol_set);
729                 piranha_assert(retval.find(lin_arg) == retval.end());
730                 retval[lin_arg] = safe_cast<integer>(it->m_cf);
731             }
732             return retval;
733         } catch (const std::invalid_argument &) {
734             // NOTE: this currently catches failures both in lin_arg and safe_cast, as safe_cast_failure
735             // inherits from std::invalid_argument.
736             piranha_throw(std::invalid_argument, "polynomial is not an integral linear combination");
737         }
738     }
739     template <
740         typename T = Key,
741         typename std::enable_if<!detail::key_has_linarg<T>::value || !has_safe_cast<integer, Cf>::value, int>::type = 0>
integral_combination() const742     std::map<std::string, integer> integral_combination() const
743     {
744         piranha_throw(std::invalid_argument,
745                       "the polynomial type does not support the extraction of a linear combination");
746     }
747     // Integration utils.
748     // Empty for SFINAE.
749     template <typename T, typename = void>
750     struct integrate_type_ {
751     };
752     // The type resulting from the integration of the key of series T.
753     template <typename T>
754     using key_integrate_type
755         = decltype(std::declval<const typename T::term_type::key_type &>()
756                        .integrate(std::declval<const symbol &>(), std::declval<const symbol_set &>())
757                        .first);
758     // Basic integration requirements for series T, to be satisfied both when the coefficient is integrable
759     // and when it is not. ResT is the type of the result of the integration.
760     template <typename T, typename ResT>
761     using basic_integrate_requirements = typename std::enable_if<
762         // Coefficient differentiable, and can call is_zero on the result.
763         has_is_zero<decltype(math::partial(std::declval<const typename T::term_type::cf_type &>(),
764                                            std::declval<const std::string &>()))>::value
765         &&
766         // The key is integrable.
767         detail::true_tt<key_integrate_type<T>>::value &&
768         // The result needs to be addable in-place.
769         is_addable_in_place<ResT>::value &&
770         // It also needs to be ctible from zero.
771         std::is_constructible<ResT, int>::value>::type;
772     // Non-integrable coefficient.
773     template <typename T>
774     using nic_res_type = decltype((std::declval<const T &>() * std::declval<const typename T::term_type::cf_type &>())
775                                   / std::declval<const key_integrate_type<T> &>());
776     template <typename T>
777     struct integrate_type_<T, typename std::
778                                   enable_if<!is_integrable<typename T::term_type::cf_type>::value
779                                             && detail::true_tt<basic_integrate_requirements<T, nic_res_type<T>>>::
780                                                    value>::type> {
781         using type = nic_res_type<T>;
782     };
783     // Integrable coefficient.
784     // The type resulting from the differentiation of the key of series T.
785     template <typename T>
786     using key_partial_type
787         = decltype(std::declval<const typename T::term_type::key_type &>()
788                        .partial(std::declval<const symbol_set::positions &>(), std::declval<const symbol_set &>())
789                        .first);
790     // Type resulting from the integration of the coefficient.
791     template <typename T>
792     using i_cf_type = decltype(
793         math::integrate(std::declval<const typename T::term_type::cf_type &>(), std::declval<const std::string &>()));
794     // Type above, multiplied by the type coming out of the derivative of the key.
795     template <typename T>
796     using i_cf_type_p = decltype(std::declval<const i_cf_type<T> &>() * std::declval<const key_partial_type<T> &>());
797     // Final series type.
798     template <typename T>
799     using ic_res_type = decltype(std::declval<const i_cf_type_p<T> &>() * std::declval<const T &>());
800     template <typename T>
801     struct integrate_type_<T,
802                            typename std::
803                                enable_if<is_integrable<typename T::term_type::cf_type>::value
804                                          && detail::true_tt<basic_integrate_requirements<T, ic_res_type<T>>>::value &&
805                                          // We need to be able to add the non-integrable type.
806                                          is_addable_in_place<ic_res_type<T>, nic_res_type<T>>::value &&
807                                          // We need to be able to compute the partial degree and cast it to integer.
808                                          has_safe_cast<integer,
809                                                        decltype(
810                                                            std::declval<const typename T::term_type::key_type &>()
811                                                                .degree(std::declval<const symbol_set::positions &>(),
812                                                                        std::declval<const symbol_set &>()))>::value
813                                          &&
814                                          // This is required in the initialisation of the return value.
815                                          std::is_constructible<i_cf_type_p<T>, i_cf_type<T>>::value &&
816                                          // We need to be able to assign the integrated coefficient times key partial.
817                                          std::is_assignable<i_cf_type_p<T> &, i_cf_type_p<T>>::value &&
818                                          // Needs math::negate().
819                                          has_negate<i_cf_type_p<T>>::value>::type> {
820         using type = ic_res_type<T>;
821     };
822     // Final typedef.
823     template <typename T>
824     using integrate_type = typename std::enable_if<is_returnable<typename integrate_type_<T>::type>::value,
825                                                    typename integrate_type_<T>::type>::type;
826     // Integration with integrable coefficient.
827     template <typename T = polynomial>
integrate_impl(const symbol & s,const typename base::term_type & term,const std::true_type &) const828     integrate_type<T> integrate_impl(const symbol &s, const typename base::term_type &term,
829                                      const std::true_type &) const
830     {
831         typedef typename base::term_type term_type;
832         typedef typename term_type::cf_type cf_type;
833         typedef typename term_type::key_type key_type;
834         // Get the partial degree of the monomial in integral form.
835         integer degree;
836         const symbol_set::positions pos(this->m_symbol_set, symbol_set{s});
837         try {
838             degree = safe_cast<integer>(term.m_key.degree(pos, this->m_symbol_set));
839         } catch (const safe_cast_failure &) {
840             piranha_throw(std::invalid_argument,
841                           "unable to perform polynomial integration: cannot extract the integral form of an exponent");
842         }
843         // If the degree is negative, integration by parts won't terminate.
844         if (degree.sign() < 0) {
845             piranha_throw(std::invalid_argument,
846                           "unable to perform polynomial integration: negative integral exponent");
847         }
848         polynomial tmp;
849         tmp.set_symbol_set(this->m_symbol_set);
850         key_type tmp_key = term.m_key;
851         tmp.insert(term_type(cf_type(1), tmp_key));
852         i_cf_type_p<T> i_cf(math::integrate(term.m_cf, s.get_name()));
853         integrate_type<T> retval(i_cf * tmp);
854         for (integer i(1); i <= degree; ++i) {
855             // Update coefficient and key. These variables are persistent across loop iterations.
856             auto partial_key = tmp_key.partial(pos, this->m_symbol_set);
857             i_cf = math::integrate(i_cf, s.get_name()) * std::move(partial_key.first);
858             // Account for (-1)**i.
859             math::negate(i_cf);
860             // Build the other factor from the derivative of the monomial.
861             tmp = polynomial{};
862             tmp.set_symbol_set(this->m_symbol_set);
863             tmp_key = std::move(partial_key.second);
864             // NOTE: don't move tmp_key, as it needs to hold a valid value
865             // for the next loop iteration.
866             tmp.insert(term_type(cf_type(1), tmp_key));
867             retval += i_cf * tmp;
868         }
869         return retval;
870     }
871     // Integration with non-integrable coefficient.
872     template <typename T = polynomial>
integrate_impl(const symbol &,const typename base::term_type &,const std::false_type &) const873     integrate_type<T> integrate_impl(const symbol &, const typename base::term_type &, const std::false_type &) const
874     {
875         piranha_throw(std::invalid_argument,
876                       "unable to perform polynomial integration: coefficient type is not integrable");
877     }
878     // Template alias for use in pow() overload. Will check via SFINAE that the base pow() method can be called with
879     // argument T and that exponentiation of key type is legal.
880     template <typename T>
881     using key_pow_t
882         = decltype(std::declval<Key const &>().pow(std::declval<const T &>(), std::declval<const symbol_set &>()));
883     template <typename T>
884     using pow_ret_type = enable_if_t<is_detected<key_pow_t, T>::value,
885                                      decltype(std::declval<series<Cf, Key, polynomial<Cf, Key>> const &>().pow(
886                                          std::declval<const T &>()))>;
887     // Invert utils.
888     template <typename Series>
889     using inverse_type = decltype(std::declval<const Series &>().pow(-1));
890     // Auto-truncation machinery.
891     // The degree and partial degree types, detected via math::degree().
892     template <typename T>
893     using degree_type = decltype(math::degree(std::declval<const T &>()));
894     template <typename T>
895     using pdegree_type
896         = decltype(math::degree(std::declval<const T &>(), std::declval<const std::vector<std::string> &>()));
897     // Enablers for auto-truncation: degree and partial degree must be the same, series must support
898     // math::truncate_degree(), degree type must be subtractable and yield the same type.
899     template <typename T>
900     using at_degree_enabler =
901         typename std::enable_if<std::is_same<degree_type<T>, pdegree_type<T>>::value
902                                     && has_truncate_degree<T, degree_type<T>>::value
903                                     && std::is_same<decltype(std::declval<const degree_type<T> &>()
904                                                              - std::declval<const degree_type<T> &>()),
905                                                     degree_type<T>>::value
906                                     && is_equality_comparable<degree_type<T>>::value,
907                                 int>::type;
908     // For the setter, we need the above plus we need to be able to convert safely U to the degree type.
909     template <typename T, typename U>
910     using at_degree_set_enabler =
911         typename std::enable_if<detail::true_tt<at_degree_enabler<T>>::value && has_safe_cast<degree_type<T>, U>::value,
912                                 int>::type;
913     // This needs to be separate from the other static inits because we don't have anything to init
914     // if the series does not support degree computation.
915     // NOTE: here the important thing is that this method does not return the same object for different series types,
916     // as the intent of the truncation mechanism is that each polynomial type has its own settings.
917     // We need to keep this in mind if we need static resources that must be unique for the series type, sometimes
918     // adding the Derived series as template argument in a toolbox might actually be necessary because of this. Note
919     // that, contrary to the, e.g., custom derivatives map in series.hpp here we don't care about the type of T - we
920     // just need to be able to extract the term type from it.
921     template <typename T = polynomial>
get_at_degree_max()922     static degree_type<T> &get_at_degree_max()
923     {
924         // Init to zero for peace of mind - though this should never be accessed
925         // if the auto-truncation is not used.
926         static degree_type<T> at_degree_max(0);
927         return at_degree_max;
928     }
929     // Enabler for string construction.
930     template <typename Str>
931     using str_enabler =
932         typename std::enable_if<std::is_same<typename std::decay<Str>::type, std::string>::value
933                                     || std::is_same<typename std::decay<Str>::type, char *>::value
934                                     || std::is_same<typename std::decay<Str>::type, const char *>::value,
935                                 int>::type;
936     // Implementation of find_cf().
937     template <typename T>
938     using find_cf_enabler =
939         typename std::enable_if<std::is_constructible<
940                                     typename base::term_type::key_type, decltype(std::begin(std::declval<const T &>())),
941                                     decltype(std::end(std::declval<const T &>())), const symbol_set &>::value
942                                     && has_begin_end<const T>::value,
943                                 int>::type;
944     template <typename T>
945     using find_cf_init_list_enabler = find_cf_enabler<std::initializer_list<T>>;
946     template <typename Iterator>
find_cf_impl(Iterator begin,Iterator end) const947     Cf find_cf_impl(Iterator begin, Iterator end) const
948     {
949         typename base::term_type tmp_term{Cf(0), Key(begin, end, this->m_symbol_set)};
950         auto it = this->m_container.find(tmp_term);
951         if (it == this->m_container.end()) {
952             return Cf(0);
953         }
954         return it->m_cf;
955     }
956     // Division utilities.
957     // Couple of handy aliases.
958     template <typename T>
959     using cf_t = typename T::term_type::cf_type;
960     template <typename T>
961     using key_t = typename T::term_type::key_type;
962     template <typename T>
963     using expo_t = typename T::term_type::key_type::value_type;
964     // Multiply polynomial by a term. Preconditions:
965     // - p is not zero,
966     // - the coefficient of the term is not zero.
967     // Type requirements:
968     // - cf type supports multiplication,
969     // - key type supports multiply.
970     template <typename PType>
term_mult(const typename PType::term_type & t,const PType & p)971     static PType term_mult(const typename PType::term_type &t, const PType &p)
972     {
973         using term_type = typename PType::term_type;
974         using key_type = typename term_type::key_type;
975         piranha_assert(p.size() != 0u);
976         piranha_assert(!math::is_zero(t.m_cf));
977         // Initialise the return value.
978         PType retval;
979         retval.set_symbol_set(p.get_symbol_set());
980         // Prepare an adequate number of buckets (same as input poly).
981         retval._container().rehash(p._container().bucket_count());
982         // Go with the loop.
983         key_type tmp_key;
984         const auto it_f = p._container().end();
985         for (auto it = p._container().begin(); it != it_f; ++it) {
986             key_type::multiply(tmp_key, t.m_key, it->m_key, p.get_symbol_set());
987             piranha_assert(!math::is_zero(t.m_cf * it->m_cf));
988             // NOTE: here we could use the unique_insert machinery
989             // to improve performance.
990             retval.insert(term_type{t.m_cf * it->m_cf, tmp_key});
991         }
992         return retval;
993     }
994     // Univariate euclidean division.
995     template <bool CheckExpos, typename T>
udivrem_impl(const T & n,const T & d)996     static std::pair<polynomial, polynomial> udivrem_impl(const T &n, const T &d)
997     {
998         using term_type = typename base::term_type;
999         using cf_type = typename term_type::cf_type;
1000         using key_type = typename term_type::key_type;
1001         // Only univariate polynomials are allowed.
1002         if (unlikely(n.get_symbol_set().size() != 1u || n.get_symbol_set() != d.get_symbol_set())) {
1003             piranha_throw(std::invalid_argument, "only univariate polynomials in the same variable "
1004                                                  "are allowed in the polynomial division with remainder routine");
1005         }
1006         if (unlikely(d.size() == 0u)) {
1007             piranha_throw(zero_division_error, "univariate polynomial division by zero");
1008         }
1009         // Cache the symbol set for brevity.
1010         const auto &args = n.get_symbol_set();
1011         // If the numerator is zero, just return two empty polys.
1012         if (n.size() == 0u) {
1013             polynomial q, r;
1014             q.set_symbol_set(args);
1015             r.set_symbol_set(args);
1016             return std::make_pair(std::move(q), std::move(r));
1017         }
1018         // Check negative exponents, if requested.
1019         if (CheckExpos) {
1020             // Check if there are negative exponents.
1021             detail::poly_expo_checker(n);
1022             detail::poly_expo_checker(d);
1023         }
1024         // Initialisation: quotient is empty, remainder is the numerator.
1025         polynomial q, r(n);
1026         q.set_symbol_set(args);
1027         // Leading term of the denominator, always the same.
1028         const auto lden = detail::poly_lterm(d);
1029         piranha_assert(!math::is_zero(lden->m_cf));
1030         // Temp cf and key used for computations in the loop.
1031         cf_type tmp_cf;
1032         key_type tmp_key;
1033         while (true) {
1034             if (r.size() == 0u) {
1035                 break;
1036             }
1037             // Leading term of the remainder.
1038             const auto lr = poly_lterm(r);
1039             if (lr->m_key < lden->m_key) {
1040                 break;
1041             }
1042             // NOTE: we want to check that the division is exact here,
1043             // and throw if this is not the case.
1044             // NOTE: this should be made configurable to optimise the case in which we
1045             // know the division will be exact (e.g., in GCD).
1046             math::divexact(tmp_cf, lr->m_cf, lden->m_cf);
1047             key_type::divide(tmp_key, lr->m_key, lden->m_key, args);
1048             term_type t{tmp_cf, tmp_key};
1049             // NOTE: here we are basically progressively removing terms from r until
1050             // it gets to zero. This sounds exactly like the kind of situation in which
1051             // the iteration over the hash table could become really slow. This might
1052             // matter when we need to re-determine the leading term of r above.
1053             // We should consider adding a re-hashing every time we do a +/- operation on
1054             // a series and the load factor gets below a certain threshold. We should also
1055             // review uses of the insert() method of series to spot other possible
1056             // places where this could be a problem (although once we have a good behaviour
1057             // for +/- we should be mostly good - multiplication should already be ok).
1058             r -= term_mult(t, d);
1059             q.insert(std::move(t));
1060         }
1061         return std::make_pair(std::move(q), std::move(r));
1062     }
1063     // Multivariate exact division.
1064     template <typename T>
division_impl(const T & n,const T & d)1065     static T division_impl(const T &n, const T &d)
1066     {
1067         static_assert(std::is_same<T, polynomial>::value, "Invalid type.");
1068         using term_type = typename polynomial::term_type;
1069         using expo_type = typename term_type::key_type::value_type;
1070         // Cache it.
1071         const auto &args = n.get_symbol_set();
1072         // Univariate case.
1073         if (args.size() == 1u) {
1074             // NOTE: udivrem contains the check for negative exponents, zero n/d, etc.
1075             auto res = polynomial::udivrem(n, d);
1076             if (res.second.size() != 0u) {
1077                 piranha_throw(math::inexact_division, );
1078             }
1079             return std::move(res.first);
1080         }
1081         // Multivariate or zerovariate case.
1082         // NOTE: here we need to check a few things as the mapping to univariate has certain prereqs.
1083         // First we check if n or d are zero.
1084         if (d.size() == 0u) {
1085             piranha_throw(zero_division_error, "polynomial division by zero");
1086         }
1087         if (n.size() == 0u) {
1088             polynomial retval;
1089             retval.set_symbol_set(args);
1090             return retval;
1091         }
1092         // Deal with the case in which the number of arguments is zero. We need to do it here as the mapping
1093         // requires at least 1 var.
1094         if (args.size() == 0u) {
1095             piranha_assert(n.size() == 1u && d.size() == 1u);
1096             piranha_assert(n.is_single_coefficient() && d.is_single_coefficient());
1097             polynomial retval;
1098             retval.set_symbol_set(args);
1099             Cf tmp_cf;
1100             math::divexact(tmp_cf, n._container().begin()->m_cf, d._container().begin()->m_cf);
1101             retval.insert(term_type{std::move(tmp_cf), Key(args)});
1102             return retval;
1103         }
1104         // Map to univariate.
1105         auto umap = detail::poly_to_univariate(n, d);
1106         // Do the univariate division.
1107         // NOTE: need to call udivrem_impl from the mapped univariate type.
1108         // NOTE: the check for negative exponents is done in the mapping.
1109         auto ures = std::get<0u>(umap).template udivrem_impl<false>(std::get<0u>(umap), std::get<1u>(umap));
1110         // Check if the division was exact.
1111         if (ures.second.size() != 0u) {
1112             piranha_throw(math::inexact_division, );
1113         }
1114         // Map back to multivariate.
1115         auto retval = detail::poly_from_univariate<Key>(ures.first, std::get<2u>(umap), args);
1116         // Final check: for each variable, the degree of retval + degree of d must be equal to the degree of n.
1117         auto degree_vector_extractor = [&args](const polynomial &p) -> std::vector<expo_type> {
1118             std::vector<expo_type> ret, tmp;
1119             // Init with the first term of the polynomial.
1120             piranha_assert(p.size() != 0u);
1121             auto it = p._container().begin();
1122             it->m_key.extract_exponents(ret, args);
1123             ++it;
1124             for (; it != p._container().end(); ++it) {
1125                 it->m_key.extract_exponents(tmp, args);
1126                 for (decltype(tmp.size()) i = 0u; i < tmp.size(); ++i) {
1127                     if (tmp[i] > ret[i]) {
1128                         ret[i] = tmp[i];
1129                     }
1130                 }
1131             }
1132             return ret;
1133         };
1134         const auto n_dv = degree_vector_extractor(n), d_dv = degree_vector_extractor(d),
1135                    r_dv = degree_vector_extractor(retval);
1136         for (decltype(n_dv.size()) i = 0u; i < n_dv.size(); ++i) {
1137             if (integer(n_dv[i]) != integer(d_dv[i]) + integer(r_dv[i])) {
1138                 piranha_throw(math::inexact_division, );
1139             }
1140         }
1141         return retval;
1142     }
1143     // Enabler for exact poly division.
1144     // NOTE: the further constraining here that T must be polynomial is to work around a GCC < 5 (?) bug,
1145     // in which template operators from other template instantiations of polynomial<> are considered. We can
1146     // remove this once we bump the minimum version of GCC required.
1147     template <typename T, typename U = T>
1148     using poly_div_enabler
1149         = enable_if_t<std::is_same<decay_t<T>, polynomial>::value, detail::ptd::enabler<decay_t<T>, decay_t<U>>>;
1150     // Enabler for in-place division.
1151     template <typename T, typename U>
1152     using poly_in_place_div_enabler
1153         = enable_if_t<detail::true_tt<poly_div_enabler<T, U>>::value && !std::is_const<T>::value, int>;
1154     // Join enabler.
1155     template <typename T>
1156     using join_enabler =
1157         typename std::enable_if<std::is_base_of<detail::polynomial_tag, cf_t<T>>::value
1158                                     && std::is_same<Key, key_t<T>>::value
1159                                     && detail::true_tt<decltype(std::declval<cf_t<T> &>()
1160                                                                 += std::declval<const cf_t<T> &>()
1161                                                                    * std::declval<const cf_t<T> &>())>::value,
1162                                 int>::type;
1163     // Content enabler.
1164     template <typename T>
1165     using content_enabler = typename std::enable_if<has_gcd3<cf_t<T>>::value, int>::type;
1166     // Primitive part enabler.
1167     template <typename T>
1168     using pp_enabler =
1169         typename std::enable_if<detail::true_tt<content_enabler<T>>::value && has_exact_division<cf_t<T>>::value,
1170                                 int>::type;
1171     // Prem enabler.
1172     template <typename T>
1173     using uprem_enabler = typename std::
1174         enable_if<detail::true_tt<poly_div_enabler<T>>::value
1175                       && std::is_same<decltype(math::pow(std::declval<const cf_t<T> &>(), 0u)), cf_t<T>>::value,
1176                   int>::type;
1177     // Enabler for GCD.
1178     template <typename T>
1179     using gcd_enabler = typename std::
1180         enable_if<has_gcd<cf_t<T>>::value && has_gcd3<cf_t<T>>::value
1181                       && std::is_same<decltype(math::pow(std::declval<const cf_t<T> &>(), 0u)), cf_t<T>>::value
1182                       && is_multipliable_in_place<cf_t<T>>::value
1183                       && std::is_same<decltype(std::declval<const cf_t<T> &>() * std::declval<const cf_t<T> &>()),
1184                                       cf_t<T>>::value
1185                       && has_exact_division<cf_t<T>>::value && std::is_copy_assignable<cf_t<T>>::value
1186                       && detail::true_tt<content_enabler<T>>::value && detail::true_tt<poly_div_enabler<T>>::value,
1187                   int>::type;
1188     // Height.
1189     template <typename T>
1190     using height_type_ = decltype(math::abs(std::declval<const cf_t<T> &>()));
1191     template <typename T>
1192     using height_type = typename std::enable_if<std::is_constructible<height_type_<T>, int>::value
1193                                                     && is_less_than_comparable<height_type_<T>>::value
1194                                                     && std::is_move_assignable<height_type_<T>>::value
1195                                                     && (std::is_copy_constructible<height_type_<T>>::value
1196                                                         || std::is_move_constructible<height_type_<T>>::value),
1197                                                 height_type_<T>>::type;
1198     // Wrapper around heuristic GCD. It will return false,tuple if the calculation went well,
1199     // true,tuple otherwise. If run on polynomials with non-integer coefficients, it will throw
1200     // if the requested algorithm is specifically the heuristic one.
1201     template <typename T, typename std::enable_if<detail::is_mp_integer<cf_t<T>>::value, int>::type = 0>
try_gcdheu(const T & a,const T & b,polynomial_gcd_algorithm)1202     static std::pair<bool, std::tuple<T, T, T>> try_gcdheu(const T &a, const T &b, polynomial_gcd_algorithm)
1203     {
1204         try {
1205             return detail::gcdheu_geddes(a, b);
1206         } catch (const detail::gcdheu_failure &) {
1207         }
1208         return std::make_pair(true, std::tuple<T, T, T>{});
1209     }
1210     template <typename T, typename std::enable_if<!detail::is_mp_integer<cf_t<T>>::value, int>::type = 0>
try_gcdheu(const T &,const T &,polynomial_gcd_algorithm algo)1211     static std::pair<bool, std::tuple<T, T, T>> try_gcdheu(const T &, const T &, polynomial_gcd_algorithm algo)
1212     {
1213         // The idea here is that this is a different kind of failure from the one we throw in the main gcd()
1214         // function, and we want to be able to discriminate the two.
1215         if (algo == polynomial_gcd_algorithm::heuristic) {
1216             piranha_throw(std::runtime_error, "the heuristic polynomial GCD algorithm was explicitly selected, "
1217                                               "but it cannot be applied to non-integral coefficients");
1218         }
1219         return std::make_pair(true, std::tuple<T, T, T>{});
1220     }
1221     // This is a wrapper to compute and return the cofactors, together with the GCD, when the PRS algorithm
1222     // is used (the gcdheu algorithm already computes the cofactors).
1223     template <typename T, typename U>
wrap_gcd_cofactors(U && gcd,const T & a,const T & b,bool with_cofactors)1224     static std::tuple<T, T, T> wrap_gcd_cofactors(U &&gcd, const T &a, const T &b, bool with_cofactors)
1225     {
1226         if (with_cofactors) {
1227             if (math::is_zero(gcd)) {
1228                 // In this case, a tuple of zeroes will be returned.
1229                 return std::make_tuple(std::forward<U>(gcd), T{}, T{});
1230             } else {
1231                 return std::make_tuple(std::forward<U>(gcd), a / gcd, b / gcd);
1232             }
1233         } else {
1234             return std::make_tuple(std::forward<U>(gcd), T{}, T{});
1235         }
1236     }
1237     // Enabler for untruncated multiplication.
1238     template <typename T>
1239     using um_enabler =
1240         typename std::enable_if<std::is_same<T, decltype(std::declval<const T &>() * std::declval<const T &>())>::value,
1241                                 int>::type;
1242     // Enabler for truncated multiplication.
1243     template <typename T, typename U>
1244     using tm_enabler =
1245         typename std::enable_if<std::is_same<T, decltype(std::declval<const T &>() * std::declval<const T &>())>::value
1246                                     && has_safe_cast<degree_type<T>, U>::value
1247                                     && detail::true_tt<at_degree_enabler<T>>::value,
1248                                 int>::type;
1249     // Common bits for truncated/untruncated multiplication. Will do the usual merging of the symbol sets
1250     // before calling the runner functor, which performs the actual multiplication.
1251     template <typename Functor>
um_tm_implementation(const polynomial & p1,const polynomial & p2,const Functor & runner)1252     static polynomial um_tm_implementation(const polynomial &p1, const polynomial &p2, const Functor &runner)
1253     {
1254         const auto &ss1 = p1.get_symbol_set(), &ss2 = p2.get_symbol_set();
1255         if (ss1 == ss2) {
1256             return runner(p1, p2);
1257         }
1258         // If the symbol sets are not the same, we need to merge them and make
1259         // copies of the original operands as needed.
1260         auto merge = ss1.merge(ss2);
1261         const bool need_copy_1 = (merge != ss1), need_copy_2 = (merge != ss2);
1262         if (need_copy_1) {
1263             polynomial copy_1(p1.extend_symbol_set(merge));
1264             if (need_copy_2) {
1265                 polynomial copy_2(p2.extend_symbol_set(merge));
1266                 return runner(copy_1, copy_2);
1267             }
1268             return runner(copy_1, p2);
1269         } else {
1270             polynomial copy_2(p2.extend_symbol_set(merge));
1271             return runner(p1, copy_2);
1272         }
1273     }
1274     // Helper function to clear the pow cache when a new auto truncation limit is set.
1275     template <typename T>
truncation_clear_pow_cache(int mode,const T & max_degree,const std::vector<std::string> & names)1276     static void truncation_clear_pow_cache(int mode, const T &max_degree, const std::vector<std::string> &names)
1277     {
1278         // The pow cache is cleared only if we are actually changing the truncation settings.
1279         if (s_at_degree_mode != mode || get_at_degree_max() != max_degree || names != s_at_degree_names) {
1280             polynomial::clear_pow_cache();
1281         }
1282     }
1283 
1284 public:
1285     /// Series rebind alias.
1286     template <typename Cf2>
1287     using rebind = polynomial<Cf2, Key>;
1288     /// Defaulted default constructor.
1289     /**
1290      * Will construct a polynomial with zero terms.
1291      */
1292     polynomial() = default;
1293     /// Defaulted copy constructor.
1294     polynomial(const polynomial &) = default;
1295     /// Defaulted move constructor.
1296     polynomial(polynomial &&) = default;
1297     /// Constructor from symbol name.
1298     /**
1299      * \note
1300      * This template constructor is enabled only if the decay type of \p Str is a C or C++ string.
1301      *
1302      * Will construct a univariate polynomial made of a single term with unitary coefficient and exponent, representing
1303      * the symbolic variable \p name. The type of \p name must be a string type (either C or C++).
1304      *
1305      * @param name name of the symbolic variable that the polynomial will represent.
1306      *
1307      * @throws unspecified any exception thrown by:
1308      * - piranha::symbol_set::add(),
1309      * - the constructor of piranha::symbol from string,
1310      * - the invoked constructor of the coefficient type,
1311      * - the invoked constructor of the key type,
1312      * - the constructor of the term type from coefficient and key,
1313      * - piranha::series::insert().
1314      */
1315     template <typename Str, str_enabler<Str> = 0>
polynomial(Str && name)1316     explicit polynomial(Str &&name) : base()
1317     {
1318         construct_from_string(std::forward<Str>(name));
1319     }
PIRANHA_FORWARDING_CTOR(polynomial,base)1320     PIRANHA_FORWARDING_CTOR(polynomial, base)
1321     /// Trivial destructor.
1322     ~polynomial()
1323     {
1324         PIRANHA_TT_CHECK(is_cf, polynomial);
1325         PIRANHA_TT_CHECK(is_series, polynomial);
1326     }
1327     /// Copy assignment operator.
1328     /**
1329      * @param other the assignment argument.
1330      *
1331      * @return a reference to \p this.
1332      *
1333      * @throws unspecified any exception thrown by the assignment operator of the base class.
1334      */
1335     polynomial &operator=(const polynomial &other) = default;
1336     /// Move assignment operator.
1337     /**
1338      * @param other the assignment argument.
1339      *
1340      * @return a reference to \p this.
1341      */
1342     polynomial &operator=(polynomial &&other) = default;
PIRANHA_FORWARDING_ASSIGNMENT(polynomial,base)1343     PIRANHA_FORWARDING_ASSIGNMENT(polynomial, base)
1344     /// Override default exponentiation method.
1345     /**
1346      * \note
1347      * This method is enabled only if piranha::series::pow() can be called with exponent \p x
1348      * and the key type can be raised to the power of \p x via its exponentiation method.
1349      *
1350      * This exponentiation override will check if the polynomial consists of a single-term with non-unitary
1351      * key. In that case, the return polynomial will consist of a single term with coefficient computed via
1352      * piranha::math::pow() and key computed via the monomial exponentiation method. Otherwise, the base
1353      * (i.e., default) exponentiation method will be used.
1354      *
1355      * @param x exponent.
1356      *
1357      * @return \p this to the power of \p x.
1358      *
1359      * @throws unspecified any exception thrown by:
1360      * - the <tt>is_unitary()</tt> and exponentiation methods of the key type,
1361      * - piranha::math::pow(),
1362      * - construction of coefficient, key and term,
1363      * - the copy assignment operator of piranha::symbol_set,
1364      * - piranha::series::insert() and piranha::series::pow().
1365      */
1366     template <typename T>
1367     pow_ret_type<T> pow(const T &x) const
1368     {
1369         using ret_type = pow_ret_type<T>;
1370         typedef typename ret_type::term_type term_type;
1371         typedef typename term_type::cf_type cf_type;
1372         typedef typename term_type::key_type key_type;
1373         if (this->size() == 1u && !this->m_container.begin()->m_key.is_unitary(this->m_symbol_set)) {
1374             cf_type cf(math::pow(this->m_container.begin()->m_cf, x));
1375             key_type key(this->m_container.begin()->m_key.pow(x, this->m_symbol_set));
1376             ret_type retval;
1377             retval.set_symbol_set(this->m_symbol_set);
1378             retval.insert(term_type(std::move(cf), std::move(key)));
1379             return retval;
1380         }
1381         return static_cast<series<Cf, Key, polynomial<Cf, Key>> const *>(this)->pow(x);
1382     }
1383     /// Inversion.
1384     /**
1385      * \note
1386      * This method is enabled only if <tt>piranha::polynomial::pow(-1)</tt> is a well-formed
1387      * expression.
1388      *
1389      * @return the calling polynomial raised to -1 using piranha::polynomial::pow().
1390      *
1391      * @throws unspecified any exception thrown by piranha::polynomial::pow().
1392      */
1393     template <typename Series = polynomial>
invert() const1394     inverse_type<Series> invert() const
1395     {
1396         return this->pow(-1);
1397     }
1398     /// Integration.
1399     /**
1400      * \note
1401      * This method is enabled only if the algorithm described below is supported by all the involved types.
1402      *
1403      * This method will attempt to compute the antiderivative of the polynomial term by term. If the term's coefficient
1404      * does not depend on
1405      * the integration variable, the result will be calculated via the integration of the corresponding monomial.
1406      * Integration with respect to a variable appearing to the power of -1 will fail.
1407      *
1408      * Otherwise, a strategy of integration by parts is attempted, its success depending on the integrability
1409      * of the coefficient and on the value of the exponent of the integration variable. The integration will
1410      * fail if the exponent is negative or non-integral.
1411      *
1412      * @param name integration variable.
1413      *
1414      * @return the antiderivative of \p this with respect to \p name.
1415      *
1416      * @throws std::invalid_argument if the integration procedure fails.
1417      * @throws unspecified any exception thrown by:
1418      * - piranha::symbol construction,
1419      * - piranha::math::partial(), piranha::math::is_zero(), piranha::math::integrate(), piranha::safe_cast()
1420      *   and piranha::math::negate(),
1421      * - piranha::symbol_set::add(),
1422      * - term construction,
1423      * - coefficient construction, assignment and arithmetics,
1424      * - integration, construction, assignment, differentiation and degree querying methods of the key type,
1425      * - insert(),
1426      * - series arithmetics.
1427      */
1428     template <typename T = polynomial>
integrate(const std::string & name) const1429     integrate_type<T> integrate(const std::string &name) const
1430     {
1431         typedef typename base::term_type term_type;
1432         typedef typename term_type::cf_type cf_type;
1433         // Turn name into symbol.
1434         const symbol s(name);
1435         integrate_type<T> retval(0);
1436         const auto it_f = this->m_container.end();
1437         for (auto it = this->m_container.begin(); it != it_f; ++it) {
1438             // If the derivative of the coefficient is null, we just need to deal with
1439             // the integration of the key.
1440             if (math::is_zero(math::partial(it->m_cf, name))) {
1441                 polynomial tmp;
1442                 symbol_set sset = this->m_symbol_set;
1443                 // If the variable does not exist in the arguments set, add it.
1444                 if (!std::binary_search(sset.begin(), sset.end(), s)) {
1445                     sset.add(s);
1446                 }
1447                 tmp.set_symbol_set(sset);
1448                 auto key_int = it->m_key.integrate(s, this->m_symbol_set);
1449                 tmp.insert(term_type(cf_type(1), std::move(key_int.second)));
1450                 retval += (tmp * it->m_cf) / key_int.first;
1451             } else {
1452                 retval += integrate_impl(s, *it, std::integral_constant<bool, is_integrable<cf_type>::value>());
1453             }
1454         }
1455         return retval;
1456     }
1457     /// Set total-degree-based auto-truncation.
1458     /**
1459      * \note
1460      * This method is available only if the requisites outlined in piranha::polynomial are satisfied
1461      * and if \p U can be safely cast to the degree type.
1462      *
1463      * Setup the degree-based auto-truncation mechanism to truncate according to the total maximum degree.
1464      * If the new auto truncation settings are different from the currently active ones, the natural power cache
1465      * defined in piranha::series will be cleared.
1466      *
1467      * @param max_degree maximum total degree that will be retained during automatic truncation.
1468      *
1469      * @throws unspecified any exception thrown by:
1470      * - threading primitives,
1471      * - piranha::safe_cast(),
1472      * - the constructor of the degree type.
1473      */
1474     template <typename U, typename T = polynomial, at_degree_set_enabler<T, U> = 0>
set_auto_truncate_degree(const U & max_degree)1475     static void set_auto_truncate_degree(const U &max_degree)
1476     {
1477         // Init out for exception safety.
1478         auto new_degree = safe_cast<degree_type<T>>(max_degree);
1479         // Initialisation of function-level statics is thread-safe, no need to lock. We get
1480         // a ref before the lock because the initialisation of the static could throw in principle,
1481         // and we want the section after the lock to be exception-free.
1482         auto &at_dm = get_at_degree_max();
1483         std::lock_guard<std::mutex> lock(s_at_degree_mutex);
1484         // NOTE: here in principle there could be an exception thrown as a consequence of the degree comparison.
1485         // This is not a problem as at this stage no setting has been modified.
1486         truncation_clear_pow_cache(1, new_degree, {});
1487         s_at_degree_mode = 1;
1488         // NOTE: the degree type of polys satisfies is_container_element, so move assignment is noexcept.
1489         at_dm = std::move(new_degree);
1490         // This should not throw (a vector of strings, destructors and deallocation should be noexcept).
1491         s_at_degree_names.clear();
1492     }
1493     /// Set partial-degree-based auto-truncation.
1494     /**
1495      * \note
1496      * This method is available only if the requisites outlined in piranha::polynomial are satisfied
1497      * and if \p U can be safely cast to the degree type.
1498      *
1499      * Setup the degree-based auto-truncation mechanism to truncate according to the partial degree.
1500      * If the new auto truncation settings are different from the currently active ones, the natural power cache
1501      * defined in piranha::series will be cleared.
1502      *
1503      * @param max_degree maximum partial degree that will be retained during automatic truncation.
1504      * @param names names of the variables that will be considered during the computation of the
1505      * partial degree.
1506      *
1507      * @throws unspecified any exception thrown by:
1508      * - threading primitives,
1509      * - piranha::safe_cast(),
1510      * - the constructor of the degree type,
1511      * - memory allocation errors in standard containers.
1512      */
1513     template <typename U, typename T = polynomial, at_degree_set_enabler<T, U> = 0>
set_auto_truncate_degree(const U & max_degree,const std::vector<std::string> & names)1514     static void set_auto_truncate_degree(const U &max_degree, const std::vector<std::string> &names)
1515     {
1516         // Copy+move for exception safety.
1517         auto new_degree = safe_cast<degree_type<T>>(max_degree);
1518         auto new_names = names;
1519         auto &at_dm = get_at_degree_max();
1520         std::lock_guard<std::mutex> lock(s_at_degree_mutex);
1521         truncation_clear_pow_cache(2, new_degree, new_names);
1522         s_at_degree_mode = 2;
1523         at_dm = std::move(new_degree);
1524         s_at_degree_names = std::move(new_names);
1525     }
1526     /// Disable degree-based auto-truncation.
1527     /**
1528      * \note
1529      * This method is available only if the requisites outlined in piranha::polynomial are satisfied.
1530      *
1531      * Disable the degree-based auto-truncation mechanism.
1532      *
1533      * @throws unspecified any exception thrown by:
1534      * - threading primitives,
1535      * - the constructor of the degree type,
1536      * - memory allocation errors in standard containers.
1537      */
1538     template <typename T = polynomial, at_degree_enabler<T> = 0>
unset_auto_truncate_degree()1539     static void unset_auto_truncate_degree()
1540     {
1541         degree_type<T> new_degree(0);
1542         auto &at_dm = get_at_degree_max();
1543         std::lock_guard<std::mutex> lock(s_at_degree_mutex);
1544         s_at_degree_mode = 0;
1545         at_dm = std::move(new_degree);
1546         s_at_degree_names.clear();
1547     }
1548     /// Query the status of the degree-based auto-truncation mechanism.
1549     /**
1550      * \note
1551      * This method is available only if the requisites outlined in piranha::polynomial are satisfied.
1552      *
1553      * This method will return a tuple of three elements describing the status of the degree-based auto-truncation
1554      * mechanism.
1555      * The elements of the tuple have the following meaning:
1556      * - truncation mode (0 if disabled, 1 for total-degree truncation and 2 for partial-degree truncation),
1557      * - the maximum degree allowed,
1558      * - the list of names to be considered for partial truncation.
1559      *
1560      * @return a tuple representing the status of the degree-based auto-truncation mechanism.
1561      *
1562      * @throws unspecified any exception thrown by threading primitives or by the involved constructors.
1563      */
1564     template <typename T = polynomial, at_degree_enabler<T> = 0>
get_auto_truncate_degree()1565     static std::tuple<int, degree_type<T>, std::vector<std::string>> get_auto_truncate_degree()
1566     {
1567         std::lock_guard<std::mutex> lock(s_at_degree_mutex);
1568         return std::make_tuple(s_at_degree_mode, get_at_degree_max(), s_at_degree_names);
1569     }
1570     /// Find coefficient.
1571     /**
1572      * \note
1573      * This method is enabled only if:
1574      * - \p T satisfies piranha::has_begin_end,
1575      * - \p Key can be constructed from the begin/end iterators of \p c and a piranha::symbol_set.
1576      *
1577      * This method will first construct a term with zero coefficient and key initialised from the begin/end iterators
1578      * of \p c and the symbol set of \p this, and it will then try to locate the term inside \p this.
1579      * If the term is found, its coefficient will be returned. Otherwise, a coefficient initialised
1580      * from 0 will be returned.
1581      *
1582      * @param c the container that will be used to construct the \p Key to be located.
1583      *
1584      * @returns the coefficient of the term whose \p Key corresponds to \p c if such term exists,
1585      * zero otherwise.
1586      *
1587      * @throws unspecified any exception thrown by:
1588      * - term, coefficient and key construction,
1589      * - piranha::hash_set::find().
1590      */
1591     template <typename T, find_cf_enabler<T> = 0>
find_cf(const T & c) const1592     Cf find_cf(const T &c) const
1593     {
1594         return find_cf_impl(std::begin(c), std::end(c));
1595     }
1596     /// Find coefficient.
1597     /**
1598      * \note
1599      * This method is enabled only if \p Key can be constructed from the begin/end iterators of \p l and a
1600      * piranha::symbol_set.
1601      *
1602      * This method is identical to the other overload with the same name, and it is provided for convenience.
1603      *
1604      * @param l the list that will be used to construct the \p Key to be located.
1605      *
1606      * @returns the coefficient of the term whose \p Key corresponds to \p l if such term exists,
1607      * zero otherwise.
1608      *
1609      * @throws unspecified any exception thrown by the other overload.
1610      */
1611     template <typename T, find_cf_init_list_enabler<T> = 0>
find_cf(std::initializer_list<T> l) const1612     Cf find_cf(std::initializer_list<T> l) const
1613     {
1614         return find_cf_impl(std::begin(l), std::end(l));
1615     }
1616     /// Split polynomial.
1617     /**
1618      * This method will split the first symbolic argument of the polynomial. That is, it will return a univariate
1619      * representation of \p this in which the only symbolic argument is the first symbol in the symbol set of \p this,
1620      * and the coefficients are multivariate polynomials in the remaining variables.
1621      *
1622      * @return a representation of \p this as a univariate polynomial with multivariate polynomial coefficients.
1623      *
1624      * @throws std::invalid_argument if \p this does not have at least 2 symbolic arguments.
1625      * @throws unspecified: any exception thrown by:
1626      * - the <tt>split()</tt> method of the monomial,
1627      * - the construction of piranha::symbol_set,
1628      * - piranha::series::set_symbol_set(),
1629      * - the construction of terms, coefficients and keys,
1630      * - piranha::series::insert().
1631      */
split() const1632     polynomial<polynomial<Cf, Key>, Key> split() const
1633     {
1634         if (unlikely(this->get_symbol_set().size() < 2u)) {
1635             piranha_throw(std::invalid_argument, "a polynomial needs at least 2 arguments in order to be split");
1636         }
1637         using r_term_type = typename polynomial<polynomial<Cf, Key>, Key>::term_type;
1638         using term_type = typename base::term_type;
1639         polynomial<polynomial<Cf, Key>, Key> retval;
1640         retval.set_symbol_set(symbol_set(this->get_symbol_set().begin(), this->get_symbol_set().begin() + 1));
1641         symbol_set tmp_ss(this->get_symbol_set().begin() + 1, this->get_symbol_set().end());
1642         for (const auto &t : this->_container()) {
1643             auto tmp_s = t.m_key.split(this->get_symbol_set());
1644             polynomial tmp_p;
1645             tmp_p.set_symbol_set(tmp_ss);
1646             tmp_p.insert(term_type{t.m_cf, std::move(tmp_s.first)});
1647             retval.insert(r_term_type{std::move(tmp_p), std::move(tmp_s.second)});
1648         }
1649         return retval;
1650     }
1651     /// Join polynomial.
1652     /**
1653      * \note
1654      * This method is enabled only if the coefficient type \p Cf is a polynomial whose key type is \p Key, and which
1655      * supports
1656      * the expression <tt>a += b * c</tt> where \p a, \p b and \p c are all instances of \p Cf).
1657      *
1658      * This method is the opposite of piranha::polynomial::split(): given a polynomial with polynomial coefficients,
1659      * it will produce a multivariate polynomial in which the arguments of \p this and the arguments of the coefficients
1660      * have been joined.
1661      *
1662      * @return the joined counterpart of \p this.
1663      *
1664      * @throws unspecified any exception thrown by:
1665      * - piranha::series::set_symbol_set() and piranha::series::insert(),
1666      * - term construction,
1667      * - arithmetic operations on the coefficient type.
1668      */
1669     template <typename T = polynomial, join_enabler<T> = 0>
join() const1670     Cf join() const
1671     {
1672         // NOTE: here we can improve performance by using
1673         // lower level primitives rather than arithmetic operators.
1674         using cf_term_type = typename Cf::term_type;
1675         Cf retval;
1676         for (const auto &t : this->_container()) {
1677             Cf tmp;
1678             tmp.set_symbol_set(this->get_symbol_set());
1679             tmp.insert(cf_term_type{1, t.m_key});
1680             retval += tmp * t.m_cf;
1681         }
1682         return retval;
1683     }
1684     /// Content.
1685     /**
1686      * \note
1687      * This method is enabled only if the coefficient type satisfies piranha::has_gcd3.
1688      *
1689      * This method will return the GCD of the polynomial's coefficients. If the polynomial
1690      * is empty, zero will be returned.
1691      *
1692      * @return the content of \p this.
1693      *
1694      * @throws unspecified any exception thrown by the polynomial constructor from \p int
1695      * or by piranha::math::gcd3().
1696      */
1697     template <typename T = polynomial, content_enabler<T> = 0>
content() const1698     Cf content() const
1699     {
1700         Cf retval(0);
1701         for (const auto &t : this->_container()) {
1702             math::gcd3(retval, retval, t.m_cf);
1703         }
1704         return retval;
1705     }
1706     /// Primitive part.
1707     /**
1708      * \note
1709      * This method is enabled only if:
1710      * - the method piranha::polynomial::content() is enabled,
1711      * - the polynomial coefficient supports math::divexact().
1712      *
1713      * This method will return \p this divided by its content.
1714      *
1715      * @return the primitive part of \p this.
1716      *
1717      * @throws piranha::zero_division_error if the content is zero.
1718      * @throws unspecified any exception thrown by the division operation or by piranha::polynomial::content().
1719      */
1720     template <typename T = polynomial, pp_enabler<T> = 0>
primitive_part() const1721     polynomial primitive_part() const
1722     {
1723         polynomial retval(*this);
1724         auto c = content();
1725         if (unlikely(math::is_zero(c))) {
1726             piranha_throw(zero_division_error, "the content of the polynomial is zero");
1727         }
1728         detail::poly_exact_cf_div(retval, c);
1729         return retval;
1730     }
1731     /// Univariate polynomial division with remainder.
1732     /**
1733      * \note
1734      * This static method is enabled only if:
1735      * - the polynomial coefficient type \p Cf is multipliable yielding the same type \p Cf, it is multipliable in-place
1736      * and
1737      *   divisible exactly, and it has exact ring operations,
1738      * - the polynomial type is subtractable in-place,
1739      * - the exponent type of the monomial is a C++ integral type or an instance of piranha::mp_integer.
1740      *
1741      * This method will return a pair representing the quotient and the remainder of the division of the univariate
1742      * polynomials
1743      * \p n and \p d. The input polynomials must be univariate in the same variable.
1744      *
1745      * @param n the numerator.
1746      * @param d the denominator.
1747      *
1748      * @return the quotient and remainder of the division of \p n by \p d, represented as a standard pair.
1749      *
1750      * @throws std::invalid_argument if:
1751      * - the input polynomials are not univariate or univariate in different variables, or
1752      * - the low degree of the input polynomials is less than zero.
1753      * @throws piranha::zero_division_error if \p d is empty.
1754      * @throws unspecified any exception thrown by:
1755      * - the public interface of piranha::hash_set and piranha::series,
1756      * - the monomial multiplication and division methods,
1757      * - arithmetic operations on the coefficients and on polynomials,
1758      * - construction of coefficients, monomials, terms and polynomials.
1759      */
1760     template <typename T = polynomial, poly_div_enabler<T> = 0>
udivrem(const polynomial & n,const polynomial & d)1761     static std::pair<polynomial, polynomial> udivrem(const polynomial &n, const polynomial &d)
1762     {
1763         return udivrem_impl<true>(n, d);
1764     }
1765     /// Univariate pseudo-remainder.
1766     /**
1767      * \note
1768      * This static method is enabled only if polynomial division is enabled, and the polynomial
1769      * coefficient type is exponentiable to \p unsigned, yielding the coefficient type as a result.
1770      *
1771      * The pseudo-remainder is defined as:
1772      * \f[
1773      * \mathrm{rem}\left(\mathrm{lc}\left( d \right)^{a-b+1} n,d \right),
1774      * \f]
1775      * where \f$\mathrm{lc}\f$ denotes the leading coefficient (that is, the coefficient of the term with the
1776      * highest univariate degree), and \f$a\f$ and \f$b\f$ are the degrees of \p n and \p d respectively.
1777      *
1778      * This method works only on univariate polyomials in the same variable and it requires the univariate degree of \p
1779      * n to be not less than
1780      * the univariate degree of \p d. If \p n is zero, an empty polynomial will be returned.
1781      *
1782      * @param n the numerator.
1783      * @param d the denominator.
1784      *
1785      * @return the pseudo-remainder of <tt>n / d</tt>.
1786      *
1787      * @throws std::invalid_argument if the polynomials are not univariate in the same variable, or if the univariate
1788      * degree of \p d is greater than the univariate degree of \p n.
1789      * @throws piranha::zero_division_error if \p d is zero.
1790      * @throws unspecified any exception thrown by:
1791      * - piranha::polynomial::udivrem(),
1792      * - the public interface of piranha::series(),
1793      * - copy construction of polynomials,
1794      * - exponentiation and multiplication of coefficients,
1795      * - the conversion of piranha::integer to \p unsigned.
1796      */
1797     template <typename T = polynomial, uprem_enabler<T> = 0>
uprem(const polynomial & n,const polynomial & d)1798     static polynomial uprem(const polynomial &n, const polynomial &d)
1799     {
1800         // Only univariate polynomials are allowed.
1801         if (unlikely(n.get_symbol_set().size() != 1u || n.get_symbol_set() != d.get_symbol_set())) {
1802             piranha_throw(std::invalid_argument, "only univariate polynomials in the same variable "
1803                                                  "are allowed in the polynomial uprem routine");
1804         }
1805         const auto &args = n.get_symbol_set();
1806         // NOTE: this check is repeated in divrem, we do it here as we want to get the leading term in d.
1807         if (unlikely(d.size() == 0u)) {
1808             piranha_throw(zero_division_error, "univariate polynomial division by zero");
1809         }
1810         // Same as above, we need the leading term.
1811         if (unlikely(n.size() == 0u)) {
1812             polynomial retval;
1813             retval.set_symbol_set(args);
1814             return retval;
1815         }
1816         auto ld = detail::poly_lterm(d);
1817         integer dn(detail::poly_lterm(n)->m_key.degree(args)), dd(ld->m_key.degree(args));
1818         if (dd > dn) {
1819             piranha_throw(std::invalid_argument, "the degree of the denominator is greater than "
1820                                                  "the degree of the numerator in the polynomial uprem routine");
1821         }
1822         // NOTE: negative degrees will be caught by udivrem.
1823         auto n_copy(n);
1824         detail::poly_cf_mult(math::pow(ld->m_cf, static_cast<unsigned>(dn - dd + 1)), n_copy);
1825         // NOTE: here we can force exact division in udivrem, when we implement it.
1826         return udivrem(n_copy, d).second;
1827     }
1828     /// Exact polynomial division.
1829     /**
1830      * \note
1831      * This operator is enabled only if the decay types of \p T and \p U are the same type \p Td
1832      * and polynomial::udivrem() is enabled for \p Td.
1833      *
1834      * This operator will compute the exact result of <tt>n / d</tt>. If \p d does not divide \p n exactly, an error
1835      * will be produced.
1836      *
1837      * @param n the numerator.
1838      * @param d the denominator.
1839      *
1840      * @return the quotient <tt>n / d</tt>.
1841      *
1842      * @throws piranha::zero_division_error if \p d is zero.
1843      * @throws piranha::math::inexact_division if the division is not exact.
1844      * @throws std::invalid_argument if a negative exponent is encountered.
1845      * @throws unspecified any exception thrown by:
1846      * - piranha::polynomial::udivrem(),
1847      * - the public interface of piranha::hash_set, piranha::series, piranha::symbol_set,
1848      * - the monomial's <tt>extract_exponents()</tt> methods,
1849      * - construction of coefficients, monomials, terms and polynomials,
1850      * - arithmetic operations on the coefficients and on polynomials,
1851      * - memory errors in standard containers,
1852      * - conversion of piranha::integer to C++ integral types,
1853      * - piranha::safe_cast().
1854      */
1855     template <typename T, typename U, poly_div_enabler<T, U> = 0>
operator /(T && n,U && d)1856     friend polynomial operator/(T &&n, U &&d)
1857     {
1858         // Then we need to deal with different symbol sets.
1859         polynomial merged_n, merged_d;
1860         polynomial const *real_n(&n), *real_d(&d);
1861         if (n.get_symbol_set() != d.get_symbol_set()) {
1862             auto merge = n.get_symbol_set().merge(d.get_symbol_set());
1863             if (merge != n.get_symbol_set()) {
1864                 merged_n = n.extend_symbol_set(merge);
1865                 real_n = &merged_n;
1866             }
1867             if (merge != d.get_symbol_set()) {
1868                 merged_d = d.extend_symbol_set(merge);
1869                 real_d = &merged_d;
1870             }
1871         }
1872         return division_impl(*real_n, *real_d);
1873     }
1874     /// Exact in-place polynomial division.
1875     /**
1876      * \note
1877      * This operator is enabled only if operator/() is enabled for the input types and \p T
1878      * is not const.
1879      *
1880      * This operator is equivalent to:
1881      * @code
1882      * return n = n / d;
1883      * @endcode
1884      *
1885      * @param n the numerator.
1886      * @param d the denominator.
1887      *
1888      * @return a reference to \p n.
1889      *
1890      * @throws unspecified any exception thrown by the binary operator.
1891      */
1892     template <typename T, typename U, poly_in_place_div_enabler<T, U> = 0>
operator /=(T & n,U && d)1893     friend polynomial &operator/=(T &n, U &&d)
1894     {
1895         return n = n / d;
1896     }
1897     /// Get the default algorithm to be used for GCD computations.
1898     /**
1899      * The default value initialised on startup is polynomial_gcd_algorithm::automatic.
1900      * This method is thread-safe.
1901      *
1902      * @return the current default algorithm to be used for GCD computations.
1903      */
get_default_gcd_algorithm()1904     static polynomial_gcd_algorithm get_default_gcd_algorithm()
1905     {
1906         return s_def_gcd_algo.load();
1907     }
1908     /// Set the default algorithm to be used for GCD computations.
1909     /**
1910      * This method is thread-safe.
1911      *
1912      * @param algo the desired default algorithm to be used for GCD computations.
1913      */
set_default_gcd_algorithm(polynomial_gcd_algorithm algo)1914     static void set_default_gcd_algorithm(polynomial_gcd_algorithm algo)
1915     {
1916         s_def_gcd_algo.store(algo);
1917     }
1918     /// Reset the default algorithm to be used for GCD computations.
1919     /**
1920      * This method will set the default algorithm to be used for GCD computations to
1921      * polynomial_gcd_algorithm::automatic.
1922      * This method is thread-safe.
1923      */
reset_default_gcd_algorithm()1924     static void reset_default_gcd_algorithm()
1925     {
1926         s_def_gcd_algo.store(polynomial_gcd_algorithm::automatic);
1927     }
1928     /// Polynomial GCD.
1929     /**
1930      * \note
1931      * This static method is enabled only if the following conditions apply:
1932      * - the coefficient type has math::gcd() and math::gcd3(), it is exponentiable to \p unsigned yielding
1933      *   the coefficient type, it is multipliable in-place and in binary form, yielding the coefficient type,
1934      *   it has math::divexact(), and it is copy-assignable,
1935      * - the polynomial type has piranha::polynomial::content() and piranha::polynomial::udivrem().
1936      *
1937      * This method will compute the GCD of polynomials \p a and \p b. The algorithm that will be employed
1938      * for the computation is selected by the \p algo flag. If \p algo is set to polynomial_gcd_algorithm::automatic
1939      * the heuristic GCD algorithm will be tried first, followed by the PRS SR algorithm in case of failures.
1940      * If \p algo is set to any other value, the selected algorithm will be used. The heuristic GCD algorithm can be
1941      * used only when the ceofficient type is an instance of piranha::mp_integer. The default value for \p algo
1942      * is the one returned by get_default_gcd_algorithm().
1943      *
1944      * The \p with_cofactors flag signals whether the cofactors should be returned together with the GCD or not.
1945      *
1946      * @param a first argument.
1947      * @param b second argument.
1948      * @param with_cofactors flag to signal that cofactors must be returned as well.
1949      * @param algo the GCD algorithm.
1950      *
1951      * @return a tuple containing the GCD \p g of \p a and \p b, and the cofactors (that is, <tt>a / g</tt> and <tt>b /
1952      * g</tt>),
1953      * if requested. If the cofactors are not requested, the content of the last two elements of the tuple is
1954      * unspecified. If both input
1955      * arguments are zero, the cofactors will be zero as well.
1956      *
1957      * @throws std::invalid_argument if a negative exponent is encountered in \p a or \p b.
1958      * @throws std::overflow_error in case of (unlikely) integral overflow errors.
1959      * @throws std::runtime_error if \p algo is polynomial_gcd_algorithm::heuristic and the execution
1960      * of the algorithm fails or it the coefficient type is not an instance of piranha::mp_integer.
1961      * @throws unspecified any exception thrown by:
1962      * - the public interface of piranha::series, piranha::symbol_set, piranha::hash_set,
1963      * - construction of keys,
1964      * - construction, arithmetics and other operations of coefficients and polynomials,
1965      * - math::gcd(), math::gcd3(), math::divexact(), math::pow(), math::subs(),
1966      * - piranha::polynomial::udivrem(), piranha::polynomial::content(),
1967      * - memory errors in standard containers,
1968      * - the conversion of piranha::integer to \p unsigned.
1969      */
1970     template <typename T = polynomial, gcd_enabler<T> = 0>
1971     static std::tuple<polynomial, polynomial, polynomial>
gcd(const polynomial & a,const polynomial & b,bool with_cofactors=false,polynomial_gcd_algorithm algo=get_default_gcd_algorithm ())1972     gcd(const polynomial &a, const polynomial &b, bool with_cofactors = false,
1973         polynomial_gcd_algorithm algo = get_default_gcd_algorithm())
1974     {
1975         // Deal with different symbol sets.
1976         polynomial merged_a, merged_b;
1977         polynomial const *real_a(&a), *real_b(&b);
1978         if (a.get_symbol_set() != b.get_symbol_set()) {
1979             auto merge = a.get_symbol_set().merge(b.get_symbol_set());
1980             if (merge != a.get_symbol_set()) {
1981                 merged_a = a.extend_symbol_set(merge);
1982                 real_a = &merged_a;
1983             }
1984             if (merge != b.get_symbol_set()) {
1985                 merged_b = b.extend_symbol_set(merge);
1986                 real_b = &merged_b;
1987             }
1988         }
1989         if (algo == polynomial_gcd_algorithm::automatic || algo == polynomial_gcd_algorithm::heuristic) {
1990             auto heu_res = try_gcdheu(*real_a, *real_b, algo);
1991             if (heu_res.first && algo == polynomial_gcd_algorithm::heuristic) {
1992                 // This can happen only if the heuristic fails due to the number of iterations. Calling the heuristic
1993                 // with non-integral coefficients already throws from the try_gcdheu implementation.
1994                 piranha_throw(std::runtime_error, "the heuristic polynomial GCD algorithm was explicitly selected, "
1995                                                   "but its execution failed due to too many iterations");
1996             }
1997             if (!heu_res.first) {
1998                 return std::move(heu_res.second);
1999             }
2000         }
2001         // Cache it.
2002         const auto &args = real_a->get_symbol_set();
2003         // Proceed with the univariate case.
2004         if (args.size() == 1u) {
2005             return wrap_gcd_cofactors(detail::gcd_prs_sr(*real_a, *real_b), *real_a, *real_b, with_cofactors);
2006         }
2007         // Zerovariate case. We need to handle this separately as the use of split() below
2008         // requires a nonzero number of arguments.
2009         if (args.size() == 0u) {
2010             if (real_a->size() == 0u && real_b->size() == 0u) {
2011                 return wrap_gcd_cofactors(polynomial{}, *real_a, *real_b, with_cofactors);
2012             }
2013             if (real_a->size() == 0u) {
2014                 return wrap_gcd_cofactors(*real_b, *real_a, *real_b, with_cofactors);
2015             }
2016             if (real_b->size() == 0u) {
2017                 return wrap_gcd_cofactors(*real_a, *real_a, *real_b, with_cofactors);
2018             }
2019             Cf g(0);
2020             math::gcd3(g, real_a->_container().begin()->m_cf, real_b->_container().begin()->m_cf);
2021             return wrap_gcd_cofactors(polynomial(std::move(g)), *real_a, *real_b, with_cofactors);
2022         }
2023         // The general multivariate case.
2024         return wrap_gcd_cofactors(detail::gcd_prs_sr(real_a->split(), real_b->split()).join(), *real_a, *real_b,
2025                                   with_cofactors);
2026     }
2027     /// Height.
2028     /**
2029      * \note
2030      * This method is enabled only if the following conditions hold:
2031      * - the coefficient type supports piranha::math::abs(), yielding the type <tt>height_type<T></tt>,
2032      * - <tt>height_type<T></tt> is constructible from \p int, less-than comparable, move-assignable and copy or move
2033      * constructible.
2034      *
2035      * The height of a polynomial is defined as the maximum of the absolute values of the coefficients. If the
2036      * polynomial
2037      * is empty, zero will be returned.
2038      *
2039      * @return the height of \p this.
2040      *
2041      * @throws unspecified any exception thrown by the construction, assignment, comparison, and absolute value
2042      * calculation
2043      * of <tt>height_type<T></tt>.
2044      */
2045     template <typename T = polynomial>
height() const2046     height_type<T> height() const
2047     {
2048         height_type<T> retval(0);
2049         for (const auto &t : this->_container()) {
2050             auto tmp(math::abs(t.m_cf));
2051             if (!(tmp < retval)) {
2052                 retval = std::move(tmp);
2053             }
2054         }
2055         return retval;
2056     }
2057     /// Untruncated multiplication.
2058     /**
2059      * \note
2060      * This function template is enabled only if the calling piranha::polynomial satisfies piranha::is_multipliable,
2061      * returning the calling piranha::polynomial as return type.
2062      *
2063      * This function will return the product of \p p1 and \p p2, computed without truncation (regardless
2064      * of the current automatic truncation settings). Note that this function is
2065      * available only if the operands are of the same type and no type promotions affect the coefficient types
2066      * during multiplication.
2067      *
2068      * @param p1 the first operand.
2069      * @param p2 the second operand.
2070      *
2071      * @return the product of \p p1 and \p p2.
2072      *
2073      * @throws unspecified any exception thrown by:
2074      * - the public interface of the specialisation of piranha::series_multiplier for piranha::polynomial,
2075      * - the public interface of piranha::symbol_set,
2076      * - the public interface of piranha::series.
2077      */
2078     template <typename T = polynomial, um_enabler<T> = 0>
untruncated_multiplication(const polynomial & p1,const polynomial & p2)2079     static polynomial untruncated_multiplication(const polynomial &p1, const polynomial &p2)
2080     {
2081         auto runner = [](const polynomial &a, const polynomial &b) {
2082             return series_multiplier<polynomial>(a, b)._untruncated_multiplication();
2083         };
2084         return um_tm_implementation(p1, p2, runner);
2085     }
2086     /// Truncated multiplication (total degree).
2087     /**
2088      * \note
2089      * This function template is enabled only if the following conditions hold:
2090      * - the calling piranha::polynomial satisfies piranha::is_multipliable, returning the calling piranha::polynomial
2091      *   as return type,
2092      * - the requirements for truncated multiplication outlined in piranha::polynomial are satisfied,
2093      * - \p U can be safely cast to the degree type of the calling piranha::polynomial.
2094      *
2095      * This function will return the product of \p p1 and \p p2, truncated to the maximum total degree
2096      * of \p max_degree (regardless of the current automatic truncation settings).
2097      * Note that this function is
2098      * available only if the operands are of the same type and no type promotions affect the coefficient types
2099      * during multiplication.
2100      *
2101      * @param p1 the first operand.
2102      * @param p2 the second operand.
2103      * @param max_degree the maximum total degree in the result.
2104      *
2105      * @return the truncated product of \p p1 and \p p2.
2106      *
2107      * @throws unspecified any exception thrown by:
2108      * - the public interface of the specialisation of piranha::series_multiplier for piranha::polynomial,
2109      * - the public interface of piranha::symbol_set,
2110      * - the public interface of piranha::series,
2111      * - piranha::safe_cast().
2112      */
2113     template <typename U, typename T = polynomial, tm_enabler<T, U> = 0>
truncated_multiplication(const polynomial & p1,const polynomial & p2,const U & max_degree)2114     static polynomial truncated_multiplication(const polynomial &p1, const polynomial &p2, const U &max_degree)
2115     {
2116         // NOTE: these 2 implementations may be rolled into one once we can safely capture variadic arguments
2117         // in lambdas.
2118         auto runner = [&max_degree](const polynomial &a, const polynomial &b) {
2119             return series_multiplier<polynomial>(a, b)._truncated_multiplication(safe_cast<degree_type<T>>(max_degree));
2120         };
2121         return um_tm_implementation(p1, p2, runner);
2122     }
2123     /// Truncated multiplication (partial degree).
2124     /**
2125      * \note
2126      * This function template is enabled only if the following conditions hold:
2127      * - the calling piranha::polynomial satisfies piranha::is_multipliable, returning the calling piranha::polynomial
2128      *   as return type,
2129      * - the requirements for truncated multiplication outlined in piranha::polynomial are satisfied,
2130      * - \p U can be safely cast to the degree type of the calling piranha::polynomial.
2131      *
2132      * This function will return the product of \p p1 and \p p2, truncated to the maximum partial degree
2133      * of \p max_degree (regardless of the current automatic truncation settings).
2134      * Note that this function is
2135      * available only if the operands are of the same type and no type promotions affect the coefficient types
2136      * during multiplication.
2137      *
2138      * @param p1 the first operand.
2139      * @param p2 the second operand.
2140      * @param max_degree the maximum total degree in the result.
2141      * @param names names of the variables that will be considered in the computation of the degree.
2142      *
2143      * @return the truncated product of \p p1 and \p p2.
2144      *
2145      * @throws unspecified any exception thrown by:
2146      * - the public interface of the specialisation of piranha::series_multiplier for piranha::polynomial,
2147      * - the public interface of piranha::symbol_set,
2148      * - the public interface of piranha::series,
2149      * - piranha::safe_cast().
2150      */
2151     template <typename U, typename T = polynomial, tm_enabler<T, U> = 0>
truncated_multiplication(const polynomial & p1,const polynomial & p2,const U & max_degree,const std::vector<std::string> & names)2152     static polynomial truncated_multiplication(const polynomial &p1, const polynomial &p2, const U &max_degree,
2153                                                const std::vector<std::string> &names)
2154     {
2155         // NOTE: total and partial degree must be the same.
2156         auto runner = [&max_degree, &names](const polynomial &a, const polynomial &b) -> polynomial {
2157             const symbol_set::positions pos(a.get_symbol_set(), symbol_set(names.begin(), names.end()));
2158             return series_multiplier<polynomial>(a, b)._truncated_multiplication(safe_cast<degree_type<T>>(max_degree),
2159                                                                                  names, pos);
2160         };
2161         return um_tm_implementation(p1, p2, runner);
2162     }
2163 
2164 private:
2165     // Static data for auto_truncate_degree.
2166     static std::mutex s_at_degree_mutex;
2167     static int s_at_degree_mode;
2168     static std::vector<std::string> s_at_degree_names;
2169     // Static data for default GCD algorithm selection.
2170     static std::atomic<polynomial_gcd_algorithm> s_def_gcd_algo;
2171 };
2172 
2173 // Static inits.
2174 template <typename Cf, typename Key>
2175 std::mutex polynomial<Cf, Key>::s_at_degree_mutex;
2176 
2177 template <typename Cf, typename Key>
2178 int polynomial<Cf, Key>::s_at_degree_mode = 0;
2179 
2180 template <typename Cf, typename Key>
2181 std::vector<std::string> polynomial<Cf, Key>::s_at_degree_names;
2182 
2183 template <typename Cf, typename Key>
2184 std::atomic<polynomial_gcd_algorithm> polynomial<Cf, Key>::s_def_gcd_algo(polynomial_gcd_algorithm::automatic);
2185 
2186 namespace detail
2187 {
2188 
2189 template <unsigned N, typename Cf, typename Key, typename = void>
2190 struct r_poly_impl {
2191     static_assert(N > 1u, "Invalid recursion index.");
2192     using type = polynomial<typename r_poly_impl<N - 1u, Cf, Key>::type, Key>;
2193 };
2194 
2195 template <unsigned N, typename Cf, typename Key>
2196 struct r_poly_impl<N, Cf, Key, typename std::enable_if<N == 1u>::type> {
2197     using type = polynomial<Cf, Key>;
2198 };
2199 }
2200 
2201 /// Recursive polynomial.
2202 /**
2203  * This is a convenience alias that can be used to define multivariate polynomials
2204  * as univariate polynomials with univariate polynomials as coefficients.
2205  *
2206  * For instance, the type
2207  * @code
2208  * r_polynomial<1,double,k_monomial>;
2209  * @endcode
2210  * is exactly equivalent to
2211  * @code
2212  * polynomial<double,k_monomial>;
2213  * @endcode
2214  * The type
2215  * @code
2216  * r_polynomial<2,double,k_monomial>;
2217  * @endcode
2218  * is exactly equivalent to
2219  * @code
2220  * polynomial<polynomial<double,k_monomial>,k_monomial>;
2221  * @endcode
2222  * And so on for increasing values of \p N. \p N must be nonzero, or a compile-time error will be
2223  * generated.
2224  */
2225 template <unsigned N, typename Cf, typename Key>
2226 using r_polynomial = typename detail::r_poly_impl<N, Cf, Key>::type;
2227 
2228 namespace detail
2229 {
2230 
2231 // Identification of key types for dispatching in the multiplier.
2232 template <typename T>
2233 struct is_kronecker_monomial {
2234     static const bool value = false;
2235 };
2236 
2237 template <typename T>
2238 struct is_kronecker_monomial<kronecker_monomial<T>> {
2239     static const bool value = true;
2240 };
2241 
2242 template <typename T>
2243 struct is_monomial {
2244     static const bool value = false;
2245 };
2246 
2247 template <typename T, typename S>
2248 struct is_monomial<monomial<T, S>> {
2249     static const bool value = true;
2250 };
2251 
2252 // Identify the presence of auto-truncation methods in the poly multiplier.
2253 template <typename S, typename T>
2254 class has_set_auto_truncate_degree : sfinae_types
2255 {
2256     // NOTE: if we have total degree auto truncation, we also have partial degree truncation.
2257     template <typename S1, typename T1>
2258     static auto test(const S1 &, const T1 &t) -> decltype(S1::set_auto_truncate_degree(t), void(), yes());
2259     static no test(...);
2260 
2261 public:
2262     static const bool value = std::is_same<yes, decltype(test(std::declval<S>(), std::declval<T>()))>::value;
2263 };
2264 
2265 template <typename S, typename T>
2266 const bool has_set_auto_truncate_degree<S, T>::value;
2267 
2268 template <typename S>
2269 class has_get_auto_truncate_degree : sfinae_types
2270 {
2271     template <typename S1>
2272     static auto test(const S1 &) -> decltype(S1::get_auto_truncate_degree(), void(), yes());
2273     static no test(...);
2274 
2275 public:
2276     static const bool value = std::is_same<yes, decltype(test(std::declval<S>()))>::value;
2277 };
2278 
2279 template <typename S>
2280 const bool has_get_auto_truncate_degree<S>::value;
2281 
2282 // Global enabler for the polynomial multiplier.
2283 template <typename Series>
2284 using poly_multiplier_enabler = typename std::enable_if<std::is_base_of<detail::polynomial_tag, Series>::value>::type;
2285 
2286 // Enabler for divexact.
2287 template <typename T>
2288 using poly_divexact_enabler = typename std::enable_if<true_tt<ptd::enabler<T, T>>::value>::type;
2289 
2290 // Enabler for exact ring operations.
2291 template <typename T>
2292 using poly_ero_enabler =
2293     typename std::enable_if<std::is_base_of<detail::polynomial_tag, typename std::decay<T>::type>::value
2294                             && has_exact_ring_operations<typename std::decay<T>::type::term_type::cf_type>::value
2295                             && (std::is_integral<typename std::decay<T>::type::term_type::key_type::value_type>::value
2296                                 || has_exact_ring_operations<
2297                                        typename std::decay<T>::type::term_type::key_type::value_type>::value)>::type;
2298 
2299 // Enabler for GCD.
2300 template <typename T>
2301 using poly_gcd_enabler = typename std::
2302     enable_if<std::is_base_of<detail::polynomial_tag, T>::value
2303               && true_tt<decltype(T::gcd(std::declval<const T &>(), std::declval<const T &>()))>::value>::type;
2304 }
2305 
2306 namespace math
2307 {
2308 
2309 /// Implementation of piranha::math::divexact() for piranha::polynomial.
2310 /**
2311  * This specialisation is enabled if \p T is an instance of piranha::polynomial which supports
2312  * exact polynomial division.
2313  */
2314 template <typename T>
2315 struct divexact_impl<T, detail::poly_divexact_enabler<T>> {
2316     /// Call operator.
2317     /**
2318      * This operator will use the division operator of the polynomial type.
2319      *
2320      * @param out return value.
2321      * @param n the numerator.
2322      * @param d the denominator.
2323      *
2324      * @return a reference to \p out.
2325      *
2326      * @throws unspecified any exception thrown by the polynomial division operator.
2327      */
operator ()piranha::math::divexact_impl2328     T &operator()(T &out, const T &n, const T &d) const
2329     {
2330         return out = n / d;
2331     }
2332 };
2333 
2334 /// Implementation of piranha::math::gcd() for piranha::polynomial.
2335 /**
2336  * This specialisation is enabled if \p T is an instance of piranha::polynomial that supports
2337  * piranha::polynomial::gcd(). The piranha::polynomial_gcd_algorithm::automatic algorithm
2338  * will be used for the computation.
2339  */
2340 template <typename T>
2341 struct gcd_impl<T, T, detail::poly_gcd_enabler<T>> {
2342     /// Call operator.
2343     /**
2344      * @param a first argument.
2345      * @param b second argument.
2346      *
2347      * @return the first element of the result of <tt>piranha::polynomial::gcd(a,b)</tt>.
2348      *
2349      * @throws unspecified any exception thrown by piranha::polynomial::gcd().
2350      */
operator ()piranha::math::gcd_impl2351     T operator()(const T &a, const T &b) const
2352     {
2353         return std::get<0u>(T::gcd(a, b));
2354     }
2355 };
2356 }
2357 
2358 /// Exact ring operations specialisation for piranha::polynomial.
2359 /**
2360  * This specialisation is enabled if the decay type of \p T is an instance of piranha::polynomial
2361  * whose coefficient type satisfies piranha::has_exact_ring_operations and whose exponent type is either
2362  * a C++ integral type or another type which satisfies piranha::has_exact_ring_operations.
2363  */
2364 template <typename T>
2365 struct has_exact_ring_operations<T, detail::poly_ero_enabler<T>> {
2366     /// Value of the type trait.
2367     static const bool value = true;
2368 };
2369 
2370 template <typename T>
2371 const bool has_exact_ring_operations<T, detail::poly_ero_enabler<T>>::value;
2372 
2373 /// Specialisation of piranha::series_multiplier for piranha::polynomial.
2374 /**
2375  * This specialisation of piranha::series_multiplier is enabled when \p Series is an instance of
2376  * piranha::polynomial.
2377  *
2378  * ## Type requirements ##
2379  *
2380  * \p Series must be suitable for use in piranha::base_series_multiplier.
2381  *
2382  * ## Exception safety guarantee ##
2383  *
2384  * This class provides the same guarantee as piranha::base_series_multiplier.
2385  *
2386  * ## Move semantics ##
2387  *
2388  * Move semantics is equivalent to piranha::base_series_multiplier's move semantics.
2389  */
2390 template <typename Series>
2391 class series_multiplier<Series, detail::poly_multiplier_enabler<Series>> : public base_series_multiplier<Series>
2392 {
2393     // Base multiplier type.
2394     using base = base_series_multiplier<Series>;
2395     // Cf type getter shortcut.
2396     template <typename T>
2397     using cf_t = typename T::term_type::cf_type;
2398     // Key type getter shortcut.
2399     template <typename T>
2400     using key_t = typename T::term_type::key_type;
2401     // Bounds checking.
2402     // Functor to return un updated copy of p if v is less than p.first or greater than p.second.
2403     struct update_minmax {
2404         template <typename T>
operator ()piranha::series_multiplier::update_minmax2405         std::pair<T, T> operator()(const std::pair<T, T> &p, const T &v) const
2406         {
2407             return std::make_pair(v < p.first ? v : p.first, v > p.second ? v : p.second);
2408         }
2409     };
2410     // No bounds checking if key is a monomial with non-integral exponents.
2411     template <typename T = Series,
2412               typename std::enable_if<detail::is_monomial<key_t<T>>::value
2413                                           && !std::is_integral<typename key_t<T>::value_type>::value,
2414                                       int>::type
2415               = 0>
check_bounds() const2416     void check_bounds() const
2417     {
2418     }
2419     // Monomial with integral exponents.
2420     template <typename T = Series,
2421               typename std::enable_if<detail::is_monomial<key_t<T>>::value
2422                                           && std::is_integral<typename key_t<T>::value_type>::value,
2423                                       int>::type
2424               = 0>
check_bounds() const2425     void check_bounds() const
2426     {
2427         using expo_type = typename key_t<T>::value_type;
2428         using term_type = typename Series::term_type;
2429         using mm_vec = std::vector<std::pair<expo_type, expo_type>>;
2430         using v_ptr = typename base::v_ptr;
2431         // NOTE: we know that the input series are not null.
2432         piranha_assert(this->m_v1.size() != 0u && this->m_v2.size() != 0u);
2433         // Sync mutex, actually used only in mt mode.
2434         std::mutex mut;
2435         // Checker for monomial sizes in debug mode.
2436         auto monomial_checker = [this](const term_type &t) { return t.m_key.size() == this->m_ss.size(); };
2437         (void)monomial_checker;
2438         // The function used to determine minmaxs for the two series. This is used both in
2439         // single-thread and multi-thread mode.
2440         auto thread_func = [&mut, this, &monomial_checker](unsigned t_idx, const v_ptr *vp, mm_vec *mmv) {
2441             piranha_assert(t_idx < this->m_n_threads);
2442             // Establish the block size.
2443             const auto block_size = vp->size() / this->m_n_threads;
2444             auto start = vp->data() + t_idx * block_size;
2445             const auto end
2446                 = vp->data() + ((t_idx == this->m_n_threads - 1u) ? vp->size() : ((t_idx + 1u) * block_size));
2447             // We need to make sure we have at least 1 element to process. This is guaranteed
2448             // in the single-threaded implementation but not in multithreading.
2449             if (start == end) {
2450                 piranha_assert(this->m_n_threads > 1u);
2451                 return;
2452             }
2453             piranha_assert(monomial_checker(**start));
2454             // Local vector that will hold the minmax values for this thread.
2455             mm_vec minmax_values;
2456             // Init the mimnax.
2457             // NOTE: we can use this as we are sure the series has at least one element (start != end).
2458             std::transform((*start)->m_key.begin(), (*start)->m_key.end(), std::back_inserter(minmax_values),
2459                            [](const expo_type &v) { return std::make_pair(v, v); });
2460             // Move to the next element and go with the loop.
2461             ++start;
2462             for (; start != end; ++start) {
2463                 piranha_assert(monomial_checker(**start));
2464                 // NOTE: std::transform is allowed to do transformations in-place - i.e., here the output range is
2465                 // the same as the first or second input range:
2466                 // http://stackoverflow.com/questions/19200528/is-it-safe-for-the-input-iterator-and-output-iterator-in-stdtransform-to-be-fr
2467                 // The important part is that the functor *itself* must not mutate the elements.
2468                 std::transform(minmax_values.begin(), minmax_values.end(), (*start)->m_key.begin(),
2469                                minmax_values.begin(), update_minmax{});
2470             }
2471             if (this->m_n_threads == 1u) {
2472                 // In single thread the output mmv should be written only once, after being def-inited.
2473                 piranha_assert(mmv->empty());
2474                 // Just move in the local minmax values in single-threaded mode.
2475                 *mmv = std::move(minmax_values);
2476             } else {
2477                 std::lock_guard<std::mutex> lock(mut);
2478                 if (mmv->empty()) {
2479                     // If mmv has not been inited yet, just move in the current values.
2480                     *mmv = std::move(minmax_values);
2481                 } else {
2482                     piranha_assert(minmax_values.size() == mmv->size());
2483                     // Otherwise, update the minmaxs.
2484                     auto updater
2485                         = [](const std::pair<expo_type, expo_type> &p1, const std::pair<expo_type, expo_type> &p2) {
2486                               return std::make_pair(p1.first < p2.first ? p1.first : p2.first,
2487                                                     p1.second > p2.second ? p1.second : p2.second);
2488                           };
2489                     std::transform(minmax_values.begin(), minmax_values.end(), mmv->begin(), mmv->begin(), updater);
2490                 }
2491             }
2492         };
2493         // minmax vectors for the two series.
2494         mm_vec minmax_values1, minmax_values2;
2495         check_bounds_impl(minmax_values1, minmax_values2, thread_func);
2496         // Compute the sum of the two minmaxs, using multiprecision to avoid overflow (this is a simple interval
2497         // addition).
2498         std::vector<std::pair<integer, integer>> minmax_values;
2499         std::transform(
2500             minmax_values1.begin(), minmax_values1.end(), minmax_values2.begin(), std::back_inserter(minmax_values),
2501             [](const std::pair<expo_type, expo_type> &p1, const std::pair<expo_type, expo_type> &p2) {
2502                 return std::make_pair(integer(p1.first) + integer(p2.first), integer(p1.second) + integer(p2.second));
2503             });
2504         piranha_assert(minmax_values.size() == minmax_values1.size());
2505         piranha_assert(minmax_values.size() == minmax_values2.size());
2506         // Now do the checking.
2507         for (decltype(minmax_values.size()) i = 0u; i < minmax_values.size(); ++i) {
2508             try {
2509                 (void)static_cast<expo_type>(minmax_values[i].first);
2510                 (void)static_cast<expo_type>(minmax_values[i].second);
2511             } catch (...) {
2512                 piranha_throw(std::overflow_error, "monomial components are out of bounds");
2513             }
2514         }
2515     }
2516     template <typename T = Series,
2517               typename std::enable_if<detail::is_kronecker_monomial<key_t<T>>::value, int>::type = 0>
check_bounds() const2518     void check_bounds() const
2519     {
2520         using value_type = typename key_t<Series>::value_type;
2521         using ka = kronecker_array<value_type>;
2522         using v_ptr = typename base::v_ptr;
2523         using mm_vec = std::vector<std::pair<value_type, value_type>>;
2524         piranha_assert(this->m_v1.size() != 0u && this->m_v2.size() != 0u);
2525         // NOTE: here we are sure about this since the symbol set in a series should never
2526         // overflow the size of the limits, as the check for compatibility in Kronecker monomial
2527         // would kick in.
2528         piranha_assert(this->m_ss.size() < ka::get_limits().size());
2529         std::mutex mut;
2530         auto thread_func = [&mut, this](unsigned t_idx, const v_ptr *vp, mm_vec *mmv) {
2531             piranha_assert(t_idx < this->m_n_threads);
2532             const auto block_size = vp->size() / this->m_n_threads;
2533             auto start = vp->data() + t_idx * block_size;
2534             const auto end
2535                 = vp->data() + ((t_idx == this->m_n_threads - 1u) ? vp->size() : ((t_idx + 1u) * block_size));
2536             if (start == end) {
2537                 piranha_assert(this->m_n_threads > 1u);
2538                 return;
2539             }
2540             mm_vec minmax_values;
2541             // Tmp vector for unpacking, inited with the first element in the range.
2542             // NOTE: we need to check that the exponents of the monomials in the result do not
2543             // go outside the bounds of the Kronecker codification. We need to unpack all monomials
2544             // in the operands and examine them, we cannot operate on the codes for this.
2545             auto tmp_vec = (*start)->m_key.unpack(this->m_ss);
2546             // Init the mimnax.
2547             std::transform(tmp_vec.begin(), tmp_vec.end(), std::back_inserter(minmax_values),
2548                            [](const value_type &v) { return std::make_pair(v, v); });
2549             ++start;
2550             for (; start != end; ++start) {
2551                 tmp_vec = (*start)->m_key.unpack(this->m_ss);
2552                 std::transform(minmax_values.begin(), minmax_values.end(), tmp_vec.begin(), minmax_values.begin(),
2553                                update_minmax{});
2554             }
2555             if (this->m_n_threads == 1u) {
2556                 piranha_assert(mmv->empty());
2557                 *mmv = std::move(minmax_values);
2558             } else {
2559                 std::lock_guard<std::mutex> lock(mut);
2560                 if (mmv->empty()) {
2561                     *mmv = std::move(minmax_values);
2562                 } else {
2563                     piranha_assert(minmax_values.size() == mmv->size());
2564                     auto updater
2565                         = [](const std::pair<value_type, value_type> &p1, const std::pair<value_type, value_type> &p2) {
2566                               return std::make_pair(p1.first < p2.first ? p1.first : p2.first,
2567                                                     p1.second > p2.second ? p1.second : p2.second);
2568                           };
2569                     std::transform(minmax_values.begin(), minmax_values.end(), mmv->begin(), mmv->begin(), updater);
2570                 }
2571             }
2572         };
2573         mm_vec minmax_values1, minmax_values2;
2574         check_bounds_impl(minmax_values1, minmax_values2, thread_func);
2575         std::vector<std::pair<integer, integer>> minmax_values;
2576         std::transform(
2577             minmax_values1.begin(), minmax_values1.end(), minmax_values2.begin(), std::back_inserter(minmax_values),
2578             [](const std::pair<value_type, value_type> &p1, const std::pair<value_type, value_type> &p2) {
2579                 return std::make_pair(integer(p1.first) + integer(p2.first), integer(p1.second) + integer(p2.second));
2580             });
2581         // Bounds of the Kronecker representation for each component.
2582         const auto &minmax_vec
2583             = std::get<0u>(ka::get_limits()[static_cast<decltype(ka::get_limits().size())>(this->m_ss.size())]);
2584         piranha_assert(minmax_values.size() == minmax_vec.size());
2585         piranha_assert(minmax_values.size() == minmax_values1.size());
2586         piranha_assert(minmax_values.size() == minmax_values2.size());
2587         for (decltype(minmax_values.size()) i = 0u; i < minmax_values.size(); ++i) {
2588             if (unlikely(minmax_values[i].first < -minmax_vec[i] || minmax_values[i].second > minmax_vec[i])) {
2589                 piranha_throw(std::overflow_error, "Kronecker monomial components are out of bounds");
2590             }
2591         }
2592     }
2593     // Implementation detail of the bound checking logic. This is common enough to be shared.
2594     template <typename MmVec, typename Func>
check_bounds_impl(MmVec & minmax_values1,MmVec & minmax_values2,Func & thread_func) const2595     void check_bounds_impl(MmVec &minmax_values1, MmVec &minmax_values2, Func &thread_func) const
2596     {
2597         if (this->m_n_threads == 1u) {
2598             thread_func(0u, &(this->m_v1), &minmax_values1);
2599             thread_func(0u, &(this->m_v2), &minmax_values2);
2600         } else {
2601             // Series 1.
2602             {
2603                 future_list<void> ff_list;
2604                 try {
2605                     for (unsigned i = 0u; i < this->m_n_threads; ++i) {
2606                         ff_list.push_back(thread_pool::enqueue(i, thread_func, i, &(this->m_v1), &minmax_values1));
2607                     }
2608                     // First let's wait for everything to finish.
2609                     ff_list.wait_all();
2610                     // Then, let's handle the exceptions.
2611                     ff_list.get_all();
2612                 } catch (...) {
2613                     ff_list.wait_all();
2614                     throw;
2615                 }
2616             }
2617             // Series 2.
2618             {
2619                 future_list<void> ff_list;
2620                 try {
2621                     for (unsigned i = 0u; i < this->m_n_threads; ++i) {
2622                         ff_list.push_back(thread_pool::enqueue(i, thread_func, i, &(this->m_v2), &minmax_values2));
2623                     }
2624                     // First let's wait for everything to finish.
2625                     ff_list.wait_all();
2626                     // Then, let's handle the exceptions.
2627                     ff_list.get_all();
2628                 } catch (...) {
2629                     ff_list.wait_all();
2630                     throw;
2631                 }
2632             }
2633         }
2634     }
2635     // Enabler for the call operator.
2636     template <typename T>
2637     using call_enabler =
2638         typename std::enable_if<key_is_multipliable<cf_t<T>, key_t<T>>::value && has_multiply_accumulate<cf_t<T>>::value
2639                                     && detail::true_tt<detail::cf_mult_enabler<cf_t<T>>>::value,
2640                                 int>::type;
2641     // Utility helpers for the subtraction of degree types in the truncation routines. The specialisation
2642     // for integral types will check the operation for overflow.
2643     template <typename T, typename std::enable_if<!std::is_integral<T>::value, int>::type = 0>
degree_sub(const T & a,const T & b)2644     static T degree_sub(const T &a, const T &b)
2645     {
2646         return a - b;
2647     }
2648     template <typename T, typename std::enable_if<std::is_integral<T>::value, int>::type = 0>
degree_sub(const T & a,const T & b)2649     static T degree_sub(const T &a, const T &b)
2650     {
2651         T retval(a);
2652         detail::safe_integral_subber(retval, b);
2653         return retval;
2654     }
2655     // Dispatch of untruncated multiplication.
2656     template <typename T = Series,
2657               typename std::enable_if<detail::is_kronecker_monomial<typename T::term_type::key_type>::value, int>::type
2658               = 0>
um_impl() const2659     Series um_impl() const
2660     {
2661         return untruncated_kronecker_mult();
2662     }
2663     template <typename T = Series,
2664               typename std::enable_if<!detail::is_kronecker_monomial<typename T::term_type::key_type>::value, int>::type
2665               = 0>
um_impl() const2666     Series um_impl() const
2667     {
2668         return this->plain_multiplication();
2669     }
2670 
2671 public:
2672     /// Constructor.
2673     /**
2674      * The constructor will call the base constructor and run these additional checks:
2675      * - if the key is a piranha::kronecker_monomial, it will be checked that the result of the multiplication does
2676      *   not overflow the representation limits of piranha::kronecker_monomial;
2677      * - if the key is a piranha::monomial of a C++ integral type, it will be checked that the result of the
2678      *   multiplication does not overflow the limits of the integral type.
2679      *
2680      * If any check fails, a runtime error will be produced.
2681      *
2682      * @param s1 first series operand.
2683      * @param s2 second series operand.
2684      *
2685      * @throws std::overflow_error if a bounds check, as described above, fails.
2686      * @throws unspecified any exception thrown by:
2687      * - the base constructor,
2688      * - standard threading primitives,
2689      * - memory errors in standard containers,
2690      * - thread_pool::enqueue(),
2691      * - future_list::push_back().
2692      */
series_multiplier(const Series & s1,const Series & s2)2693     explicit series_multiplier(const Series &s1, const Series &s2) : base(s1, s2)
2694     {
2695         // Nothing to do if the series are null or the merged symbol set is empty.
2696         if (unlikely(this->m_v1.empty() || this->m_v2.empty() || this->m_ss.size() == 0u)) {
2697             return;
2698         }
2699         check_bounds();
2700     }
2701     /// Perform multiplication.
2702     /**
2703      * \note
2704      * This template operator is enabled only if:
2705      * - the coefficient and key type of \p Series satisfy piranha::key_is_multipliable,
2706      * - the coefficient type of \p Series supports piranha::math::mul3() and piranha::math::multiply_accumulate().
2707      *
2708      * This method will perform the multiplication of the series operands passed to the constructor. Depending on
2709      * the key type of \p Series, the implementation will use either base_series_multiplier::plain_multiplication()
2710      * with base_series_multiplier::plain_multiplier or a different algorithm.
2711      *
2712      * If a polynomial truncation threshold is defined and the degree type of the polynomial is a C++ integral type,
2713      * the integral arithmetic operations involved in the truncation logic will be checked for overflow.
2714      *
2715      * @return the result of the multiplication of the input series operands.
2716      *
2717      * @throws std::overflow_error in case of overflow errors.
2718      * @throws unspecified any exception thrown by:
2719      * - piranha::base_series_multiplier::plain_multiplication(),
2720      * - piranha::base_series_multiplier::estimate_final_series_size(),
2721      * - piranha::base_series_multiplier::sanitise_series(),
2722      * - piranha::base_series_multiplier::finalise_series(),
2723      * - <tt>boost::numeric_cast()</tt>,
2724      * - the public interface of piranha::hash_set,
2725      * - piranha::safe_cast(),
2726      * - memory errors in standard containers,
2727      * - piranha::math::mul3(),
2728      * - piranha::math::multiply_accumulate(),
2729      * - thread_pool::enqueue(),
2730      * - future_list::push_back(),
2731      * - _truncated_multiplication(),
2732      * - polynomial::get_auto_truncate_degree().
2733      */
2734     template <typename T = Series, call_enabler<T> = 0>
operator ()() const2735     Series operator()() const
2736     {
2737         return execute();
2738     }
2739     /** @name Low-level interface
2740      * Low-level methods, on top of which the call operator is implemented.
2741      */
2742     //@{
2743     /// Untruncated multiplication.
2744     /**
2745      * \note
2746      * This method can be used only if operator()() can be called.
2747      *
2748      * This method will return the result of multiplying the two polynomials used as input arguments
2749      * in the class' constructor. The multiplication will be untruncated, regardless of the current global
2750      * truncation settings.
2751      *
2752      * @return the result of the untruncated multiplication of the two operands used in the construction of \p this.
2753      *
2754      * @throws unspecified any exception thrown by:
2755      * - piranha::base_series_multiplier::plain_multiplication(),
2756      * - piranha::base_series_multiplier::estimate_final_series_size(),
2757      * - piranha::base_series_multiplier::sanitise_series(),
2758      * - piranha::base_series_multiplier::finalise_series(),
2759      * - <tt>boost::numeric_cast()</tt>,
2760      * - the public interface of piranha::hash_set,
2761      * - piranha::safe_cast(),
2762      * - memory errors in standard containers,
2763      * - piranha::math::mul3(),
2764      * - piranha::math::multiply_accumulate(),
2765      * - thread_pool::enqueue(),
2766      * - future_list::push_back().
2767      */
_untruncated_multiplication() const2768     Series _untruncated_multiplication() const
2769     {
2770         return um_impl();
2771     }
2772     /// Truncated multiplication.
2773     /**
2774      * \note
2775      * This method can be used only if the following conditions apply:
2776      * - the conditions for truncated multiplication outlined in piranha::polynomial are satisfied,
2777      * - the type \p T is the same as the degree type of the polynomial,
2778      * - the number and types of \p Args is as specified below,
2779      * - piranha::base_series_multiplier::plain_multiplication() and _get_skip_limits() can be called.
2780      *
2781      * This method will perform the truncated multiplication of the series operands passed to the constructor.
2782      * The truncation degree is set to \p max_degree, and it is either:
2783      * - the total maximum degree, if the number of \p Args is zero, or
2784      * - the partial degree, if the number of \p Args is two.
2785      *
2786      * In the latter case, the two arguments must be:
2787      * - an \p std::vector of \p std::string representing the names of the variables which will be taken
2788      *   into account when computing the partial degree,
2789      * - a piranha::symbol_set::positions referring to the positions of the variables of the first argument
2790      *   in the merged symbol set of the two operands.
2791      *
2792      * @param max_degree the maximum degree of the result of the multiplication.
2793      * @param args either an empty argument, or a pair of arguments as described above.
2794      *
2795      * @return the result of the truncated multiplication of the operands used for construction.
2796      *
2797      * @throws unspecified any exception thrown by:
2798      * - memory errors in standard containers,
2799      * - piranha::safe_cast(),
2800      * - arithmetic and other operations on the degree type,
2801      * - base_series_multiplier::plain_multiplication(),
2802      * - _get_skip_limits().
2803      */
2804     template <typename T, typename... Args>
_truncated_multiplication(const T & max_degree,const Args &...args) const2805     Series _truncated_multiplication(const T &max_degree, const Args &... args) const
2806     {
2807         // NOTE: a possible optimisation here is the following: if the sum degrees of the arguments is less than
2808         // or equal to the max truncation degree, just do the normal multiplication - which can also then take
2809         // advantage of faster Kronecker multiplication, if the series are suitable.
2810         using term_type = typename Series::term_type;
2811         // NOTE: degree type is the same in total and partial.
2812         using degree_type = decltype(ps_get_degree(term_type{}, this->m_ss));
2813         using size_type = typename base::size_type;
2814         namespace sph = std::placeholders;
2815         static_assert(std::is_same<T, degree_type>::value, "Invalid degree type");
2816         static_assert(detail::has_get_auto_truncate_degree<Series>::value, "Invalid series type");
2817         // First let's create two vectors with the degrees of the terms in the two series.
2818         using d_size_type = typename std::vector<degree_type>::size_type;
2819         std::vector<degree_type> v_d1(safe_cast<d_size_type>(this->m_v1.size())),
2820             v_d2(safe_cast<d_size_type>(this->m_v2.size()));
2821         detail::parallel_vector_transform(
2822             this->m_n_threads, this->m_v1, v_d1,
2823             std::bind(term_degree_getter{}, sph::_1, std::cref(this->m_ss), std::cref(args)...));
2824         detail::parallel_vector_transform(
2825             this->m_n_threads, this->m_v2, v_d2,
2826             std::bind(term_degree_getter{}, sph::_1, std::cref(this->m_ss), std::cref(args)...));
2827         // Next we need to order the terms in the second series, and also the corresponding degree vector.
2828         // First we create a vector of indices and we fill it.
2829         std::vector<size_type> idx_vector(safe_cast<typename std::vector<size_type>::size_type>(this->m_v2.size()));
2830         std::iota(idx_vector.begin(), idx_vector.end(), size_type(0u));
2831         // Second, we sort the vector of indices according to the degrees in the second series.
2832         std::stable_sort(idx_vector.begin(), idx_vector.end(), [&v_d2](const size_type &i1, const size_type &i2) {
2833             return v_d2[static_cast<d_size_type>(i1)] < v_d2[static_cast<d_size_type>(i2)];
2834         });
2835         // Finally, we apply the permutation to v_d2 and m_v2.
2836         decltype(this->m_v2) v2_copy(this->m_v2.size());
2837         decltype(v_d2) v_d2_copy(v_d2.size());
2838         std::transform(idx_vector.begin(), idx_vector.end(), v2_copy.begin(),
2839                        [this](const size_type &i) { return this->m_v2[i]; });
2840         std::transform(idx_vector.begin(), idx_vector.end(), v_d2_copy.begin(),
2841                        [&v_d2](const size_type &i) { return v_d2[static_cast<d_size_type>(i)]; });
2842         this->m_v2 = std::move(v2_copy);
2843         v_d2 = std::move(v_d2_copy);
2844         // Now get the skip limits and we build the limits functor.
2845         const auto sl = _get_skip_limits(v_d1, v_d2, max_degree);
2846         auto lf = [&sl](const size_type &idx1) {
2847             return sl[static_cast<typename std::vector<size_type>::size_type>(idx1)];
2848         };
2849         return this->plain_multiplication(lf);
2850     }
2851     /// Establish skip limits for truncated multiplication.
2852     /**
2853      * \note
2854      * This method can be called only if \p Series supports truncated multiplication (as explained in
2855      * piranha::polynomial) and
2856      * \p T is the same type as the degree type.
2857      *
2858      * This method assumes that \p v_d1 and \p v_d2 are vectors containing the degrees of each term in the first and
2859      * second series respectively, and that \p v_d2 is sorted in ascending order.
2860      * It will return a vector \p v of indices in the second series such that, given an index \p i in the first
2861      * series, the term of index <tt>v[i]</tt> in the second series is the first term such that the term-by-term
2862      * multiplication with the <tt>i</tt>-th term in the first series produces a term of degree greater than
2863      * \p max_degree. That is, terms of index equal to or greater than <tt>v[i]</tt> in the second series
2864      * will produce terms with degree greater than \p max_degree when multiplied by the <tt>i</tt>-th term in the first
2865      * series.
2866      *
2867      * @param v_d1 a vector containing the degrees of the terms in the first series.
2868      * @param v_d2 a sorted vector containing the degrees of the terms in the second series.
2869      * @param max_degree the truncation degree.
2870      *
2871      * @return the vector of skip limits, as explained above.
2872      *
2873      * @throws unspecified any exception thrown by:
2874      * - memory errors in standard containers,
2875      * - piranha::safe_cast(),
2876      * - operations on the degree type.
2877      */
2878     template <typename T>
_get_skip_limits(const std::vector<T> & v_d1,const std::vector<T> & v_d2,const T & max_degree) const2879     std::vector<typename base::size_type> _get_skip_limits(const std::vector<T> &v_d1, const std::vector<T> &v_d2,
2880                                                            const T &max_degree) const
2881     {
2882         // NOTE: this can be parallelised, but we need to check the heuristic
2883         // for selecting the number of threads as it is pretty fast wrt the multiplication.
2884         // Check that we are allowed to call this method.
2885         PIRANHA_TT_CHECK(detail::has_get_auto_truncate_degree, Series);
2886         PIRANHA_TT_CHECK(std::is_same, T, decltype(math::degree(Series{})));
2887         using size_type = typename base::size_type;
2888         using d_size_type = typename std::vector<T>::size_type;
2889         piranha_assert(std::is_sorted(v_d2.begin(), v_d2.end()));
2890         // A vector of indices into the second series.
2891         std::vector<size_type> idx_vector(safe_cast<typename std::vector<size_type>::size_type>(this->m_v2.size()));
2892         std::iota(idx_vector.begin(), idx_vector.end(), size_type(0u));
2893         // The return value.
2894         std::vector<size_type> retval;
2895         for (const auto &d1 : v_d1) {
2896             // Here we will find the index of the first term t2 in the second series such that
2897             // the degree d2 of t2 is > max_degree - d1, that is, d1 + d2 > max_degree.
2898             // NOTE: we need to use upper_bound, instead of lower_bound, because we need to find the first
2899             // element which is *strictly* greater than the max degree, as upper bound of a half closed
2900             // interval.
2901             // NOTE: the functor of lower_bound works inversely wrt upper_bound. See the notes on the type
2902             // requirements for the functor here:
2903             // http://en.cppreference.com/w/cpp/algorithm/upper_bound
2904             const T comp = degree_sub(max_degree, d1);
2905             const auto it = std::upper_bound(
2906                 idx_vector.begin(), idx_vector.end(), comp,
2907                 [&v_d2](const T &c, const size_type &idx) { return c < v_d2[static_cast<d_size_type>(idx)]; });
2908             retval.push_back(it == idx_vector.end() ? static_cast<size_type>(idx_vector.size()) : *it);
2909         }
2910         // Check the consistency of the result in debug mode.
2911         auto retval_checker = [&retval, &v_d1, &v_d2, &max_degree, this]() -> bool {
2912             for (decltype(retval.size()) i = 0u; i < retval.size(); ++i) {
2913                 // NOTE: this just means that all terms in s2 are within the limit.
2914                 if (retval[i] == v_d2.size()) {
2915                     continue;
2916                 }
2917                 if (retval[i] > v_d2.size()) {
2918                     return false;
2919                 }
2920                 if (!(v_d2[static_cast<d_size_type>(retval[i])]
2921                       > this->degree_sub(max_degree, v_d1[static_cast<d_size_type>(i)]))) {
2922                     return false;
2923                 }
2924             }
2925             return true;
2926         };
2927         (void)retval_checker;
2928         piranha_assert(retval.size() == this->m_v1.size());
2929         piranha_assert(retval_checker());
2930         return retval;
2931     }
2932     //@}
2933 private:
2934     // NOTE: wrapper to multadd that treats specially rational coefficients. We need to decide in the future
2935     // if this stays here or if it is better to generalise it.
2936     template <typename T, typename std::enable_if<!detail::is_mp_rational<T>::value, int>::type = 0>
fma_wrap(T & a,const T & b,const T & c)2937     static void fma_wrap(T &a, const T &b, const T &c)
2938     {
2939         math::multiply_accumulate(a, b, c);
2940     }
2941     template <typename T, typename std::enable_if<detail::is_mp_rational<T>::value, int>::type = 0>
fma_wrap(T & a,const T & b,const T & c)2942     static void fma_wrap(T &a, const T &b, const T &c)
2943     {
2944         math::multiply_accumulate(a._num(), b.num(), c.num());
2945     }
2946     // Wrapper for the plain multiplication routine.
2947     // Case 1: no auto truncation available, just run the plain multiplication.
2948     template <typename T = Series,
2949               typename std::enable_if<!detail::has_get_auto_truncate_degree<T>::value, int>::type = 0>
plain_multiplication_wrapper() const2950     Series plain_multiplication_wrapper() const
2951     {
2952         return this->plain_multiplication();
2953     }
2954     // Case 2: auto-truncation available. Check if auto truncation is active.
2955     template <typename T = Series,
2956               typename std::enable_if<detail::has_get_auto_truncate_degree<T>::value, int>::type = 0>
plain_multiplication_wrapper() const2957     Series plain_multiplication_wrapper() const
2958     {
2959         const auto t = T::get_auto_truncate_degree();
2960         if (std::get<0u>(t) == 0) {
2961             // No truncation active.
2962             return this->plain_multiplication();
2963         }
2964         // Truncation is active.
2965         if (std::get<0u>(t) == 1) {
2966             // Total degree truncation.
2967             return _truncated_multiplication(std::get<1u>(t));
2968         }
2969         piranha_assert(std::get<0u>(t) == 2);
2970         // Partial degree truncation.
2971         const symbol_set::positions pos(this->m_ss, symbol_set(std::get<2u>(t).begin(), std::get<2u>(t).end()));
2972         return _truncated_multiplication(std::get<1u>(t), std::get<2u>(t), pos);
2973     }
2974     // NOTE: the existence of these functors is because GCC 4.8 has troubles capturing variadic arguments in lambdas
2975     // in _truncated_multiplication, and we need to use std::bind instead. Once we switch to 4.9, we can revert
2976     // to lambdas and drop the <functional> header.
2977     struct term_degree_sorter {
2978         using term_type = typename Series::term_type;
2979         template <typename... Args>
operator ()piranha::series_multiplier::term_degree_sorter2980         bool operator()(term_type const *p1, term_type const *p2, const symbol_set &ss, const Args &... args) const
2981         {
2982             return ps_get_degree(*p1, args..., ss) < ps_get_degree(*p2, args..., ss);
2983         }
2984     };
2985     struct term_degree_getter {
2986         using term_type = typename Series::term_type;
2987         template <typename... Args>
operator ()piranha::series_multiplier::term_degree_getter2988         auto operator()(term_type const *p, const symbol_set &ss, const Args &... args) const
2989             -> decltype(ps_get_degree(*p, args..., ss))
2990         {
2991             return ps_get_degree(*p, args..., ss);
2992         }
2993     };
2994     // execute() is the top level dispatch for the actual multiplication.
2995     // Case 1: not a Kronecker monomial, do the plain mult.
2996     template <typename T = Series,
2997               typename std::enable_if<!detail::is_kronecker_monomial<typename T::term_type::key_type>::value, int>::type
2998               = 0>
execute() const2999     Series execute() const
3000     {
3001         return plain_multiplication_wrapper();
3002     }
3003     // Checking for active truncation.
3004     template <typename T = Series,
3005               typename std::enable_if<detail::has_get_auto_truncate_degree<T>::value, int>::type = 0>
check_truncation() const3006     bool check_truncation() const
3007     {
3008         const auto t = T::get_auto_truncate_degree();
3009         return std::get<0u>(t) != 0;
3010     }
3011     template <typename T = Series,
3012               typename std::enable_if<!detail::has_get_auto_truncate_degree<T>::value, int>::type = 0>
check_truncation() const3013     bool check_truncation() const
3014     {
3015         return false;
3016     }
3017     // Case 2: Kronecker mult, do the special multiplication unless a truncation is active. In that case, run the
3018     // plain mult.
3019     template <typename T = Series,
3020               typename std::enable_if<detail::is_kronecker_monomial<typename T::term_type::key_type>::value, int>::type
3021               = 0>
execute() const3022     Series execute() const
3023     {
3024         if (check_truncation()) {
3025             return plain_multiplication_wrapper();
3026         }
3027         return untruncated_kronecker_mult();
3028     }
3029     template <typename T = Series,
3030               typename std::enable_if<detail::is_kronecker_monomial<typename T::term_type::key_type>::value, int>::type
3031               = 0>
untruncated_kronecker_mult() const3032     Series untruncated_kronecker_mult() const
3033     {
3034         // Cache the sizes.
3035         const auto size1 = this->m_v1.size(), size2 = this->m_v2.size();
3036         // Determine whether we want to estimate or not. We check the threshold, and
3037         // we force the estimation in multithreaded mode.
3038         bool estimate = true;
3039         const auto e_thr = tuning::get_estimate_threshold();
3040         if (integer(size1) * size2 < integer(e_thr) * e_thr && this->m_n_threads == 1u) {
3041             estimate = false;
3042         }
3043         // If estimation is not worth it, we go with the plain multiplication.
3044         // NOTE: this is probably not optimal, but we have to do like this as the sparse
3045         // Kronecker multiplication below requires estimation. Maybe in the future we can
3046         // have a version without estimation.
3047         if (!estimate) {
3048             return this->plain_multiplication();
3049         }
3050         // Setup the return value.
3051         Series retval;
3052         retval.set_symbol_set(this->m_ss);
3053         // Do not do anything if one of the two series is empty, just return an empty series.
3054         if (unlikely(!size1 || !size2)) {
3055             return retval;
3056         }
3057         // Rehash the retun value's container accordingly. Check the tuning flag to see if we want to use
3058         // multiple threads for initing the return value.
3059         // NOTE: it is important here that we use the same n_threads for multiplication and memset as
3060         // we tie together pinned threads with potentially different NUMA regions.
3061         const unsigned n_threads_rehash = tuning::get_parallel_memory_set() ? this->m_n_threads : 1u;
3062         // Use the plain functor in normal mode for the estimation.
3063         const auto est
3064             = this->template estimate_final_series_size<1u, typename base::template plain_multiplier<false>>();
3065         // NOTE: if something goes wrong here, no big deal as retval is still empty.
3066         retval._container().rehash(boost::numeric_cast<typename Series::size_type>(
3067                                        std::ceil(static_cast<double>(est) / retval._container().max_load_factor())),
3068                                    n_threads_rehash);
3069         piranha_assert(retval._container().bucket_count());
3070         sparse_kronecker_multiplication(retval);
3071         return retval;
3072     }
sparse_kronecker_multiplication(Series & retval) const3073     void sparse_kronecker_multiplication(Series &retval) const
3074     {
3075         using bucket_size_type = typename base::bucket_size_type;
3076         using size_type = typename base::size_type;
3077         using term_type = typename Series::term_type;
3078         // Type representing multiplication tasks:
3079         // - the current term index from s1,
3080         // - the first term index in s2,
3081         // - the last term index in s2.
3082         using task_type = std::tuple<size_type, size_type, size_type>;
3083         // Cache a few quantities.
3084         auto &v1 = this->m_v1;
3085         auto &v2 = this->m_v2;
3086         const auto size1 = v1.size();
3087         const auto size2 = v2.size();
3088         auto &container = retval._container();
3089         // A convenience functor to compute the destination bucket
3090         // of a term into retval.
3091         auto r_bucket = [&container](term_type const *p) { return container._bucket_from_hash(p->hash()); };
3092         // Sort input terms according to bucket positions in retval.
3093         auto term_cmp = [&r_bucket](term_type const *p1, term_type const *p2) { return r_bucket(p1) < r_bucket(p2); };
3094         std::stable_sort(v1.begin(), v1.end(), term_cmp);
3095         std::stable_sort(v2.begin(), v2.end(), term_cmp);
3096         // Task comparator. It will compare the bucket index of the terms resulting from
3097         // the multiplication of the term in the first series by the first term in the block
3098         // of the second series. This is essentially the first bucket index of retval in which the task
3099         // will write.
3100         // NOTE: this is guaranteed not to overflow as the max bucket size in the hash set is 2**(nbits-1),
3101         // and the max value of bucket_size_type is 2**nbits - 1.
3102         auto task_cmp = [&r_bucket, &v1, &v2](const task_type &t1, const task_type &t2) {
3103             return r_bucket(v1[std::get<0u>(t1)]) + r_bucket(v2[std::get<1u>(t1)])
3104                    < r_bucket(v1[std::get<0u>(t2)]) + r_bucket(v2[std::get<1u>(t2)]);
3105         };
3106         // Task block size.
3107         const size_type block_size = safe_cast<size_type>(tuning::get_multiplication_block_size());
3108         // Task splitter: split a task in block_size sized tasks and append them to out.
3109         auto task_split = [block_size](const task_type &t, std::vector<task_type> &out) {
3110             size_type start = std::get<1u>(t), end = std::get<2u>(t);
3111             while (static_cast<size_type>(end - start) > block_size) {
3112                 out.emplace_back(std::get<0u>(t), start, static_cast<size_type>(start + block_size));
3113                 start = static_cast<size_type>(start + block_size);
3114             }
3115             if (end != start) {
3116                 out.emplace_back(std::get<0u>(t), start, end);
3117             }
3118         };
3119         // End of the container, always the same value.
3120         const auto it_end = container.end();
3121         // Function to perform all the term-by-term multiplications in a task, using tmp_term
3122         // as a temporary value for the computation of the result.
3123         auto task_consume = [&v1, &v2, &container, it_end, this](const task_type &task, term_type &tmp_term) {
3124             // Get the term in the first series.
3125             term_type const *t1 = v1[std::get<0u>(task)];
3126             // Get pointers to the second series.
3127             term_type const **start2 = &(v2[std::get<1u>(task)]), **end2 = &(v2[std::get<2u>(task)]);
3128             // NOTE: these will have to be adapted for kd_monomial.
3129             using int_type = decltype(t1->m_key.get_int());
3130             // Get shortcuts to cf and key in t1.
3131             const auto &cf1 = t1->m_cf;
3132             const int_type key1 = t1->m_key.get_int();
3133             // Iterate over the task.
3134             for (; start2 != end2; ++start2) {
3135                 // Const ref to the current term in the second series.
3136                 const auto &cur = **start2;
3137                 // Add the keys.
3138                 // NOTE: this will have to be adapted for kd_monomial.
3139                 tmp_term.m_key.set_int(static_cast<int_type>(key1 + cur.m_key.get_int()));
3140                 // Try to locate the term into retval.
3141                 auto bucket_idx = container._bucket(tmp_term);
3142                 const auto it = container._find(tmp_term, bucket_idx);
3143                 if (it == it_end) {
3144                     // NOTE: for coefficient series, we might want to insert with move() below,
3145                     // as we are not going to re-use the allocated resources in tmp.m_cf.
3146                     // Take care of multiplying the coefficient.
3147                     detail::cf_mult_impl(tmp_term.m_cf, cf1, cur.m_cf);
3148                     container._unique_insert(tmp_term, bucket_idx);
3149                 } else {
3150                     // NOTE: here we need to decide if we want to give the same treatment to fmp as we did with
3151                     // cf_mult_impl.
3152                     // For the moment it is an implementation detail of this class.
3153                     this->fma_wrap(it->m_cf, cf1, cur.m_cf);
3154                 }
3155             }
3156         };
3157         if (this->m_n_threads == 1u) {
3158             try {
3159                 // Single threaded case.
3160                 // Create the vector of tasks.
3161                 std::vector<task_type> tasks;
3162                 for (decltype(v1.size()) i = 0u; i < size1; ++i) {
3163                     task_split(std::make_tuple(i, size_type(0u), size2), tasks);
3164                 }
3165                 // Sort the tasks.
3166                 std::stable_sort(tasks.begin(), tasks.end(), task_cmp);
3167                 // Iterate over the tasks and run the multiplication.
3168                 term_type tmp_term;
3169                 for (const auto &t : tasks) {
3170                     task_consume(t, tmp_term);
3171                 }
3172                 this->sanitise_series(retval, this->m_n_threads);
3173                 this->finalise_series(retval);
3174             } catch (...) {
3175                 retval._container().clear();
3176                 throw;
3177             }
3178             return;
3179         }
3180         // Number of buckets in retval.
3181         const bucket_size_type bucket_count = container.bucket_count();
3182         // Compute the number of zones in which the output container will be subdivided,
3183         // a multiple of the number of threads.
3184         // NOTE: zm is a tuning parameter.
3185         const unsigned zm = 10u;
3186         const bucket_size_type n_zones = static_cast<bucket_size_type>(integer(this->m_n_threads) * zm);
3187         // Number of buckets per zone (can be zero).
3188         const bucket_size_type bpz = static_cast<bucket_size_type>(bucket_count / n_zones);
3189         // For each zone, we need to define a vector of tasks that will write only into that zone.
3190         std::vector<std::vector<task_type>> task_table;
3191         task_table.resize(safe_cast<decltype(task_table.size())>(n_zones));
3192         // Lower bound implementation. Adapted from:
3193         // http://en.cppreference.com/w/cpp/algorithm/lower_bound
3194         // Given the [first,last[ index range in v2, find the first index idx in the v2 range such that the i-th
3195         // term in v1 multiplied by the idx-th term in v2 will be written into retval at a bucket index not less than
3196         // zb.
3197         auto l_bound = [&v1, &v2, &r_bucket, &task_split](size_type first, size_type last, bucket_size_type zb,
3198                                                           size_type i) -> size_type {
3199             piranha_assert(first <= last);
3200             bucket_size_type ib = r_bucket(v1[i]);
3201             // Avoid zb - ib below wrapping around.
3202             if (zb < ib) {
3203                 return 0u;
3204             }
3205             const auto cmp = static_cast<bucket_size_type>(zb - ib);
3206             size_type idx, step, count = static_cast<size_type>(last - first);
3207             while (count > 0u) {
3208                 idx = first;
3209                 step = static_cast<size_type>(count / 2u);
3210                 idx = static_cast<size_type>(idx + step);
3211                 if (r_bucket(v2[idx]) < cmp) {
3212                     idx = static_cast<size_type>(idx + 1u);
3213                     first = idx;
3214                     if (count <= step + 1u) {
3215                         break;
3216                     }
3217                     count = static_cast<size_type>(count - (step + 1u));
3218                 } else {
3219                     count = step;
3220                 }
3221             }
3222             return first;
3223         };
3224         // Fill the task table.
3225         auto table_filler = [&task_table, bpz, zm, this, bucket_count, size1, size2, &l_bound, &task_split,
3226                              &task_cmp](const unsigned &thread_idx) {
3227             for (unsigned n = 0u; n < zm; ++n) {
3228                 std::vector<task_type> cur_tasks;
3229                 // [a,b[ is the container zone.
3230                 bucket_size_type a = static_cast<bucket_size_type>(thread_idx * bpz * zm + n * bpz);
3231                 bucket_size_type b;
3232                 if (n == zm - 1u && thread_idx == this->m_n_threads - 1u) {
3233                     // Special casing if this is the last zone in the container.
3234                     b = bucket_count;
3235                 } else {
3236                     b = static_cast<bucket_size_type>(a + bpz);
3237                 }
3238                 // First batch of tasks.
3239                 for (size_type i = 0u; i < size1; ++i) {
3240                     auto t = std::make_tuple(i, l_bound(0u, size2, a, i), l_bound(0u, size2, b, i));
3241                     if (std::get<1u>(t) == 0u && std::get<2u>(t) == 0u) {
3242                         // This means that all the next tasks we will compute will be empty,
3243                         // no sense in calculating them.
3244                         break;
3245                     }
3246                     task_split(t, cur_tasks);
3247                 }
3248                 // Second batch of tasks.
3249                 // Note: we can always compute a,b + bucket_count because of the limits on the maximum value of
3250                 // bucket_count.
3251                 for (size_type i = 0u; i < size1; ++i) {
3252                     auto t = std::make_tuple(i, l_bound(0u, size2, static_cast<bucket_size_type>(a + bucket_count), i),
3253                                              l_bound(0u, size2, static_cast<bucket_size_type>(b + bucket_count), i));
3254                     if (std::get<1u>(t) == 0u && std::get<2u>(t) == 0u) {
3255                         break;
3256                     }
3257                     task_split(t, cur_tasks);
3258                 }
3259                 // Sort the task vector.
3260                 std::stable_sort(cur_tasks.begin(), cur_tasks.end(), task_cmp);
3261                 // Move the vector of tasks in the table.
3262                 task_table[static_cast<decltype(task_table.size())>(thread_idx * zm + n)] = std::move(cur_tasks);
3263             }
3264         };
3265         // Go with the threads to fill the task table.
3266         future_list<decltype(table_filler(0u))> ff_list;
3267         try {
3268             for (unsigned i = 0u; i < this->m_n_threads; ++i) {
3269                 ff_list.push_back(thread_pool::enqueue(i, table_filler, i));
3270             }
3271             // First let's wait for everything to finish.
3272             ff_list.wait_all();
3273             // Then, let's handle the exceptions.
3274             ff_list.get_all();
3275         } catch (...) {
3276             ff_list.wait_all();
3277             throw;
3278         }
3279         // Check the consistency of the table for debug purposes.
3280         auto table_checker = [&task_table, size1, size2, &r_bucket, bpz, bucket_count, &v1, &v2]() -> bool {
3281             // Total number of term-by-term multiplications. Needs to be equal
3282             // to size1 * size2 at the end.
3283             integer tot_n(0);
3284             // Tmp term for multiplications.
3285             term_type tmp_term;
3286             for (decltype(task_table.size()) i = 0u; i < task_table.size(); ++i) {
3287                 const auto &v = task_table[i];
3288                 // Bucket limits of each zone.
3289                 bucket_size_type a = static_cast<bucket_size_type>(bpz * i), b;
3290                 // Special casing for the last zone in the table.
3291                 if (i == task_table.size() - 1u) {
3292                     b = bucket_count;
3293                 } else {
3294                     b = static_cast<bucket_size_type>(a + bpz);
3295                 }
3296                 for (const auto &t : v) {
3297                     auto idx1 = std::get<0u>(t), start2 = std::get<1u>(t), end2 = std::get<2u>(t);
3298                     using int_type = decltype(v1[idx1]->m_key.get_int());
3299                     piranha_assert(start2 <= end2);
3300                     tot_n += end2 - start2;
3301                     for (; start2 != end2; ++start2) {
3302                         tmp_term.m_key.set_int(
3303                             static_cast<int_type>(v1[idx1]->m_key.get_int() + v2[start2]->m_key.get_int()));
3304                         auto b_idx = r_bucket(&tmp_term);
3305                         if (b_idx < a || b_idx >= b) {
3306                             return false;
3307                         }
3308                     }
3309                 }
3310             }
3311             return tot_n == integer(size1) * size2;
3312         };
3313         (void)table_checker;
3314         piranha_assert(table_checker());
3315         // Init the vector of atomic flags.
3316         detail::atomic_flag_array af(safe_cast<std::size_t>(task_table.size()));
3317         // Thread functor.
3318         auto thread_functor = [zm, &task_table, &af, &v1, &v2, &container, &task_consume](const unsigned &thread_idx) {
3319             using t_size_type = decltype(task_table.size());
3320             // Temporary term_type for caching.
3321             term_type tmp_term;
3322             // The starting index in the task table.
3323             auto t_idx = static_cast<t_size_type>(t_size_type(thread_idx) * zm);
3324             const auto start_t_idx = t_idx;
3325             while (true) {
3326                 // If this returns false, it means that the tasks still need to be consumed;
3327                 if (!af[static_cast<std::size_t>(t_idx)].test_and_set()) {
3328                     // Current vector of tasks.
3329                     const auto &cur_tasks = task_table[t_idx];
3330                     for (const auto &t : cur_tasks) {
3331                         task_consume(t, tmp_term);
3332                     }
3333                 }
3334                 // Update the index, wrapping around if necessary.
3335                 t_idx = static_cast<t_size_type>(t_idx + 1u);
3336                 if (t_idx == task_table.size()) {
3337                     t_idx = 0u;
3338                 }
3339                 // If we got back to the original index, get out.
3340                 if (t_idx == start_t_idx) {
3341                     break;
3342                 }
3343             }
3344         };
3345         // Go with the multiplication threads.
3346         future_list<decltype(thread_functor(0u))> ft_list;
3347         try {
3348             for (unsigned i = 0u; i < this->m_n_threads; ++i) {
3349                 ft_list.push_back(thread_pool::enqueue(i, thread_functor, i));
3350             }
3351             // First let's wait for everything to finish.
3352             ft_list.wait_all();
3353             // Then, let's handle the exceptions.
3354             ft_list.get_all();
3355             // Finally, fix and finalise the series.
3356             this->sanitise_series(retval, this->m_n_threads);
3357             this->finalise_series(retval);
3358         } catch (...) {
3359             ft_list.wait_all();
3360             // Clean up and re-throw.
3361             retval._container().clear();
3362             throw;
3363         }
3364     }
3365 };
3366 }
3367 
3368 #endif
3369