1 /* Copyright 2009-2016 Francesco Biscani (bluescarni@gmail.com) 2 3 This file is part of the Piranha library. 4 5 The Piranha library is free software; you can redistribute it and/or modify 6 it under the terms of either: 7 8 * the GNU Lesser General Public License as published by the Free 9 Software Foundation; either version 3 of the License, or (at your 10 option) any later version. 11 12 or 13 14 * the GNU General Public License as published by the Free Software 15 Foundation; either version 3 of the License, or (at your option) any 16 later version. 17 18 or both in parallel, as here. 19 20 The Piranha library is distributed in the hope that it will be useful, but 21 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 22 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 23 for more details. 24 25 You should have received copies of the GNU General Public License and the 26 GNU Lesser General Public License along with the Piranha library. If not, 27 see https://www.gnu.org/licenses/. */ 28 29 #ifndef PIRANHA_DETAIL_BASE_SERIES_MULTIPLIER_HPP 30 #define PIRANHA_DETAIL_BASE_SERIES_MULTIPLIER_HPP 31 32 #include <algorithm> 33 #include <array> 34 #include <boost/numeric/conversion/cast.hpp> 35 #include <cmath> 36 #include <cstddef> 37 #include <iterator> 38 #include <limits> 39 #include <mutex> 40 #include <random> 41 #include <stdexcept> 42 #include <type_traits> 43 #include <utility> 44 #include <vector> 45 46 #include "config.hpp" 47 #include "detail/atomic_flag_array.hpp" 48 #include "detail/atomic_lock_guard.hpp" 49 #include "exceptions.hpp" 50 #include "key_is_multipliable.hpp" 51 #include "math.hpp" 52 #include "mp_integer.hpp" 53 #include "mp_rational.hpp" 54 #include "safe_cast.hpp" 55 #include "series.hpp" 56 #include "settings.hpp" 57 #include "symbol_set.hpp" 58 #include "thread_pool.hpp" 59 #include "tuning.hpp" 60 #include "type_traits.hpp" 61 62 namespace piranha 63 { 64 65 namespace detail 66 { 67 68 template <typename Series, typename Derived, typename = void> 69 struct base_series_multiplier_impl { 70 using term_type = typename Series::term_type; 71 using container_type = typename std::decay<decltype(std::declval<Series>()._container())>::type; 72 using c_size_type = typename container_type::size_type; 73 using v_size_type = typename std::vector<term_type const *>::size_type; 74 template <typename Term, 75 typename std::enable_if<!is_less_than_comparable<typename Term::key_type>::value, int>::type = 0> fill_term_pointerspiranha::detail::base_series_multiplier_impl76 void fill_term_pointers(const container_type &c1, const container_type &c2, std::vector<Term const *> &v1, 77 std::vector<Term const *> &v2) 78 { 79 // If the key is not less-than comparable, we can only copy over the pointers as they are. 80 std::transform(c1.begin(), c1.end(), std::back_inserter(v1), [](const term_type &t) { return &t; }); 81 std::transform(c2.begin(), c2.end(), std::back_inserter(v2), [](const term_type &t) { return &t; }); 82 } 83 template <typename Term, 84 typename std::enable_if<is_less_than_comparable<typename Term::key_type>::value, int>::type = 0> fill_term_pointerspiranha::detail::base_series_multiplier_impl85 void fill_term_pointers(const container_type &c1, const container_type &c2, std::vector<Term const *> &v1, 86 std::vector<Term const *> &v2) 87 { 88 // Fetch the number of threads from the derived class. 89 const unsigned n_threads = static_cast<Derived *>(this)->m_n_threads; 90 piranha_assert(n_threads > 0u); 91 // Threading functor. 92 auto thread_func 93 = [n_threads](unsigned thread_idx, const container_type *c, std::vector<term_type const *> *v) { 94 piranha_assert(thread_idx < n_threads); 95 // Total bucket count. 96 const auto b_count = c->bucket_count(); 97 // Buckets per thread. 98 const auto bpt = b_count / n_threads; 99 // End index. 100 const auto end 101 = static_cast<c_size_type>((thread_idx == n_threads - 1u) ? b_count : (bpt * (thread_idx + 1u))); 102 // Sorter. 103 auto sorter = [](term_type const *p1, term_type const *p2) { return p1->m_key < p2->m_key; }; 104 v_size_type j = 0u; 105 for (auto start = static_cast<c_size_type>(bpt * thread_idx); start < end; ++start) { 106 const auto &b = c->_get_bucket_list(start); 107 v_size_type tmp = 0u; 108 for (const auto &t : b) { 109 v->push_back(&t); 110 ++tmp; 111 } 112 std::stable_sort(v->data() + j, v->data() + j + tmp, sorter); 113 j += tmp; 114 } 115 }; 116 if (n_threads == 1u) { 117 thread_func(0u, &c1, &v1); 118 thread_func(0u, &c2, &v2); 119 return; 120 } 121 auto thread_wrapper = [&thread_func, n_threads](const container_type *c, std::vector<term_type const *> *v) { 122 // In the multi-threaded case, each thread needs to work on a separate vector. 123 // We will merge the vectors later. 124 using vv_t = std::vector<std::vector<Term const *>>; 125 using vv_size_t = typename vv_t::size_type; 126 vv_t vv(safe_cast<vv_size_t>(n_threads)); 127 // Go with the threads. 128 future_list<void> ff_list; 129 try { 130 for (unsigned i = 0u; i < n_threads; ++i) { 131 ff_list.push_back(thread_pool::enqueue(i, thread_func, i, c, &(vv[static_cast<vv_size_t>(i)]))); 132 } 133 // First let's wait for everything to finish. 134 ff_list.wait_all(); 135 // Then, let's handle the exceptions. 136 ff_list.get_all(); 137 } catch (...) { 138 ff_list.wait_all(); 139 throw; 140 } 141 // Last, we need to merge everything into v. 142 for (const auto &vi : vv) { 143 v->insert(v->end(), vi.begin(), vi.end()); 144 } 145 }; 146 thread_wrapper(&c1, &v1); 147 thread_wrapper(&c2, &v2); 148 } 149 }; 150 151 template <typename Series, typename Derived> 152 struct base_series_multiplier_impl<Series, Derived, typename std::enable_if<is_mp_rational< 153 typename Series::term_type::cf_type>::value>::type> { 154 // Useful shortcuts. 155 using term_type = typename Series::term_type; 156 using rat_type = typename term_type::cf_type; 157 using int_type = typename std::decay<decltype(std::declval<rat_type>().num())>::type; 158 using container_type = typename std::decay<decltype(std::declval<Series>()._container())>::type; fill_term_pointerspiranha::detail::base_series_multiplier_impl159 void fill_term_pointers(const container_type &c1, const container_type &c2, std::vector<term_type const *> &v1, 160 std::vector<term_type const *> &v2) 161 { 162 // Compute the least common multiplier. 163 m_lcm = 1; 164 auto it_f = c1.end(); 165 int_type g; 166 for (auto it = c1.begin(); it != it_f; ++it) { 167 math::gcd3(g, m_lcm, it->m_cf.den()); 168 math::mul3(m_lcm, m_lcm, it->m_cf.den()); 169 int_type::_divexact(m_lcm, m_lcm, g); 170 } 171 it_f = c2.end(); 172 for (auto it = c2.begin(); it != it_f; ++it) { 173 math::gcd3(g, m_lcm, it->m_cf.den()); 174 math::mul3(m_lcm, m_lcm, it->m_cf.den()); 175 int_type::_divexact(m_lcm, m_lcm, g); 176 } 177 // All these computations involve only positive numbers, 178 // the GCD must always be positive. 179 piranha_assert(m_lcm.sign() == 1); 180 // Copy over the terms and renormalise to lcm. 181 it_f = c1.end(); 182 for (auto it = c1.begin(); it != it_f; ++it) { 183 // NOTE: these divisions are exact, we could take advantage of that. 184 m_terms1.push_back(term_type(rat_type(m_lcm / it->m_cf.den() * it->m_cf.num(), int_type(1)), it->m_key)); 185 } 186 it_f = c2.end(); 187 for (auto it = c2.begin(); it != it_f; ++it) { 188 m_terms2.push_back(term_type(rat_type(m_lcm / it->m_cf.den() * it->m_cf.num(), int_type(1)), it->m_key)); 189 } 190 // Copy over the pointers. 191 std::transform(m_terms1.begin(), m_terms1.end(), std::back_inserter(v1), [](const term_type &t) { return &t; }); 192 std::transform(m_terms2.begin(), m_terms2.end(), std::back_inserter(v2), [](const term_type &t) { return &t; }); 193 piranha_assert(v1.size() == c1.size()); 194 piranha_assert(v2.size() == c2.size()); 195 } 196 std::vector<term_type> m_terms1; 197 std::vector<term_type> m_terms2; 198 int_type m_lcm; 199 }; 200 } 201 202 /// Base series multiplier. 203 /** 204 * This is a class that provides functionality useful to define a piranha::series_multiplier specialisation. Note that 205 * this class 206 * alone does *not* fulfill the requirements of a piranha::series_multiplier - it merely provides a set of utilities 207 * that can be 208 * useful for the implementation of a piranha::series_multiplier. 209 * 210 * ## Type requirements ## 211 * 212 * \p Series must satisfy piranha::is_series. 213 * 214 * ## Exception safety guarantee ## 215 * 216 * This class provides the strong exception safety guarantee. 217 * 218 * ## Move semantics ## 219 * 220 * Instances of this class cannot be copied, moved or assigned. 221 */ 222 // Some performance ideas: 223 // - optimisation in case one series has 1 term with unitary key and both series same type: multiply directly 224 // coefficients; 225 // - optimisation for coefficient series that merges all args, similar to the rational optimisation; 226 // - optimisation for load balancing similar to the poly multiplier. 227 template <typename Series> 228 class base_series_multiplier : private detail::base_series_multiplier_impl<Series, base_series_multiplier<Series>> 229 { 230 PIRANHA_TT_CHECK(is_series, Series); 231 // Make friends with the base, so it can access protected/private members of this. 232 friend struct detail::base_series_multiplier_impl<Series, base_series_multiplier<Series>>; 233 // Alias for the series' container type. 234 using container_type = uncvref_t<decltype(std::declval<const Series &>()._container())>; 235 236 public: 237 /// Alias for a vector of const pointers to series terms. 238 using v_ptr = std::vector<typename Series::term_type const *>; 239 /// The size type of base_series_multiplier::v_ptr. 240 using size_type = typename v_ptr::size_type; 241 /// The size type of \p Series. 242 using bucket_size_type = typename Series::size_type; 243 244 private: 245 // The default limit functor: it will include all terms in the second series. 246 struct default_limit_functor { default_limit_functorpiranha::base_series_multiplier::default_limit_functor247 default_limit_functor(const base_series_multiplier &m) : m_size2(m.m_v2.size()) 248 { 249 } operator ()piranha::base_series_multiplier::default_limit_functor250 size_type operator()(const size_type &) const 251 { 252 return m_size2; 253 } 254 const size_type m_size2; 255 }; 256 // The purpose of this helper is to move in a coefficient series during insertion. For series, 257 // we know that moves leave the series in a valid state, and series multiplications do not benefit 258 // from an already-constructed destination - hence it is convenient to move them rather than copy. 259 template <typename Term, typename std::enable_if<!is_series<typename Term::cf_type>::value, int>::type = 0> term_insertion(Term & t)260 static Term &term_insertion(Term &t) 261 { 262 return t; 263 } 264 template <typename Term, typename std::enable_if<is_series<typename Term::cf_type>::value, int>::type = 0> term_insertion(Term & t)265 static Term term_insertion(Term &t) 266 { 267 return Term{std::move(t.m_cf), t.m_key}; 268 } 269 // Implementation of finalise(). 270 template <typename T, 271 typename std::enable_if<detail::is_mp_rational<typename T::term_type::cf_type>::value, int>::type = 0> finalise_impl(T & s) const272 void finalise_impl(T &s) const 273 { 274 // Nothing to do if the lcm is unitary. 275 if (math::is_unitary(this->m_lcm)) { 276 return; 277 } 278 // NOTE: this has to be the square of the lcm, as in addition to uniformising 279 // the denominators in each series we are also multiplying the two series. 280 const auto l2 = this->m_lcm * this->m_lcm; 281 auto &container = s._container(); 282 // Single thread implementation. 283 if (m_n_threads == 1u) { 284 for (const auto &t : container) { 285 t.m_cf._set_den(l2); 286 t.m_cf.canonicalise(); 287 } 288 return; 289 } 290 // Multi-thread implementation. 291 // Buckets per thread. 292 const bucket_size_type bpt = static_cast<bucket_size_type>(container.bucket_count() / m_n_threads); 293 auto thread_func = [l2, &container, this, bpt](unsigned t_idx) { 294 bucket_size_type start_idx = static_cast<bucket_size_type>(t_idx * bpt); 295 // Special handling for the last thread. 296 const bucket_size_type end_idx = t_idx == (this->m_n_threads - 1u) 297 ? container.bucket_count() 298 : static_cast<bucket_size_type>((t_idx + 1u) * bpt); 299 for (; start_idx != end_idx; ++start_idx) { 300 auto &list = container._get_bucket_list(start_idx); 301 for (const auto &t : list) { 302 t.m_cf._set_den(l2); 303 t.m_cf.canonicalise(); 304 } 305 } 306 }; 307 // Go with the threads. 308 future_list<decltype(thread_func(0u))> ff_list; 309 try { 310 for (unsigned i = 0u; i < m_n_threads; ++i) { 311 ff_list.push_back(thread_pool::enqueue(i, thread_func, i)); 312 } 313 // First let's wait for everything to finish. 314 ff_list.wait_all(); 315 // Then, let's handle the exceptions. 316 ff_list.get_all(); 317 } catch (...) { 318 ff_list.wait_all(); 319 throw; 320 } 321 } 322 template <typename T, 323 typename std::enable_if<!detail::is_mp_rational<typename T::term_type::cf_type>::value, int>::type = 0> finalise_impl(T &) const324 void finalise_impl(T &) const 325 { 326 } 327 328 public: 329 /// Constructor. 330 /** 331 * This constructor will store in the base_series_multiplier::m_v1 and base_series_multiplier::m_v2 protected 332 * members references to the terms of the input series \p s1 and \p s2. \p m_v1 will store references to the larger 333 * series, \p m_v2 will store references to the smaller series. The constructor will also store a copy of the symbol 334 * set of \p s1 and \p s2 in the protected member base_series_multiplier::m_ss. 335 * 336 * If the coefficient type of \p Series is an instance of piranha::mp_rational, then the pointers in \p m_v1 and \p 337 * m_v2 will refer not to the original terms in \p s1 and \p s2 but to *copies* of these terms, in which all 338 * coefficients have unitary denominator and the numerators have all been multiplied by the global least common 339 * multiplier. This transformation allows to reduce the multiplication of series with rational coefficients to the 340 * multiplication of series with integral coefficients. 341 * 342 * If an operand is empty and the series type does not satisfy piranha::zero_is_absorbing, then a hidden 343 * private series consisting of a single term with zero coefficient is created, and the pointers in 344 * base_series_multiplier::m_v1 and/or base_series_multiplier::m_v2 will refer to this hidden instance. This ensures 345 * that a multiplication by a null series results in multiplications by a zero coefficient, which may not 346 * necessarily lead to a zero result (e.g., with IEEE floats inf times zero gives NaN). 347 * 348 * @param s1 first series. 349 * @param s2 second series. 350 * 351 * @throws std::invalid_argument if the symbol sets of \p s1 and \p s2 differ. 352 * @throws unspecified any exception thrown by: 353 * - thread_pool::use_threads(), 354 * - memory allocation errors in standard containers, 355 * - the construction of the term, coefficient and key types of \p Series, 356 * - the public interface of piranha::hash_set. 357 */ base_series_multiplier(const Series & s1,const Series & s2)358 explicit base_series_multiplier(const Series &s1, const Series &s2) : m_ss(s1.get_symbol_set()) 359 { 360 if (unlikely(s1.get_symbol_set() != s2.get_symbol_set())) { 361 piranha_throw(std::invalid_argument, "incompatible arguments sets"); 362 } 363 // The largest series goes first. 364 const Series *p1 = &s1, *p2 = &s2; 365 if (s1.size() < s2.size()) { 366 std::swap(p1, p2); 367 } 368 // This is just an optimisation, no troubles if there is a truncation due to static_cast. 369 m_v1.reserve(static_cast<size_type>(p1->size())); 370 m_v2.reserve(static_cast<size_type>(p2->size())); 371 container_type const *ctr1 = &p1->_container(), *ctr2 = &p2->_container(); 372 // NOTE: if the zero element of Series is not absorbing, we need to create a temporary zero series in place 373 // of any factor that is zero, and then use it in the multiplication. This ensures a correct series 374 // multiplication result for coefficient types (such as IEEE floats) for which 0 times x is not necessarily 375 // always 0. The temporary zero series is stored in the m_zero_f member as a collection of 1 term with 376 // zero coefficient. 377 if (!zero_is_absorbing<Series>::value) { 378 using term_type = typename Series::term_type; 379 using cf_type = typename term_type::cf_type; 380 using key_type = typename term_type::key_type; 381 if (p1->empty()) { 382 m_zero_f1.insert(term_type{cf_type(0), key_type(s1.get_symbol_set())}); 383 ctr1 = &m_zero_f1; 384 } 385 if (p2->empty()) { 386 m_zero_f2.insert(term_type{cf_type(0), key_type(s1.get_symbol_set())}); 387 ctr2 = &m_zero_f2; 388 } 389 } 390 // Set the number of threads. 391 m_n_threads = (ctr1->size() && ctr2->size()) 392 ? thread_pool::use_threads(integer(ctr1->size()) * ctr2->size(), 393 integer(settings::get_min_work_per_thread())) 394 : 1u; 395 this->fill_term_pointers(*ctr1, *ctr2, m_v1, m_v2); 396 } 397 398 private: 399 base_series_multiplier() = delete; 400 base_series_multiplier(const base_series_multiplier &) = delete; 401 base_series_multiplier(base_series_multiplier &&) = delete; 402 base_series_multiplier &operator=(const base_series_multiplier &) = delete; 403 base_series_multiplier &operator=(base_series_multiplier &&) = delete; 404 405 protected: 406 /// Blocked multiplication. 407 /** 408 * \note 409 * If \p MultFunctor or \p LimitFunctor do not satisfy the requirements outlined below, 410 * a compile-time error will be produced. 411 * 412 * This method is logically equivalent to the following double loop: 413 * 414 * @code 415 * for (size_type i = start1; i < end1; ++i) { 416 * const size_type limit = std::min(lf(i),m_v2.size()); 417 * for (size_type j = 0; j < limit; ++j) { 418 * mf(i,j); 419 * } 420 * } 421 * @endcode 422 * 423 * \p mf must be a function object with a call operator accepting two instances of 424 * base_series_multiplier::size_type and returning \p void. \p lf must be a function object 425 * with a call operator accepting and returning a base_series_multiplier::size_type. 426 * 427 * Internally, the double loops is decomposed in blocks of size tuning::get_multiplication_block_size() in an 428 * attempt to optimise cache memory access patterns. 429 * 430 * This method is meant to be used for series multiplication. \p mf is intended to be a function object that 431 * multiplies the <tt>i</tt>-th term of the first series by the <tt>j</tt>-th term of the second series. 432 * \p lf is intended to be a functor that establishes how many terms in the second series have to be multiplied by 433 * the <tt>i</tt>-th term of the first series. 434 * 435 * @param mf the multiplication functor. 436 * @param start1 start index in the first series. 437 * @param end1 end index in the first series. 438 * @param lf the limit functor. 439 * 440 * @throws std::invalid_argument if \p start1 is greater than \p end1 or greater than the size of 441 * base_series_multiplier::m_v1, or if \p end1 is greater than the size of base_series_multiplier::m_v1. 442 * @throws unspecified any exception thrown by the call operator of \p mf or \p sf, or piranha::safe_cast. 443 */ 444 template <typename MultFunctor, typename LimitFunctor> blocked_multiplication(const MultFunctor & mf,const size_type & start1,const size_type & end1,const LimitFunctor & lf) const445 void blocked_multiplication(const MultFunctor &mf, const size_type &start1, const size_type &end1, 446 const LimitFunctor &lf) const 447 { 448 PIRANHA_TT_CHECK(is_function_object, MultFunctor, void, const size_type &, const size_type &); 449 if (unlikely(start1 > end1 || start1 > m_v1.size() || end1 > m_v1.size())) { 450 piranha_throw(std::invalid_argument, "invalid bounds in blocked_multiplication"); 451 } 452 // Block size and number of regular blocks. 453 const size_type bsize = safe_cast<size_type>(tuning::get_multiplication_block_size()), 454 nblocks1 = static_cast<size_type>((end1 - start1) / bsize), 455 nblocks2 = static_cast<size_type>(m_v2.size() / bsize); 456 // Start and end of last (possibly irregular) blocks. 457 const size_type i_ir_start = static_cast<size_type>(nblocks1 * bsize + start1), i_ir_end = end1; 458 const size_type j_ir_start = static_cast<size_type>(nblocks2 * bsize), j_ir_end = m_v2.size(); 459 for (size_type n1 = 0u; n1 < nblocks1; ++n1) { 460 const size_type i_start = static_cast<size_type>(n1 * bsize + start1), 461 i_end = static_cast<size_type>(i_start + bsize); 462 // regulars1 * regulars2 463 for (size_type n2 = 0u; n2 < nblocks2; ++n2) { 464 const size_type j_start = static_cast<size_type>(n2 * bsize), 465 j_end = static_cast<size_type>(j_start + bsize); 466 for (size_type i = i_start; i < i_end; ++i) { 467 const size_type limit = std::min<size_type>(lf(i), j_end); 468 for (size_type j = j_start; j < limit; ++j) { 469 mf(i, j); 470 } 471 } 472 } 473 // regulars1 * rem2 474 for (size_type i = i_start; i < i_end; ++i) { 475 const size_type limit = std::min<size_type>(lf(i), j_ir_end); 476 for (size_type j = j_ir_start; j < limit; ++j) { 477 mf(i, j); 478 } 479 } 480 } 481 // rem1 * regulars2 482 for (size_type n2 = 0u; n2 < nblocks2; ++n2) { 483 const size_type j_start = static_cast<size_type>(n2 * bsize), 484 j_end = static_cast<size_type>(j_start + bsize); 485 for (size_type i = i_ir_start; i < i_ir_end; ++i) { 486 const size_type limit = std::min<size_type>(lf(i), j_end); 487 for (size_type j = j_start; j < limit; ++j) { 488 mf(i, j); 489 } 490 } 491 } 492 // rem1 * rem2. 493 for (size_type i = i_ir_start; i < i_ir_end; ++i) { 494 const size_type limit = std::min<size_type>(lf(i), j_ir_end); 495 for (size_type j = j_ir_start; j < limit; ++j) { 496 mf(i, j); 497 } 498 } 499 } 500 /// Blocked multiplication (convenience overload). 501 /** 502 * This method is equivalent to the other overload with a limit functor that will unconditionally 503 * always return the size of the second series. 504 * 505 * @param mf the multiplication functor. 506 * @param start1 start index in the first series. 507 * @param end1 end index in the first series. 508 * 509 * @throws unspecified any exception thrown by the other overload. 510 */ 511 template <typename MultFunctor> blocked_multiplication(const MultFunctor & mf,const size_type & start1,const size_type & end1) const512 void blocked_multiplication(const MultFunctor &mf, const size_type &start1, const size_type &end1) const 513 { 514 blocked_multiplication(mf, start1, end1, default_limit_functor{*this}); 515 } 516 /// Estimate size of series multiplication. 517 /** 518 * \note 519 * If \p MultArity, \p MultFunctor or \p LimitFunctor do not satisfy the requirements outlined below, 520 * a compile-time error will be produced. 521 * 522 * This method expects a \p MultFunctor type exposing the same inteface as explained in blocked_multiplication(). 523 * Additionally, \p MultFunctor must be constructible from a const reference to piranha::base_series_multiplier 524 * and a mutable reference to \p Series. \p MultFunctor objects will be constructed internally by this method, 525 * passing \p this and a temporary local \p Series object as construction parameters. 526 * It will be assumed that a call <tt>mf(i,j)</tt> multiplies the <tt>i</tt>-th 527 * term of the first series by the <tt>j</tt>-th term of the second series, accumulating the result into the \p 528 * Series 529 * passed as second parameter for construction. 530 * 531 * This method will apply a statistical approach to estimate the final size of the result of the multiplication of 532 * the first 533 * series by the second. It will perform random term-by-term multiplications 534 * and deduce the estimate from the number of term multiplications performed before finding the first duplicate 535 * term. 536 * The \p MultArity parameter represents the arity of term multiplications - that is, the number of terms generated 537 * by a single 538 * term-by-term multiplication. It must be strictly positive. 539 * 540 * The \p lf parameter must be a function object exposing the same inteface as explained in 541 * blocked_multiplication(). 542 * This functor establishes how many terms in the second series must be multiplied by the <tt>i</tt>-th term of the 543 * first series. 544 * 545 * The number returned by this function is always at least 1. Multiple threads might be used by this method: in such 546 * a case, different 547 * instances of \p MultFunctor are constructed in different threads, but \p lf is shared among all threads. 548 * 549 * @param lf the limit functor. 550 * 551 * @return the estimated size of the multiplication of the first series by the second, always at least 1. 552 * 553 * @throws std::overflow_error in case of (unlikely) overflows in integral arithmetics. 554 * @throws unspecified any exception thrown by: 555 * - overflow errors in the conversion of piranha::integer to integral types, 556 * - the call operator of \p mf or \p lf, and the constructor of \p mf, 557 * - memory errors in standard containers, 558 * - the conversion operator of piranha::integer, 559 * - standard threading primitives, 560 * - thread_pool::enqueue(), 561 * - future_list::push_back(). 562 */ 563 template <std::size_t MultArity, typename MultFunctor, typename LimitFunctor> estimate_final_series_size(const LimitFunctor & lf) const564 bucket_size_type estimate_final_series_size(const LimitFunctor &lf) const 565 { 566 PIRANHA_TT_CHECK(is_function_object, MultFunctor, void, const size_type &, const size_type &); 567 PIRANHA_TT_CHECK(std::is_constructible, MultFunctor, const base_series_multiplier &, Series &); 568 PIRANHA_TT_CHECK(is_function_object, LimitFunctor, size_type, const size_type &); 569 // Cache these. 570 const size_type size1 = m_v1.size(), size2 = m_v2.size(); 571 constexpr std::size_t result_size = MultArity; 572 // If one of the two series is empty, just return 0. 573 if (unlikely(!size1 || !size2)) { 574 return 1u; 575 } 576 // If either series has a size of 1, just return size1 * size2 * result_size. 577 if (size1 == 1u || size2 == 1u) { 578 return static_cast<bucket_size_type>(integer(size1) * size2 * result_size); 579 } 580 // NOTE: Hard-coded number of trials. 581 // NOTE: here consider that in case of extremely sparse series with few terms this will incur in noticeable 582 // overhead, since we will need many term-by-term before encountering the first duplicate. 583 const unsigned n_trials = 15u; 584 // NOTE: Hard-coded value for the estimation multiplier. 585 // NOTE: This value should be tuned for performance/memory usage tradeoffs. 586 const unsigned multiplier = 2u; 587 // Number of threads to use. If there are more threads than trials, then reduce 588 // the number of actual threads to use. 589 // NOTE: this is a bit different from usual, where we do not care if the workload per thread is zero. 590 // We do like this because n_trials is a small number and there still seems to be benefit in running 591 // just 1 trial per thread. 592 const unsigned n_threads = (n_trials >= m_n_threads) ? m_n_threads : n_trials; 593 piranha_assert(n_threads > 0u); 594 // Trials per thread. This will always be at least 1. 595 const unsigned tpt = n_trials / n_threads; 596 piranha_assert(tpt >= 1u); 597 // The cumulative estimate. 598 integer c_estimate(0); 599 // Sync mutex - actually used only in multithreading. 600 std::mutex mut; 601 // The estimation functor. 602 auto estimator = [&lf, size1, n_threads, multiplier, tpt, n_trials, this, &c_estimate, &mut, 603 result_size](unsigned thread_idx) { 604 piranha_assert(thread_idx < n_threads); 605 // Vectors of indices into m_v1. 606 std::vector<size_type> v_idx1(safe_cast<typename std::vector<size_type>::size_type>(size1)); 607 std::iota(v_idx1.begin(), v_idx1.end(), size_type(0)); 608 // Copy in order to reset to initial state later. 609 const auto v_idx1_copy = v_idx1; 610 // Random number engine. 611 std::mt19937 engine; 612 // Uniform int distribution. 613 using dist_type = std::uniform_int_distribution<size_type>; 614 dist_type dist; 615 // Init the accumulated estimation for averaging later. 616 integer acc(0); 617 // Number of trials for this thread - usual special casing for the last thread. 618 const unsigned cur_trials = (thread_idx == n_threads - 1u) ? (n_trials - thread_idx * tpt) : tpt; 619 // This should always be guaranteed because tpt is never 0. 620 piranha_assert(cur_trials > 0u); 621 // Create and setup the temp series. 622 Series tmp; 623 tmp.set_symbol_set(m_ss); 624 // Create the multiplier. 625 MultFunctor mf(*this, tmp); 626 // Go with the trials. 627 for (auto n = 0u; n < cur_trials; ++n) { 628 // Seed the engine. The seed should be the global trial number, accounting for multiple 629 // threads. This way the estimation will not depend on the number of threads. 630 engine.seed(static_cast<std::mt19937::result_type>(tpt * thread_idx + n)); 631 // Reset the indices vector and re-randomise it. 632 // NOTE: we need to do this as every run inside this for loop must be completely independent 633 // of any previous run, we cannot keep any state. 634 v_idx1 = v_idx1_copy; 635 std::shuffle(v_idx1.begin(), v_idx1.end(), engine); 636 // The counter. This will be increased each time a term-by-term multiplication 637 // does not generate a duplicate term. 638 size_type count = 0u; 639 // This will be used to determine the average number of terms in s2 640 // that participate in the multiplication. 641 integer acc_s2(0); 642 auto it1 = v_idx1.begin(); 643 for (; it1 != v_idx1.end(); ++it1) { 644 // Get the limit idx in s2. 645 const size_type limit = lf(*it1); 646 // This is the upper limit of an open ended interval, so it needs 647 // to be decreased by one in order to be used in dist. If zero, it means 648 // there are no terms in v2 that can be multiplied by the current term in t1. 649 if (limit == 0u) { 650 continue; 651 } 652 acc_s2 += limit; 653 // Pick a random index in m_v2 within the limit. 654 const size_type idx2 655 = dist(engine, typename dist_type::param_type(static_cast<size_type>(0u), 656 static_cast<size_type>(limit - 1u))); 657 // Perform term multiplication. 658 mf(*it1, idx2); 659 // Check for unlikely overflows when increasing count. 660 if (unlikely(result_size > std::numeric_limits<size_type>::max() 661 || count > std::numeric_limits<size_type>::max() - result_size)) { 662 piranha_throw(std::overflow_error, "overflow error"); 663 } 664 if (tmp.size() != count + result_size) { 665 break; 666 } 667 // Increase cycle variables. 668 count = static_cast<size_type>(count + result_size); 669 } 670 integer add; 671 if (it1 == v_idx1.end()) { 672 // We never found a duplicate. count is now the number of terms in s1 673 // which actually participate in the multiplication, while acc_s2 / count 674 // is the average number of terms in s2 that participate in the multiplication. 675 // The result will be then count * acc_s2 / count = acc_s2. 676 add = acc_s2; 677 } else { 678 // If we found a duplicate, we use the heuristic. 679 add = integer(multiplier) * count * count; 680 } 681 // Fix if zero, so that the average later never results in zero. 682 if (add.sign() == 0) { 683 add = 1; 684 } 685 acc += add; 686 // Reset tmp. 687 tmp._container().clear(); 688 } 689 // Accumulate in the shared variable. 690 if (n_threads == 1u) { 691 // No locking needed. 692 c_estimate += acc; 693 } else { 694 std::lock_guard<std::mutex> lock(mut); 695 c_estimate += acc; 696 } 697 }; 698 // Run the estimation functor. 699 if (n_threads == 1u) { 700 estimator(0u); 701 } else { 702 future_list<void> f_list; 703 try { 704 for (unsigned i = 0u; i < n_threads; ++i) { 705 f_list.push_back(thread_pool::enqueue(i, estimator, i)); 706 } 707 // First let's wait for everything to finish. 708 f_list.wait_all(); 709 // Then, let's handle the exceptions. 710 f_list.get_all(); 711 } catch (...) { 712 f_list.wait_all(); 713 throw; 714 } 715 } 716 piranha_assert(c_estimate >= n_trials); 717 // Return the mean. 718 return static_cast<bucket_size_type>(c_estimate / n_trials); 719 } 720 /// Estimate size of series multiplication (convenience overload) 721 /** 722 * @return the output of the other overload of estimate_final_series_size(), with a limit 723 * functor whose call operator will always return the size of the second series unconditionally. 724 * 725 * @throws unspecified any exception thrown by the other overload of estimate_final_series_size(). 726 */ 727 template <std::size_t MultArity, typename MultFunctor> estimate_final_series_size() const728 bucket_size_type estimate_final_series_size() const 729 { 730 return estimate_final_series_size<MultArity, MultFunctor>(default_limit_functor{*this}); 731 } 732 /// A plain multiplier functor. 733 /** 734 * \note 735 * If the key and coefficient types of \p Series do not satisfy piranha::key_is_multipliable, a compile-time error 736 * will 737 * be produced. 738 * 739 * This is a functor that conforms to the protocol expected by base_series_multiplier::blocked_multiplication() and 740 * base_series_multiplier::estimate_final_series_size(). 741 * 742 * This functor requires that the key and coefficient types of \p Series satisfy piranha::key_is_multipliable, and 743 * it will 744 * use the key's <tt>multiply()</tt> method to perform term-by-term multiplications. 745 * 746 * If the \p FastMode boolean parameter is \p true, then the call operator will insert terms into the return value 747 * series 748 * using the low-level interface of piranha::hash_set, otherwise the call operator will use 749 * piranha::series::insert() for 750 * term insertion. 751 */ 752 template <bool FastMode> 753 class plain_multiplier 754 { 755 using term_type = typename Series::term_type; 756 using key_type = typename term_type::key_type; 757 PIRANHA_TT_CHECK(key_is_multipliable, typename term_type::cf_type, key_type); 758 using it_type = decltype(std::declval<Series &>()._container().end()); 759 static constexpr std::size_t m_arity = key_type::multiply_arity; 760 761 public: 762 /// Constructor. 763 /** 764 * The constructor will store references to the input arguments internally. 765 * 766 * @param bsm a const reference to an instance of piranha::base_series_multiplier, from which 767 * the vectors of term pointers will be extracted. 768 * @param retval the \p Series instance into which terms resulting from multiplications will be inserted. 769 */ plain_multiplier(const base_series_multiplier & bsm,Series & retval)770 explicit plain_multiplier(const base_series_multiplier &bsm, Series &retval) 771 : m_v1(bsm.m_v1), m_v2(bsm.m_v2), m_retval(retval), m_c_end(retval._container().end()) 772 { 773 } 774 775 private: 776 plain_multiplier(const plain_multiplier &) = delete; 777 plain_multiplier(plain_multiplier &&) = delete; 778 plain_multiplier &operator=(const plain_multiplier &) = delete; 779 plain_multiplier &operator=(plain_multiplier &&) = delete; 780 781 public: 782 /// Call operator. 783 /** 784 * The call operator will perform the multiplication of the <tt>i</tt>-th term of the first series by the 785 * <tt>j</tt>-th term of the second series, and it will insert the result into the return value. 786 * 787 * @param i index of a term in the first series. 788 * @param j index of a term in the second series. 789 * 790 * @throws unspecified any exception thrown by: 791 * - piranha::series::insert(), 792 * - the low-level interface of piranha::hash_set, 793 * - the in-place addition operator of the coefficient type, 794 * - term construction. 795 */ operator ()(const size_type & i,const size_type & j) const796 void operator()(const size_type &i, const size_type &j) const 797 { 798 // First perform the multiplication. 799 key_type::multiply(m_tmp_t, *m_v1[i], *m_v2[j], m_retval.get_symbol_set()); 800 for (std::size_t n = 0u; n < m_arity; ++n) { 801 auto &tmp_term = m_tmp_t[n]; 802 if (FastMode) { 803 auto &container = m_retval._container(); 804 // Try to locate the term into retval. 805 auto bucket_idx = container._bucket(tmp_term); 806 const auto it = container._find(tmp_term, bucket_idx); 807 if (it == m_c_end) { 808 container._unique_insert(term_insertion(tmp_term), bucket_idx); 809 } else { 810 it->m_cf += tmp_term.m_cf; 811 } 812 } else { 813 m_retval.insert(term_insertion(tmp_term)); 814 } 815 } 816 } 817 818 private: 819 mutable std::array<term_type, m_arity> m_tmp_t; 820 const std::vector<term_type const *> &m_v1; 821 const std::vector<term_type const *> &m_v2; 822 Series &m_retval; 823 const it_type m_c_end; 824 }; 825 /// Sanitise series. 826 /** 827 * When using the low-level interface of piranha::hash_set for term insertion, invariants might be violated 828 * both in piranha::hash_set and piranha::series. In particular: 829 * 830 * - terms may not be checked for compatibility or ignorability upon insertion, 831 * - the count of elements in piranha::hash_set might not be updated. 832 * 833 * This method can be used to fix these invariants: each term of \p retval will be checked for ignorability and 834 * compatibility, 835 * and the total count of terms in the series will be set to the number of non-ignorable terms. Ignorable terms will 836 * be erased. 837 * 838 * Note that in case of exceptions \p retval will likely be left in an inconsistent state which violates internal 839 * invariants. 840 * Calls to this function should always be wrapped in a try/catch block that makes sure that \p retval is cleared 841 * before 842 * re-throwing. 843 * 844 * @param retval the series to be sanitised. 845 * @param n_threads the number of threads to be used. 846 * 847 * @throws std::invalid_argument if \p n_threads is zero, or if one of the terms in \p retval is not compatible 848 * with the symbol set of \p retval. 849 * @throws std::overflow_error if the number of terms in \p retval overflows the maximum value representable by 850 * base_series_multiplier::bucket_size_type. 851 * @throws unspecified any exception thrown by: 852 * - the cast operator of piranha::integer, 853 * - standard threading primitives, 854 * - thread_pool::enqueue(), 855 * - future_list::push_back(). 856 */ sanitise_series(Series & retval,unsigned n_threads)857 static void sanitise_series(Series &retval, unsigned n_threads) 858 { 859 using term_type = typename Series::term_type; 860 if (unlikely(n_threads == 0u)) { 861 piranha_throw(std::invalid_argument, "invalid number of threads"); 862 } 863 auto &container = retval._container(); 864 const auto &args = retval.get_symbol_set(); 865 // Reset the size to zero before doing anything. 866 container._update_size(static_cast<bucket_size_type>(0u)); 867 // Single-thread implementation. 868 if (n_threads == 1u) { 869 const auto it_end = container.end(); 870 for (auto it = container.begin(); it != it_end;) { 871 if (unlikely(!it->is_compatible(args))) { 872 piranha_throw(std::invalid_argument, "incompatible term"); 873 } 874 if (unlikely(container.size() == std::numeric_limits<bucket_size_type>::max())) { 875 piranha_throw(std::overflow_error, "overflow error in the number of terms of a series"); 876 } 877 // First update the size, it will be scaled back in the erase() method if necessary. 878 container._update_size(static_cast<bucket_size_type>(container.size() + 1u)); 879 if (unlikely(it->is_ignorable(args))) { 880 it = container.erase(it); 881 } else { 882 ++it; 883 } 884 } 885 return; 886 } 887 // Multi-thread implementation. 888 const auto b_count = container.bucket_count(); 889 std::mutex m; 890 integer global_count(0); 891 auto eraser = [b_count, &container, &m, &args, &global_count](const bucket_size_type &start, 892 const bucket_size_type &end) { 893 piranha_assert(start <= end && end <= b_count); 894 (void)b_count; 895 bucket_size_type count = 0u; 896 std::vector<term_type> term_list; 897 // Examine and count the terms bucket-by-bucket. 898 for (bucket_size_type i = start; i != end; ++i) { 899 term_list.clear(); 900 const auto &bl = container._get_bucket_list(i); 901 const auto it_f = bl.end(); 902 for (auto it = bl.begin(); it != it_f; ++it) { 903 // Check first for compatibility. 904 if (unlikely(!it->is_compatible(args))) { 905 piranha_throw(std::invalid_argument, "incompatible term"); 906 } 907 // Check for ignorability. 908 if (unlikely(it->is_ignorable(args))) { 909 term_list.push_back(*it); 910 } 911 // Update the count of terms. 912 if (unlikely(count == std::numeric_limits<bucket_size_type>::max())) { 913 piranha_throw(std::overflow_error, "overflow error in the number of terms of a series"); 914 } 915 count = static_cast<bucket_size_type>(count + 1u); 916 } 917 for (auto it = term_list.begin(); it != term_list.end(); ++it) { 918 // NOTE: must use _erase to avoid concurrent modifications 919 // to the number of elements in the table. 920 container._erase(container._find(*it, i)); 921 // Account for the erased term. 922 piranha_assert(count > 0u); 923 count = static_cast<bucket_size_type>(count - 1u); 924 } 925 } 926 // Update the global count. 927 std::lock_guard<std::mutex> lock(m); 928 global_count += count; 929 }; 930 future_list<decltype(eraser(bucket_size_type(), bucket_size_type()))> f_list; 931 try { 932 for (unsigned i = 0u; i < n_threads; ++i) { 933 const auto start = static_cast<bucket_size_type>((b_count / n_threads) * i), 934 end = static_cast<bucket_size_type>( 935 (i == n_threads - 1u) ? b_count : (b_count / n_threads) * (i + 1u)); 936 f_list.push_back(thread_pool::enqueue(i, eraser, start, end)); 937 } 938 // First let's wait for everything to finish. 939 f_list.wait_all(); 940 // Then, let's handle the exceptions. 941 f_list.get_all(); 942 } catch (...) { 943 f_list.wait_all(); 944 // NOTE: there's not need to clear retval here - it was already in an inconsistent 945 // state coming into this method. We rather need to make sure sanitise_series() is always 946 // called in a try/catch block that clears retval in case of errors. 947 throw; 948 } 949 // Final update of the total count. 950 container._update_size(static_cast<bucket_size_type>(global_count)); 951 } 952 /// A plain series multiplication routine. 953 /** 954 * \note 955 * If the key and coefficient types of \p Series do not satisfy piranha::key_is_multipliable, or \p LimitFunctor 956 * does 957 * not satisfy the requirements outlined in base_series_multiplier::blocked_multiplication(), a compile-time error 958 * will 959 * be produced. 960 * 961 * This method implements a generic series multiplication routine suitable for key types that satisfy 962 * piranha::key_is_multipliable. 963 * The implementation is either single-threaded or multi-threaded, depending on the sizes of the input series, and 964 * it will use 965 * either base_series_multiplier::plain_multiplier or a similar thread-safe multiplier for the term-by-term 966 * multiplications. 967 * The \p lf functor will be forwarded as limit functor to base_series_multiplier::blocked_multiplication() 968 * and base_series_multiplier::estimate_final_series_size(). 969 * 970 * Note that, in multithreaded mode, \p lf will be shared among (and called concurrently from) all the threads. 971 * 972 * @param lf the limit functor (see base_series_multiplier::blocked_multiplication()). 973 * 974 * @return the series resulting from the multiplication of the two series used to construct \p this. 975 * 976 * @throws unspecified any exception thrown by: 977 * - piranha::safe_cast(), 978 * - base_series_multiplier::estimate_final_series_size(), 979 * - <tt>boost::numeric_cast()</tt>, 980 * - the public interface of piranha::hash_set, 981 * - base_series_multiplier::blocked_multiplication(), 982 * - base_series_multiplier::sanitise_series(), 983 * - the <tt>multiply()</tt> method of the key type of \p Series, 984 * - thread_pool::enqueue(), 985 * - future_list::push_back(), 986 * - the construction of terms, 987 * - in-place addition of coefficients. 988 */ 989 template <typename LimitFunctor> plain_multiplication(const LimitFunctor & lf) const990 Series plain_multiplication(const LimitFunctor &lf) const 991 { 992 // Shortcuts. 993 using term_type = typename Series::term_type; 994 using cf_type = typename term_type::cf_type; 995 using key_type = typename term_type::key_type; 996 PIRANHA_TT_CHECK(key_is_multipliable, cf_type, key_type); 997 constexpr std::size_t m_arity = key_type::multiply_arity; 998 // Setup the return value with the merged symbol set. 999 Series retval; 1000 retval.set_symbol_set(m_ss); 1001 // Do not do anything if one of the two series is empty. 1002 if (unlikely(m_v1.empty() || m_v2.empty())) { 1003 return retval; 1004 } 1005 const size_type size1 = m_v1.size(), size2 = m_v2.size(); 1006 (void)size2; 1007 piranha_assert(size1 && size2); 1008 // Convert n_threads to size_type for convenience. 1009 const size_type n_threads = safe_cast<size_type>(m_n_threads); 1010 piranha_assert(n_threads); 1011 // Determine if we should estimate the size. We check the threshold, but we always 1012 // need to estimate in multithreaded mode. 1013 bool estimate = true; 1014 const auto e_thr = tuning::get_estimate_threshold(); 1015 if (integer(m_v1.size()) * m_v2.size() < integer(e_thr) * e_thr && n_threads == 1u) { 1016 estimate = false; 1017 } 1018 if (estimate) { 1019 // Estimate and rehash. 1020 const auto est = estimate_final_series_size<m_arity, plain_multiplier<false>>(lf); 1021 // NOTE: use numeric cast here as safe_cast is expensive, going through an integer-double conversion, 1022 // and in this case the behaviour of numeric_cast is appropriate. 1023 const auto n_buckets = boost::numeric_cast<bucket_size_type>( 1024 std::ceil(static_cast<double>(est) / retval._container().max_load_factor())); 1025 piranha_assert(n_buckets > 0u); 1026 // Check if we want to use the parallel memory set. 1027 // NOTE: it is important here that we use the same n_threads for multiplication and memset as 1028 // we tie together pinned threads with potentially different NUMA regions. 1029 const unsigned n_threads_rehash = tuning::get_parallel_memory_set() ? static_cast<unsigned>(n_threads) : 1u; 1030 retval._container().rehash(n_buckets, n_threads_rehash); 1031 } 1032 if (n_threads == 1u) { 1033 try { 1034 // Single-thread case. 1035 if (estimate) { 1036 blocked_multiplication(plain_multiplier<true>(*this, retval), 0u, size1, lf); 1037 // If we estimated beforehand, we need to sanitise the series. 1038 sanitise_series(retval, static_cast<unsigned>(n_threads)); 1039 } else { 1040 blocked_multiplication(plain_multiplier<false>(*this, retval), 0u, size1, lf); 1041 } 1042 finalise_series(retval); 1043 return retval; 1044 } catch (...) { 1045 retval._container().clear(); 1046 throw; 1047 } 1048 } 1049 // Multi-threaded case. 1050 piranha_assert(estimate); 1051 // Init the vector of spinlocks. 1052 detail::atomic_flag_array sl_array(safe_cast<std::size_t>(retval._container().bucket_count())); 1053 // Init the future list. 1054 future_list<void> f_list; 1055 // Thread block size. 1056 const auto block_size = size1 / n_threads; 1057 try { 1058 for (size_type idx = 0u; idx < n_threads; ++idx) { 1059 // Thread functor. 1060 auto tf = [idx, this, block_size, n_threads, &sl_array, &retval, &lf]() { 1061 // Used to store the result of term multiplication. 1062 std::array<term_type, key_type::multiply_arity> tmp_t; 1063 // End of retval container (thread-safe). 1064 const auto c_end = retval._container().end(); 1065 // Block functor. 1066 // NOTE: this is very similar to the plain functor, but it does the bucket locking 1067 // additionally. 1068 auto f = [&c_end, &tmp_t, this, &retval, &sl_array](const size_type &i, const size_type &j) { 1069 // Run the term multiplication. 1070 key_type::multiply(tmp_t, *(this->m_v1[i]), *(this->m_v2[j]), retval.get_symbol_set()); 1071 for (std::size_t n = 0u; n < key_type::multiply_arity; ++n) { 1072 auto &container = retval._container(); 1073 auto &tmp_term = tmp_t[n]; 1074 // Try to locate the term into retval. 1075 auto bucket_idx = container._bucket(tmp_term); 1076 // Lock the bucket. 1077 detail::atomic_lock_guard alg(sl_array[static_cast<std::size_t>(bucket_idx)]); 1078 const auto it = container._find(tmp_term, bucket_idx); 1079 if (it == c_end) { 1080 container._unique_insert(term_insertion(tmp_term), bucket_idx); 1081 } else { 1082 it->m_cf += tmp_term.m_cf; 1083 } 1084 } 1085 }; 1086 // Thread block limit. 1087 const auto e1 1088 = (idx == n_threads - 1u) ? this->m_v1.size() : static_cast<size_type>((idx + 1u) * block_size); 1089 this->blocked_multiplication(f, static_cast<size_type>(idx * block_size), e1, lf); 1090 }; 1091 f_list.push_back(thread_pool::enqueue(static_cast<unsigned>(idx), tf)); 1092 } 1093 f_list.wait_all(); 1094 f_list.get_all(); 1095 sanitise_series(retval, static_cast<unsigned>(n_threads)); 1096 finalise_series(retval); 1097 } catch (...) { 1098 f_list.wait_all(); 1099 // Clean up retval as it might be in an inconsistent state. 1100 retval._container().clear(); 1101 throw; 1102 } 1103 return retval; 1104 } 1105 /// A plain series multiplication routine (convenience overload). 1106 /** 1107 * @return the output of the other overload of plain_multiplication(), with a limit 1108 * functor whose call operator will always return the size of the second series unconditionally. 1109 * 1110 * @throws unspecified any exception thrown by the other overload of plain_multiplication(). 1111 */ plain_multiplication() const1112 Series plain_multiplication() const 1113 { 1114 return plain_multiplication(default_limit_functor{*this}); 1115 } 1116 /// Finalise series. 1117 /** 1118 * This method will finalise the output \p s of a series multiplication undertaken via 1119 * piranha::base_series_multiplier. 1120 * Currently, this method will not do anything unless the coefficient type of \p Series is an instance of 1121 * piranha::mp_rational. 1122 * In this case, the coefficients of \p s will be normalised with respect to the least common multiplier computed in 1123 * the 1124 * constructor of piranha::base_series_multiplier. 1125 * 1126 * @param s the \p Series to be finalised. 1127 * 1128 * @throws unspecified any exception thrown by: 1129 * - thread_pool::enqueue(), 1130 * - future_list::push_back(). 1131 */ finalise_series(Series & s) const1132 void finalise_series(Series &s) const 1133 { 1134 finalise_impl(s); 1135 } 1136 1137 protected: 1138 /// Vector of const pointers to the terms in the larger series. 1139 mutable v_ptr m_v1; 1140 /// Vector of const pointers to the terms in the smaller series. 1141 mutable v_ptr m_v2; 1142 /// The symbol set of the series used during construction. 1143 const symbol_set m_ss; 1144 /// Number of threads. 1145 /** 1146 * This value will be set by the constructor, and it represents the number of threads 1147 * that will be used by the multiplier. The value is always at least 1 and it is calculated 1148 * via thread_pool::use_threads(). 1149 */ 1150 unsigned m_n_threads; 1151 1152 private: 1153 // See the constructor for an explanation. 1154 container_type m_zero_f1; 1155 container_type m_zero_f2; 1156 }; 1157 } 1158 1159 #endif 1160