1 /*
2  *  Copyright (C) 2004-2021 Edward F. Valeev
3  *
4  *  This file is part of Libint.
5  *
6  *  Libint is free software: you can redistribute it and/or modify
7  *  it under the terms of the GNU Lesser General Public License as published by
8  *  the Free Software Foundation, either version 3 of the License, or
9  *  (at your option) any later version.
10  *
11  *  Libint is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  *  GNU Lesser General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Lesser General Public License
17  *  along with Libint.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  */
20 
21 #ifndef _libint2_src_lib_libint_engine_h_
22 #define _libint2_src_lib_libint_engine_h_
23 
24 #ifndef LIBINT2_DOES_NOT_INLINE_ENGINE
25 # define __libint2_engine_inline inline
26 #else
27 # define __libint2_engine_inline
28 #endif
29 
30 #include <libint2/util/cxxstd.h>
31 #if LIBINT2_CPLUSPLUS_STD < 2011
32 # error "libint2/engine.h requires C++11 support"
33 #endif
34 
35 #include <algorithm>
36 #include <array>
37 #include <cstring>
38 #include <cstdio>
39 #include <functional>
40 #include <iostream>
41 #include <limits>
42 #include <map>
43 #include <memory>
44 #include <tuple>
45 #include <utility>
46 #include <vector>
47 
48 #include <libint2/cxxapi.h>
49 #include <libint2/boys_fwd.h>
50 #include <libint2/shell.h>
51 #include <libint2/solidharmonics.h>
52 #include <libint2/cartesian.h>
53 #include <libint2/util/any.h>
54 #include <libint2/util/array_adaptor.h>
55 #include <libint2/util/intpart_iter.h>
56 #include <libint2/util/compressed_pair.h>
57 #include <libint2/util/timer.h>
58 #include <libint2/cgshellinfo.h>
59 #include <libint2/braket.h>
60 
61 // the engine will be profiled by default if library was configured with
62 // --enable-profile
63 #ifdef LIBINT2_PROFILE
64 #define LIBINT2_ENGINE_TIMERS
65 // uncomment if want to profile each integral class
66 #define LIBINT2_ENGINE_PROFILE_CLASS
67 #endif
68 // uncomment if want to profile the engine even if library was configured
69 // without --enable-profile
70 //#  define LIBINT2_ENGINE_TIMERS
71 
72 namespace libint2 {
73 
74 /// contracted Gaussian geminal = \f$ \sum_i c_i \exp(- \alpha r_{12}^2) \f$,
75 /// represented as a vector of
76 /// {\f$ \alpha_i \f$, \f$ c_i \f$ } pairs
77 typedef std::vector<std::pair<double, double>> ContractedGaussianGeminal;
78 
num_geometrical_derivatives(size_t ncenter,size_t deriv_order)79 constexpr size_t num_geometrical_derivatives(size_t ncenter,
80                                              size_t deriv_order) {
81   return (deriv_order > 0)
82              ? (num_geometrical_derivatives(ncenter, deriv_order - 1) *
83                 (3 * ncenter + deriv_order - 1)) /
84                    deriv_order
85              : 1;
86 }
87 
88 template <typename T, unsigned N>
89 __libint2_engine_inline typename std::remove_all_extents<T>::type* to_ptr1(T (&a)[N]);
90 
91 /// \brief types of operators (operator sets) supported by Engine.
92 
93 /// \warning These must start with 0 and appear in same order as elements of
94 /// BOOST_PP_NBODY_OPERATOR_LIST preprocessor macro (aliases do not need to be included).
95 /// \warning for the sake of nbody() order operators by # of particles
96 enum class Operator {
97   /// overlap
98   overlap = 0,
99   /// electronic kinetic energy, i.e. \f$ -\frac{1}{2} \nabla^2 \f$
100   kinetic,
101   /// Coulomb potential due to point charges
102   nuclear,
103   /// erf-attenuated point-charge Coulomb operator,
104   /// \f$ \mathrm{erf}(\omega r)/r \f$
105   erf_nuclear,
106   /// erfc-attenuated point-charge Coulomb operator,
107   /// \f$ \mathrm{erfc}(\omega r)/r \f$
108   erfc_nuclear,
109   //! overlap + (Cartesian) electric dipole moment,
110   //! \f$ x_O, y_O, z_O \f$, where
111   //! \f$ x_O \equiv x - O_x \f$ is relative to
112   //! origin \f$ \vec{O} \f$
113   emultipole1,
114   //! emultipole1 + (Cartesian) electric quadrupole moment,
115   //! \f$ x^2, xy, xz, y^2, yz, z^2 \f$
116   emultipole2,
117   //! emultipole2 + (Cartesian) electric octupole moment,
118   //! \f$ x^3, x^2y, x^2z, xy^2, xyz, xz^2, y^3, y^2z, yz^2, z^3 \f$
119   emultipole3,
120   //! (electric) spherical multipole moments,
121   //! \f$ O_{l,m} \equiv \mathcal{N}^{\text{sign}(m)}_{l,|m|} \f$ where \f$ \f$ \mathcal{N}^{\pm}_{l,m} \f$
122   //! is defined in J.M. Pérez-Jordá and W. Yang, J Chem Phys 104, 8003 (1996), DOI 10.1063/1.468354 .
123   //! To obtain the real solid harmonics \f$ C^m_l \f$ and \f$ S^m_l \f$ defined in https://en.wikipedia.org/wiki/Solid_harmonics
124   //! multiply these harmonics by \f$ (-1)^m \sqrt{(2 - \delta_{m,0}) (l + |m|)! (l - |m|)!} \f$ .
125   //! The operator set includes multipoles of order up to \f$ l_{\rm max} = \f$ MULTIPOLE_MAX_ORDER (for a total of \f$ (l_{\rm max}+1)^2 \f$ operators),
126   //! in the order of increasing \c l , with the operators of same \c l but different \c m ordered according to the solid harmonics ordering
127   //! specified at configure time (see macro FOR_SOLIDHARM in shgshell_ordering.h.in). For example, for the CCA standard solid harmonics
128   //! ordering the operators will appear in the following order
129   //! \f$ \mathcal{N}^+_{0,0} , \mathcal{N}^-_{1,1}, \mathcal{N}^+_{1,0}, \mathcal{N}^+_{1,1}, \mathcal{N}^-_{2,2}, \mathcal{N}^-_{2,1}, \mathcal{N}^+_{2,0}, \mathcal{N}^+_{2,1}, \mathcal{N}^+_{2,2}. \dots \f$ .
130   sphemultipole,
131   /// \f$ \delta(\vec{r}_1 - \vec{r}_2) \f$
132   delta,
133   /// (2-body) Coulomb operator = \f$ r_{12}^{-1} \f$
134   coulomb,
135   /// alias for Operator::coulomb
136   r12_m1 = coulomb,
137   /// contracted Gaussian geminal
138   cgtg,
139   /// contracted Gaussian geminal times Coulomb
140   cgtg_x_coulomb,
141   /// |Delta . contracted Gaussian geminal|^2
142   delcgtg2,
143   /// anti-Coulomb operator, \f$ r_{12} \f$
144   r12,
145   /// alias for Operator::r12
146   r12_1 = r12,
147   /// erf-attenuated Coulomb operator,
148   /// \f$ \mathrm{erf}(\omega r)/r \f$
149   erf_coulomb,
150   /// erfc-attenuated Coulomb operator,
151   /// \f$ \mathrm{erfc}(\omega r)/r \f$
152   erfc_coulomb,
153   /// Slater-type geminal, \f$ \mathrm{exp}(-\zeta r_{12}) \f$
154   stg,
155   /// Slater-type geminal times Coulomb, , \f$ \mathrm{exp}(-\zeta r_{12}) / r_{12} \f$
156   stg_x_coulomb,
157   /// alias for Operator::stg_x_coulomb
158   yukawa = stg_x_coulomb,
159   // do not modify this
160   invalid = -1,
161   // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!keep this updated!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
162   first_1body_oper = overlap,
163   last_1body_oper = sphemultipole,
164   first_2body_oper = delta,
165   last_2body_oper = stg_x_coulomb,
166   first_oper = first_1body_oper,
167   last_oper = last_2body_oper
168 };
169 
170 /// @param[in] oper an Operator object
171 /// @return the particle rank of \c oper
172 /// @throw std::logic_error if an invalid @c oper given
rank(Operator oper)173 inline constexpr int rank(Operator oper) {
174   return (oper >= Operator::first_1body_oper &&
175           oper <= Operator::last_1body_oper)
176           ? 1 :
177          ((oper >= Operator::first_2body_oper &&
178            oper <= Operator::last_2body_oper)
179            ? 2 : throw std::logic_error("rank(Operator): invalid operator given"));
180 }
181 
182 namespace detail {
183 struct default_operator_traits {
184   typedef struct {
185   } oper_params_type;
default_paramsdefault_operator_traits186   static oper_params_type default_params() { return oper_params_type{}; }
187   static constexpr auto nopers = 1u;
188   struct _core_eval_type {
189     template <typename... params>
instancedefault_operator_traits::_core_eval_type190     static std::shared_ptr<const _core_eval_type> instance(params...) {
191       return nullptr;
192     }
193   };
194   using core_eval_type = const _core_eval_type;
195 };
196 }  // namespace detail
197 
198 /// describes operator set \c Op
199 /// @tparam Op a value of type Operator
200 /// \note default describes operator set of size 1 that takes trivial \c
201 /// oper_params_type and \c core_eval_type;
202 ///       needs to be specialized for some operator types
203 template <Operator Op>
204 struct operator_traits : public detail::default_operator_traits {};
205 
206 template <>
207 struct operator_traits<Operator::nuclear>
208     : public detail::default_operator_traits {
209   /// point charges and their positions
210   typedef std::vector<std::pair<scalar_type, std::array<scalar_type, 3>>>
211       oper_params_type;
212   static oper_params_type default_params() { return oper_params_type{}; }
213 #ifndef LIBINT_USER_DEFINED_REAL
214   typedef const libint2::FmEval_Chebyshev7<scalar_type> core_eval_type;
215 #else
216   typedef const libint2::FmEval_Reference<scalar_type> core_eval_type;
217 #endif
218 };
219 
220 template <>
221 struct operator_traits<Operator::erf_nuclear>
222     : public detail::default_operator_traits {
223   /// the attenuation parameter (0 = zero potential, +infinity = no attenuation)
224   /// + point charges and positions
225   typedef std::tuple<
226       scalar_type, typename operator_traits<Operator::nuclear>::oper_params_type>
227       oper_params_type;
228   static oper_params_type default_params() {
229     return std::make_tuple(0,operator_traits<Operator::nuclear>::default_params());
230   }
231   typedef const libint2::GenericGmEval<libint2::os_core_ints::erf_coulomb_gm_eval<scalar_type>>
232       core_eval_type;
233 };
234 
235 template <>
236 struct operator_traits<Operator::erfc_nuclear>
237     : public detail::default_operator_traits {
238   /// the attenuation parameter (0 = no attenuation, +infinity = zero potential)
239   /// + point charges and positions
240   typedef typename operator_traits<Operator::erf_nuclear>::oper_params_type oper_params_type;
241   static oper_params_type default_params() {
242     return std::make_tuple(0,operator_traits<Operator::nuclear>::default_params());
243   }
244   typedef const libint2::GenericGmEval<libint2::os_core_ints::erfc_coulomb_gm_eval<scalar_type>>
245       core_eval_type;
246 };
247 
248 template <>
249 struct operator_traits<Operator::emultipole1>
250     : public detail::default_operator_traits {
251   /// Cartesian coordinates of the origin with respect to which the dipole
252   /// moment is defined
253   typedef std::array<double, 3> oper_params_type;
254   static oper_params_type default_params() {
255     return oper_params_type{{0,0,0}};
256   }
257   static constexpr auto nopers = 4u;  //!< overlap + 3 dipole components
258 };
259 template <>
260 struct operator_traits<Operator::emultipole2>
261     : public operator_traits<Operator::emultipole1> {
262   static constexpr auto nopers =
263       operator_traits<Operator::emultipole1>::nopers +
264       6;  //!< overlap + 3 dipoles + 6 quadrupoles
265 };
266 template <>
267 struct operator_traits<Operator::emultipole3>
268     : public operator_traits<Operator::emultipole1> {
269   static constexpr auto nopers =
270       operator_traits<Operator::emultipole2>::nopers + 10;
271 };
272 template <>
273 struct operator_traits<Operator::sphemultipole>
274     : public operator_traits<Operator::emultipole1> {
275   static constexpr auto nopers =
276       (MULTIPOLE_MAX_ORDER + 1) * (MULTIPOLE_MAX_ORDER + 1);
277 };
278 
279 template <>
280 struct operator_traits<Operator::coulomb>
281     : public detail::default_operator_traits {
282 #ifndef LIBINT_USER_DEFINED_REAL
283   typedef const libint2::FmEval_Chebyshev7<scalar_type> core_eval_type;
284 #else
285   typedef const libint2::FmEval_Reference<scalar_type> core_eval_type;
286 #endif
287 };
288 namespace detail {
289 template <int K>
290 struct cgtg_operator_traits : public detail::default_operator_traits {
291   typedef libint2::GaussianGmEval<scalar_type, K> core_eval_type;
292   typedef ContractedGaussianGeminal oper_params_type;
293 };
294 }  // namespace detail
295 template <>
296 struct operator_traits<Operator::cgtg>
297     : public detail::cgtg_operator_traits<0> {};
298 template <>
299 struct operator_traits<Operator::cgtg_x_coulomb>
300     : public detail::cgtg_operator_traits<-1> {};
301 template <>
302 struct operator_traits<Operator::delcgtg2>
303     : public detail::cgtg_operator_traits<2> {};
304 
305 template <>
306 struct operator_traits<Operator::delta>
307     : public detail::default_operator_traits {
308   typedef const libint2::GenericGmEval<libint2::os_core_ints::delta_gm_eval<scalar_type>>
309       core_eval_type;
310 };
311 
312 template <>
313 struct operator_traits<Operator::r12>
314     : public detail::default_operator_traits {
315   typedef const libint2::GenericGmEval<libint2::os_core_ints::r12_xx_K_gm_eval<scalar_type, 1>>
316       core_eval_type;
317 };
318 
319 template <>
320 struct operator_traits<Operator::erf_coulomb>
321     : public detail::default_operator_traits {
322   /// the attenuation parameter (0 = zero potential, +infinity = no attenuation)
323   typedef scalar_type oper_params_type;
324   static oper_params_type default_params() {
325     return oper_params_type{0};
326   }
327   typedef const libint2::GenericGmEval<libint2::os_core_ints::erf_coulomb_gm_eval<scalar_type>>
328       core_eval_type;
329 };
330 template <>
331 struct operator_traits<Operator::erfc_coulomb>
332     : public detail::default_operator_traits {
333   /// the attenuation parameter (0 = no attenuation, +infinity = zero potential)
334   typedef scalar_type oper_params_type;
335   static oper_params_type default_params() {
336     return oper_params_type{0};
337   }
338   typedef const libint2::GenericGmEval<libint2::os_core_ints::erfc_coulomb_gm_eval<scalar_type>>
339       core_eval_type;
340 };
341 
342 template <>
343 struct operator_traits<Operator::stg>
344     : public detail::default_operator_traits {
345   /// the attenuation parameter (0 = constant, infinity = delta-function)
346   typedef scalar_type oper_params_type;
347   static oper_params_type default_params() {
348     return oper_params_type{0};
349   }
350   typedef const libint2::TennoGmEval<scalar_type>
351       core_eval_type;
352 };
353 
354 template <>
355 struct operator_traits<Operator::stg_x_coulomb>
356     : public detail::default_operator_traits {
357   /// the attenuation parameter (0 = coulomb, infinity = delta-function)
358   typedef scalar_type oper_params_type;
359   static oper_params_type default_params() {
360     return oper_params_type{0};
361   }
362   typedef const libint2::TennoGmEval<scalar_type>
363       core_eval_type;
364 };
365 
366 /// the runtime version of \c operator_traits<oper>::default_params()
367 libint2::any
368 default_params(const Operator& oper);
369 
370 namespace detail {
371 template <typename core_eval_type>
372 using __core_eval_pack_type =
373     compressed_pair<std::shared_ptr<core_eval_type>,
374                     libint2::detail::CoreEvalScratch<core_eval_type>>;
375 template <Operator Op>
376 using core_eval_pack_type =
377     __core_eval_pack_type<typename operator_traits<Op>::core_eval_type>;
378 }
379 
380 #define BOOST_PP_NBODY_BRAKET_MAX_INDEX 4
381 
382 /// @param[in] braket a BraKet object
383 /// @return rank of @c braket
384 /// @throw std::logic_error if invalid @c braket given
385 inline constexpr int rank(BraKet braket) {
386   return (braket==BraKet::x_x || braket==BraKet::xs_xs) ? 2 :
387          ((braket==BraKet::xs_xx || braket==BraKet::xx_xs) ? 3 :
388           ((braket==BraKet::xx_xx) ? 4 : throw std::logic_error("rank(BraKet): invalid braket given")));
389 }
390 
391 /// @param[in] oper an Operator object
392 /// @return the default braket for @c oper
393 /// @throw std::logic_error if invalid @c oper given
394 inline constexpr BraKet default_braket(Operator oper) {
395   return (rank(oper)==1) ? BraKet::x_x :
396          ((rank(oper)==2) ? BraKet::xx_xx : throw std::logic_error("default_braket(Operator): invalid operator given"));
397 }
398 
399 constexpr size_t nopers_2body = static_cast<int>(Operator::last_2body_oper) -
400                                 static_cast<int>(Operator::first_2body_oper) +
401                                 1;
402 constexpr size_t nbrakets_2body = static_cast<int>(BraKet::last_2body_braket) -
403                                   static_cast<int>(BraKet::first_2body_braket) +
404                                   1;
405 constexpr size_t nderivorders_2body = LIBINT2_MAX_DERIV_ORDER + 1;
406 
407 /**
408  * Engine computes integrals of operators (or operator sets) specified by
409  * combination of Operator and BraKet.
410  * This class deprecates OneBodyEngine and TwoBodyEngine.
411  */
412 class Engine {
413  private:
414   typedef struct {
415   } empty_pod;
416 
417  public:
418   static constexpr auto max_ntargets =
419       std::extent<decltype(std::declval<Libint_t>().targets), 0>::value;
420   using target_ptr_vec =
421       std::vector<const value_type*, detail::ext_stack_allocator<const value_type*, max_ntargets>>;
422 
423   /// creates a default Engine that cannot be used for computing integrals;
424   /// to be used as placeholder for copying a usable engine, OR for cleanup of
425   /// thread-local data. Nontrivial use of a default-initialized Engine will
426   /// cause an exception of type Engine::using_default_initialized
427   Engine()
428       : oper_(Operator::invalid),
429         braket_(BraKet::invalid),
430         primdata_(),
431         stack_size_(0),
432         lmax_(-1),
433         deriv_order_(0),
434         cartesian_shell_normalization_(CartesianShellNormalization::standard),
435         scale_(1) {
436     set_precision(std::numeric_limits<scalar_type>::epsilon());
437   }
438 
439   // clang-format off
440   /// Constructs a (usable) Engine
441 
442   /// \param oper a value of Operator type
443   /// \param max_nprim the maximum number of primitives per contracted Gaussian
444   /// shell (must be greater than 0)
445   /// \param max_l the maximum angular momentum of Gaussian shell (>=0)
446   /// \throw Engine::lmax_exceeded if \c max_l exceeds the angular momentum
447   /// limit of the library
448   /// \param deriv_order if not 0, will compute geometric derivatives of
449   /// Gaussian integrals of order \c deriv_order ,
450   ///                    (default=0)
451   /// \param precision specifies the target precision with which the integrals
452   /// will be computed; the default is the "epsilon"
453   ///        of \c scalar_type type, given by \c
454   ///        std::numeric_limits<scalar_type>::epsilon(). Currently precision control
455   ///        is implemented
456   ///        for two-body integrals only. The precision control is somewhat
457   ///        empirical,
458   ///        hence be conservative. \sa set_precision()
459   /// \param params a value of type
460   /// Engine::operator_traits<oper>::oper_params_type specifying the parameters
461   /// of
462   ///               the operator set, e.g. position and magnitude of the charges
463   ///               creating the Coulomb potential
464   ///               for oper == Operator::nuclear, etc.
465   /// For most values of \c oper
466   ///               this is not needed.
467   ///               \sa Engine::operator_traits
468   /// \param braket a value of BraKet type
469   /// \warning currently only one-contraction Shell objects are supported; i.e.
470   /// generally-contracted Shells are not yet supported
471   // clang-format on
472   template <typename Params = empty_pod>
473   Engine(Operator oper, size_t max_nprim, int max_l, int deriv_order = 0,
474          scalar_type precision = std::numeric_limits<scalar_type>::epsilon(),
475          Params params = empty_pod(), BraKet braket = BraKet::invalid)
476       : oper_(oper),
477         braket_(braket),
478         primdata_(),
479         spbra_(max_nprim),
480         spket_(max_nprim),
481         stack_size_(0),
482         lmax_(max_l),
483         deriv_order_(deriv_order),
484         cartesian_shell_normalization_(CartesianShellNormalization::standard),
485         scale_(1),
486         params_(enforce_params_type(oper, params)) {
487     set_precision(precision);
488     assert(max_nprim > 0);
489     initialize(max_nprim);
490     core_eval_pack_ = make_core_eval_pack(oper);  // must follow initialize() to
491                                                   // ensure default braket_ has
492                                                   // been set
493     init_core_ints_params(params_);
494   }
495 
496   /// move constructor
497   // intel does not accept "move ctor = default"
498   Engine(Engine&& other)
499       : oper_(other.oper_),
500         braket_(other.braket_),
501         primdata_(std::move(other.primdata_)),
502         spbra_(std::move(other.spbra_)),
503         spket_(std::move(other.spket_)),
504         stack_size_(other.stack_size_),
505         lmax_(other.lmax_),
506         hard_lmax_(other.hard_lmax_),
507         hard_default_lmax_(other.hard_default_lmax_),
508         deriv_order_(other.deriv_order_),
509         precision_(other.precision_),
510         ln_precision_(other.ln_precision_),
511         cartesian_shell_normalization_(other.cartesian_shell_normalization_),
512         scale_(other.scale_),
513         core_eval_pack_(std::move(other.core_eval_pack_)),
514         params_(std::move(other.params_)),
515         core_ints_params_(std::move(other.core_ints_params_)),
516         targets_(std::move(other.targets_)),
517         set_targets_(other.set_targets_),
518         scratch_(std::move(other.scratch_)),
519         scratch2_(other.scratch2_),
520         buildfnptrs_(other.buildfnptrs_) {
521     // leave other in an unusable (but valid) state
522     other.oper_ = Operator::invalid;
523     other.braket_ = BraKet::invalid;
524     other.scratch2_ = nullptr;
525   }
526 
527   /// (deep) copy constructor
528   Engine(const Engine& other)
529       : oper_(other.oper_),
530         braket_(other.braket_),
531         primdata_(other.primdata_.size()),
532         spbra_(other.spbra_),
533         spket_(other.spket_),
534         stack_size_(other.stack_size_),
535         lmax_(other.lmax_),
536         deriv_order_(other.deriv_order_),
537         precision_(other.precision_),
538         ln_precision_(other.ln_precision_),
539         cartesian_shell_normalization_(other.cartesian_shell_normalization_),
540         scale_(other.scale_),
541         core_eval_pack_(other.core_eval_pack_),
542         params_(other.params_),
543         core_ints_params_(other.core_ints_params_) {
544     initialize();
545   }
546 
547   ~Engine() { finalize(); }
548 
549   /// move assignment is default
550   Engine& operator=(Engine&& other) {
551     oper_ = other.oper_;
552     braket_ = other.braket_;
553     primdata_ = std::move(other.primdata_);
554     spbra_ = std::move(other.spbra_);
555     spket_ = std::move(other.spket_);
556     stack_size_ = other.stack_size_;
557     lmax_ = other.lmax_;
558     hard_lmax_ = other.hard_lmax_;
559     hard_default_lmax_ = other.hard_default_lmax_;
560     deriv_order_ = other.deriv_order_;
561     precision_ = other.precision_;
562     ln_precision_ = other.ln_precision_;
563     cartesian_shell_normalization_ = other.cartesian_shell_normalization_;
564     scale_ = other.scale_;
565     core_eval_pack_ = std::move(other.core_eval_pack_);
566     params_ = std::move(other.params_);
567     core_ints_params_ = std::move(other.core_ints_params_);
568     targets_ = std::move(other.targets_);
569     set_targets_ = other.set_targets_;
570     scratch_ = std::move(other.scratch_);
571     scratch2_ = other.scratch2_;
572     buildfnptrs_ = other.buildfnptrs_;
573     // leave other in an unusable state
574     other.oper_ = Operator::invalid;
575     other.braket_ = BraKet::invalid;
576     other.scratch2_ = nullptr;
577     return *this;
578   }
579 
580   /// (deep) copy assignment
581   Engine& operator=(const Engine& other) {
582     oper_ = other.oper_;
583     braket_ = other.braket_;
584     primdata_.resize(other.primdata_.size());
585     spbra_ = other.spbra_;
586     spket_ = other.spket_;
587     stack_size_ = other.stack_size_;
588     lmax_ = other.lmax_;
589     deriv_order_ = other.deriv_order_;
590     precision_ = other.precision_;
591     ln_precision_ = other.ln_precision_;
592     cartesian_shell_normalization_ = other.cartesian_shell_normalization_;
593     scale_ = other.scale_;
594     core_eval_pack_ = other.core_eval_pack_;
595     params_ = other.params_;
596     core_ints_params_ = other.core_ints_params_;
597     initialize();
598     return *this;
599   }
600 
601   /// @return the particle rank of the operator
602   int operator_rank() const { return rank(oper_); }
603 
604   /// @return rank of the braket (e.g., 2 for (a|b), 3 for (a|bc), etc.)
605   int braket_rank() const { return rank(braket_); }
606 
607   /// @return the operator
608   Operator oper() const { return oper_; }
609 
610   /// @return the braket
611   BraKet braket() const { return braket_; }
612 
613   /// @return the order of geometrical derivatives
614   int deriv_order() const { return deriv_order_; }
615 
616   /// (re)sets operator type to @c new_oper
617   /// @param[in] new_oper Operator whose integrals will be computed with the next call to Engine::compute()
618   /// @note this resets braket and params to their respective defaults for @c new_oper
619   Engine& set(Operator new_oper) {
620     if (oper_ != new_oper) {
621       oper_ = new_oper;
622       braket_ = default_braket(oper_);
623       params_ = default_params(oper_);
624       initialize();
625       core_eval_pack_ = make_core_eval_pack(oper_);  // must follow initialize() to
626                                                      // ensure default braket_ has
627                                                      // been set
628     }
629     return *this;
630   }
631 
632   /// (re)sets braket type to @c new_braket
633   /// @param[in] new_braket integrals BraKet that will be computed with the next call to Engine::compute()
634   Engine& set(BraKet new_braket) {
635     if (braket_ != new_braket) {
636       braket_ = new_braket;
637       initialize();
638     }
639     return *this;
640   }
641 
642   /// resets operator parameters; this may be useful e.g. if need to compute
643   /// Coulomb potential
644   /// integrals over batches of charges for the sake of parallelism.
645   template <typename Params>
646   Engine& set_params(const Params& params) {
647     params_ = params;
648     init_core_ints_params(params_);
649     reset_scratch();
650     return *this;
651   }
652 
653   /// @return the maximum number of primitives that this engine can handle
654   std::size_t max_nprim() const {
655     assert(spbra_.primpairs.size() == spket_.primpairs.size());
656     return static_cast<std::size_t>(std::sqrt(spbra_.primpairs.size()));
657   }
658 
659   /// reset the maximum number of primitives
660   /// @param[in] n the maximum number of primitives
661   /// @note left unchanged if the value returned by Engine::max_nprim is greater than @c n
662   Engine& set_max_nprim(std::size_t n) {
663     if (n*n > spbra_.primpairs.size()) {
664       spbra_.resize(n);
665       spket_.resize(n);
666       initialize(n);
667     }
668     return *this;
669   }
670 
671   /// @return the maximum angular momentum that this engine can handle
672   std::size_t max_l() const {
673     return lmax_;
674   }
675 
676   /// reset the maximum angular momentum
677   /// @param[in] L the maximum angular momentum
678   /// @note left unchanged if the value returned by Engine::max_l is greater than @c L
679   Engine& set_max_l(std::size_t L) {
680     if (L >= static_cast<std::size_t>(lmax_)) {
681       lmax_ = L;
682       initialize();
683     }
684     return *this;
685   }
686 
687   /// returns a vector that will hold pointers to shell sets computed with
688   /// Engine::compute()
689   /// or other compute functions. Only need to get this vector once, but the
690   /// values will change
691   /// after every compute() call.
692   const target_ptr_vec& results() const { return targets_; }
693 
694   /// reports the number of shell sets that each call to compute() produces.
695   /// this depends on the order of geometrical derivatives requested and
696   /// on the operator set. \sa compute_nshellsets()
697   unsigned int nshellsets() const { return targets_.size(); }
698 
699   /// Computes target shell sets of integrals.
700 
701   /// @return vector of pointers to target shell sets, the number of sets = Engine::nshellsets();
702   ///         if the first pointer equals \c nullptr then all elements were screened out.
703   /// \note resulting shell sets are stored in row-major order.
704   /// \note Call Engine::compute1() or Engine::compute2() directly to avoid extra copies.
705   template <typename... ShellPack>
706   __libint2_engine_inline const target_ptr_vec& compute(
707       const libint2::Shell& first_shell, const ShellPack&... rest_of_shells);
708 
709   /// Computes target shell sets of 1-body integrals.
710   /// @param[in] s1
711   /// @param[in] s2
712   /// @return vector of pointers to target shell sets, the number of sets = Engine::nshellsets()
713   /// \note resulting shell sets are stored in row-major order
714   __libint2_engine_inline const target_ptr_vec& compute1(
715       const libint2::Shell& s1, const libint2::Shell& s2);
716 
717   /// Computes target shell sets of 2-body integrals, @code  @endcode
718   /// @note result is stored in the "chemists"/Mulliken form, @code (bra1 bra2|ket1 ket2) @endcode;
719   /// the "physicists"/Dirac bra-ket form is @code <bra1 ket1|bra2 ket2> @endcode, where @c bra1 and @c bra2
720   /// refer to particle 1, and @c ket1 and @c ket2 refer to particle 2.
721   /// @tparam oper operator
722   /// @tparam braket the integral type
723   /// @tparam deriv_order the derivative order, values greater than 2 not yet supported
724   /// @param[in] bra1 the first shell in the Mulliken bra
725   /// @param[in] bra2 the second shell in the Mulliken bra
726   /// @param[in] ket1 the first shell in the Mulliken ket
727   /// @param[in] ket2 the second shell in the Mulliken ket
728   /// @param[in] spbra ShellPair data for shell pair @c {bra1,bra2}
729   /// @param[in] spket ShellPair data for shell pair @c {ket1,ket2}
730   /// @return vector of pointers to target shell sets, the number of sets = Engine::nshellsets();
731   ///         if the first pointer equals \c nullptr then all elements were screened out.
732   /// @note the result integrals are packed in shell sets in row-major order
733   /// (i.e. function index of shell ket2 has stride 1, function index of shell ket1 has
734   /// stride nket2, etc.).
735   /// @note internally the integrals are evaluated with shells permuted to according to the canonical
736   /// order predefined at the library generation time (see macro @c LIBINT_SHELL_SET ). To minimize the overhead it is recommended to
737   /// organize your shell loop nests in the order best suited for your particular instance of Libint library.
738   template <Operator oper, BraKet braket, size_t deriv_order>
739   __libint2_engine_inline const target_ptr_vec& compute2(const Shell& bra1,
740                                                          const Shell& bra2,
741                                                          const Shell& ket1,
742                                                          const Shell& ket2,
743                                                          const ShellPair* spbra = nullptr,
744                                                          const ShellPair* spket = nullptr);
745 
746   typedef const target_ptr_vec& (Engine::*compute2_ptr_type)(const Shell& bra1,
747                                                              const Shell& bra2,
748                                                              const Shell& ket1,
749                                                              const Shell& ket2,
750                                                              const ShellPair* spbra,
751                                                              const ShellPair* spket);
752 
753   /** this specifies target precision for computing the integrals.
754    * @param[in] prec the target precision
755    * @note target precision \f$ \epsilon \f$ is used in 3 ways:
756    *  (1) to screen out primitive pairs in ShellPair object for which
757    *      \f$ {\rm scr}_{12} = \max|c_1| \max|c_2| \exp(-\rho_{12}
758    * |AB|^2)/\gamma_{12} < \epsilon \f$ ;
759    *  (2) to screen out primitive quartets outside compute_primdata() for which
760    * \f$ {\rm scr}_{12} {\rm scr}_{34} <  \epsilon \f$;
761    *  (3) to screen out primitive quartets inside compute_primdata() for which
762    * the prefactor of \f$ F_m(\rho, T) \f$ is smaller
763    *      than \f$ \epsilon \f$ .
764    */
765   Engine& set_precision(scalar_type prec) {
766     if (prec <= 0.) {
767       precision_ = 0.;
768       ln_precision_ = std::numeric_limits<scalar_type>::lowest();
769     } else {
770       precision_ = prec;
771       using std::log;
772       ln_precision_ = log(precision_);
773     }
774     return *this;
775   }
776   /// @return the target precision for computing the integrals
777   /// @sa set_precision(scalar_type)
778   scalar_type precision() const { return precision_; }
779 
780   /// @return the Cartesian Gaussian normalization convention
781   CartesianShellNormalization cartesian_shell_normalization() const {
782     return cartesian_shell_normalization_;
783   }
784 
785   /// @param[in] norm the normalization convention for Cartesian Gaussians
786   /// @note the default convention is CartesianShellNormalization_Standard
787   /// @return reference to @c this for daisy-chaining
788   Engine& set(CartesianShellNormalization norm) {
789     cartesian_shell_normalization_ = norm;
790     return *this;
791   }
792 
793   /// @return the scaling factor by which the target integrals are multiplied
794   scalar_type prescaled_by() const {
795     return scale_;
796   }
797 
798   /// @param[in] sc the scaling factor by which the target integrals are multiplied
799   /// @note the default factor is 1
800   /// @return reference to @c this for daisy-chaining
801   Engine& prescale_by(scalar_type sc) {
802     scale_ = sc;
803     return *this;
804   }
805 
806   /// prints the contents of timers to @c os
807   void print_timers(std::ostream& os = std::cout) {
808 #ifdef LIBINT2_ENGINE_TIMERS
809     os << "timers: prereq = " << timers.read(0);
810 #ifndef LIBINT2_PROFILE  // if libint's profiling was on, engine's build timer
811                          // will include its overhead
812     // do not report it, detailed profiling from libint will be printed below
813     os << " build = " << timers.read(1);
814 #endif
815     os << " tform = " << timers.read(2) << std::endl;
816 #endif
817 #ifdef LIBINT2_PROFILE
818     os << "build timers: hrr = " << primdata_[0].timers->read(0)
819        << " vrr = " << primdata_[0].timers->read(1) << std::endl;
820 #endif
821 #ifdef LIBINT2_ENGINE_TIMERS
822 #ifdef LIBINT2_ENGINE_PROFILE_CLASS
823     for (const auto& p : class_profiles) {
824       char buf[1024];
825       std::snprintf(buf, sizeof(buf), "{\"%s\", %10.5lf, %10.5lf, %10.5lf, %10.5lf, %ld, %ld},",
826              p.first.to_string().c_str(), p.second.prereqs, p.second.build_vrr,
827              p.second.build_hrr, p.second.tform, p.second.nshellset,
828              p.second.nprimset);
829       os << buf << std::endl;
830     }
831 #endif
832 #endif
833   }
834 
835   /// Exception class to be used when the angular momentum limit is exceeded.
836   class lmax_exceeded : virtual public std::logic_error {
837    public:
838     lmax_exceeded(const char* task_name, size_t lmax_limit,
839                   size_t lmax_requested)
840         : std::logic_error(
841               "Engine::lmax_exceeded -- angular momentum limit exceeded"),
842           lmax_limit_(lmax_limit),
843           lmax_requested_(lmax_requested) {
844       strncpy(task_name_, task_name, 64);
845       task_name_[64] = '\0';
846     }
847     ~lmax_exceeded() noexcept {}
848 
849     const char* task_name() const {
850       return static_cast<const char*>(task_name_);
851     }
852     size_t lmax_limit() const { return lmax_limit_; }
853     size_t lmax_requested() const { return lmax_requested_; }
854 
855    private:
856     char task_name_[65];
857     size_t lmax_limit_;
858     size_t lmax_requested_;
859   };
860 
861   /// Exception class to be used when using default-initialized Engine
862   class using_default_initialized : virtual public std::logic_error {
863    public:
864     using_default_initialized()
865         : std::logic_error(
866         "Engine::using_default_initialized -- attempt to use a default-initialized Engine") {
867     }
868     ~using_default_initialized() noexcept {}
869   };
870 
871  private:
872   Operator oper_;
873   BraKet braket_;
874   std::vector<Libint_t> primdata_;
875   ShellPair spbra_, spket_;
876   size_t stack_size_;  // amount allocated by libint2_init_xxx in
877                        // primdata_[0].stack
878   int lmax_;
879   int hard_lmax_;  // max L supported by library for this operator type (i.e. LIBINT2_MAX_AM_<task><deriv_order>) + 1
880   int hard_default_lmax_;  // max L supported by library for default operator type
881                            // (i.e. LIBINT2_MAX_AM_default<deriv_order>) + 1
882                            // this is only set and used if LIBINT2_CENTER_DEPENDENT_MAX_AM_<task><deriv_order> == 1
883   int deriv_order_;
884   scalar_type precision_;
885   scalar_type ln_precision_;
886 
887   // specifies the normalization convention for Cartesian Gaussians
888   CartesianShellNormalization cartesian_shell_normalization_;
889 
890   // the target integrals are scaled by this factor (of course it is actually the core integrals that are scaled)
891   scalar_type scale_;
892 
893   any core_eval_pack_;
894 
895   /// operator params
896   any params_;  // operator params
897   /// for some operators need core ints params that are computed from operator
898   /// params,
899   /// e.g. integrals of \f$ f_{12}^2 \f$ are computed from parameters of \f$
900   /// f_{12} \f$
901   any core_ints_params_;
902   /// makes core ints params from the operator params
903   void init_core_ints_params(const any& params);
904 
905   /// pointers to target shell sets, size is updated by reset_scratch()
906   /// targets_.size() is returned by nshellsets()
907   target_ptr_vec targets_;
908   /// true if targets_ does not point primdata_[0].targets
909   /// hence must set its contents explicitly
910   bool set_targets_;
911 
912   std::vector<value_type>
913       scratch_;       // for transposes and/or transforming to solid harmonics
914   value_type* scratch2_;  // &scratch_[0] points to the first block large enough to
915                       // hold all target ints
916   // scratch2_ points to second such block. It could point into scratch_ or at
917   // primdata_[0].stack
918   typedef void (*buildfnptr_t)(const Libint_t*);
919   buildfnptr_t* buildfnptrs_;
920 
921   /// reports the number of shell sets that each call to compute() produces.
922   unsigned int compute_nshellsets() const {
923     const unsigned int num_operator_geometrical_derivatives =
924         (oper_ == Operator::nuclear || oper_ == Operator::erf_nuclear ||
925          oper_ == Operator::erfc_nuclear)
926             ? this->nparams()
927             : 0;
928     const auto ncenters = braket_rank() + num_operator_geometrical_derivatives;
929     return nopers() * num_geometrical_derivatives(ncenters, deriv_order_);
930   }
931 
932   void reset_scratch() {
933     const auto nshsets = compute_nshellsets();
934     targets_.resize(nshsets);
935     set_targets_ = (&targets_[0] != const_cast<const value_type**>(primdata_[0].targets));
936     const auto ncart_max = (lmax_ + 1) * (lmax_ + 2) / 2;
937     const auto target_shellset_size =
938         nshsets * std::pow(ncart_max, braket_rank());
939     // need to be able to hold 2 sets of target shellsets: the worst case occurs
940     // when dealing with
941     // 1-body Coulomb ints derivatives ... have 2+natom 1st-order derivative sets
942     // (and many more of 2nd and higher) that are stored in scratch
943     // then need to transform to solids. To avoid copying back and forth make
944     // sure that there is enough
945     // room to transform all ints and save them in correct order in single pass
946     const auto need_extra_large_scratch = stack_size_ < target_shellset_size;
947     scratch_.resize(need_extra_large_scratch ? 2 * target_shellset_size
948                                              : target_shellset_size);
949     scratch2_ = need_extra_large_scratch ? &scratch_[target_shellset_size]
950                                          : primdata_[0].stack;
951   }
952 
953   __libint2_engine_inline void compute_primdata(Libint_t& primdata,
954                                                 const Shell& s1,
955                                                 const Shell& s2, size_t p1,
956                                                 size_t p2, size_t oset);
957 
958 public:
959   /// 3-dim array of pointers to help dispatch efficiently based on oper_,
960   /// braket_, and deriv_order_
961   /// @note public since 2.7.0 to support efficient dispatch in user code
962   __libint2_engine_inline const std::vector<Engine::compute2_ptr_type>&
963   compute2_ptrs() const;
964 
965 private:
966   // max_nprim=0 avoids resizing primdata_
967   __libint2_engine_inline void initialize(size_t max_nprim = 0);
968   // generic _initializer
969   __libint2_engine_inline void _initialize();
970 
971   void finalize() {
972     if (primdata_.size() != 0) {
973       libint2_cleanup_default(&primdata_[0]);
974     }
975   }  // finalize()
976 
977   //-------
978   // utils
979   //-------
980   unsigned int nparams() const;
981   unsigned int nopers() const;
982   /// if Params == operator_traits<oper>::oper_params_type, will return
983   /// \c any(params)
984   /// else will set return \c any initialized with default value for
985   /// \c operator_traits<type>::oper_params_type
986   /// @param throw_if_wrong_type if true, and Params !=
987   /// operator_traits<type>::oper_params_type, will throw std::bad_cast
988   template <typename Params>
989   static any enforce_params_type(
990       Operator oper, const Params& params,
991       bool throw_if_wrong_type = !std::is_same<Params, empty_pod>::value);
992   /// @return core eval pack corresponding to
993   /// operator_traits<oper>::core_eval_type
994   any make_core_eval_pack(Operator oper) const;
995 
996   //-------
997   // profiling
998   //-------
999   static const bool skip_core_ints = false;
1000 };  // struct Engine
1001 
1002 
1003 }  // namespace libint2
1004 
1005 #ifndef LIBINT2_DOES_NOT_INLINE_ENGINE
1006 #include "./engine.impl.h"
1007 #endif
1008 
1009 #endif /* _libint2_src_lib_libint_engine_h_ */
1010 
1011