1 /*
2 * Copyright (C) 2004-2021 Edward F. Valeev
3 *
4 * This file is part of Libint.
5 *
6 * Libint is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * Libint is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with Libint. If not, see <http://www.gnu.org/licenses/>.
18 *
19 */
20
21 /**
22 This program produces optimized source code to compute matrix elements (integrals)
23 over Gaussian functions. Integrals are computed using recursive schemes. Generated source code
24 is low-level C++ and can be used from other languages.
25
26 Edward Valeev
27
28 Atlanta/Oak Ridge (December 2004 - August 2006)
29 Blacksburg (August 2006 - present)
30 */
31
32 #include <iostream>
33 #include <fstream>
34 #include <limits>
35 #include <vector>
36 #include <boost/preprocessor.hpp>
37 #if not BOOST_PP_VARIADICS // no variadic macros? your compiler is out of date! (should not be possible since variadic macros are part of C++11)
38 # error "your compiler does not provide variadic macros (but does support C++11), something is seriously broken, please create an issue at https://github.com/evaleev/libint/issues"
39 #endif
40
41
42 #include <libint2/config.h>
43 #include <global_macros.h>
44 #include <libint2/cgshell_ordering.h>
45 #include <libint2/shgshell_ordering.h>
46
47 #include <default_params.h>
48 #include <rr.h>
49 #include <dg.h>
50 #include <integral.h>
51 #include <iter.h>
52 #include <policy_spec.h>
53 #include <strategy.h>
54 #include <iface.h>
55 #include <graph_registry.h>
56 #include <task.h>
57 #include <extract.h>
58 #include <dims.h>
59 #include <purgeable.h>
60 #include <buildtest.h>
61 #include <libint2/deriv_iter.h>
62
63 #include <master_ints_list.h>
64
65 using namespace std;
66 using namespace libint2;
67
68 enum ShellSetType {
69 ShellSetType_Standard = LIBINT_SHELL_SET_STANDARD,
70 ShellSetType_ORCA = LIBINT_SHELL_SET_ORCA
71 };
72 template <ShellSetType ShSet> struct ShellQuartetSetPredicate {
73 // return true if this set of angular momenta is included
74 static bool value(int la, int lb, int lc, int ld);
75 };
76 template <> struct ShellQuartetSetPredicate<ShellSetType_Standard> {
valueShellQuartetSetPredicate77 static bool value(int la, int lb, int lc, int ld) {
78 return la >= lb && lc >= ld && la+lb <= lc+ld;
79 }
80 };
81 template <> struct ShellQuartetSetPredicate<ShellSetType_ORCA> {
valueShellQuartetSetPredicate82 static bool value(int la, int lb, int lc, int ld) {
83 return la <= lb && lc <= ld && ( la < lc || (la == lc && lb <= ld));
84 }
85 };
86 template <ShellSetType ShSet> struct ShellTripletSetPredicate {
87 // return true if this set of angular momenta is included
88 static bool value(int lb, int lc, int cd);
89 };
90 template <> struct ShellTripletSetPredicate<ShellSetType_Standard> {
valueShellTripletSetPredicate91 static bool value(int lb, int lc, int ld) {
92 return lc >= ld;
93 }
94 };
95 template <> struct ShellTripletSetPredicate<ShellSetType_ORCA> {
valueShellTripletSetPredicate96 static bool value(int lb, int lc, int ld) {
97 return lc <= ld;
98 }
99 };
100
101 #define STUDY_MEMORY_USAGE 0
102 long living_count = 0;
103
l_to_cgshellsize(size_t l)104 size_t l_to_cgshellsize(size_t l) {
105 return (l+1)*(l+2)/2;
106 }
107
task_label(const std::string & prefix,unsigned int deriv_level)108 std::string task_label(const std::string& prefix,
109 unsigned int deriv_level) {
110 std::stringstream oss;
111 if (deriv_level == 0)
112 return prefix;
113 else {
114 oss << prefix << deriv_level;
115 return oss.str();
116 }
117 }
118
119 /**
120 * Returns n-th token. If n > number of tokens, returns the last one.
121 * @param c_str input string
122 * @param delimiter delimiter/separator character
123 * @param n
124 * @return
125 */
126 template <typename T>
token(const char * c_str,char delimiter,std::size_t n=0)127 T token(const char* c_str,
128 char delimiter,
129 std::size_t n = 0) {
130 T result;
131 std::string result_str;
132
133 // replace all occurences of delimiter in str with whitespaces
134 std::string str(c_str);
135 std::cout << "token<>: str=" << str << std::endl;
136 std::string::size_type pos;
137 while ((pos = str.find(delimiter)) != std::string::npos) {
138 str[pos] = ' ';
139 }
140
141 // tokenize
142 std::istringstream iss(str);
143 for(size_t i=0; i<=n && !iss.eof(); ++i)
144 iss >> result;
145
146 return result;
147 }
148
149 /**
150 * Returns the number of tokens.
151 * @param c_str input string
152 * @param delimiter delimiter/separator character
153 * @return the number of tokens
154 */
ntokens(const char * c_str,char delimiter)155 inline size_t ntokens(const char* c_str,
156 char delimiter) {
157 size_t n = 1;
158
159 // replace all occurences of delimiter in str with whitespaces
160 std::string str(c_str);
161 std::cout << "ntokens: str=" << str << std::endl;
162 std::string::size_type pos;
163 while ((pos = str.find(delimiter)) != std::string::npos) {
164 str[pos] = ' ';
165 ++n;
166 }
167
168 return n;
169 }
170
171 static void try_main (int argc, char* argv[]);
172
main(int argc,char * argv[])173 int main(int argc, char* argv[])
174 {
175 int return_code = 0;
176 try {
177 try_main(argc,argv);
178 }
179 catch(std::exception& a) {
180 cout << endl
181 << " WARNING! Caught a standard exception:" << endl
182 << " " << a.what() << endl << endl;
183 return_code = 1;
184 }
185 catch(...) {
186 cout << endl
187 << " WARNING! Caught an unknown exception" << endl << endl;
188 return_code = 1;
189 }
190
191 return return_code;
192 }
193
194 static void print_header(std::ostream& os);
195 static void print_config(std::ostream& os);
196 // Put all configuration-specific API elements in here
197 static void config_to_api(const SafePtr<CompilationParameters>& cparams, SafePtr<Libint2Iface>& iface);
198
199 #ifdef INCLUDE_ERI
200 #define USE_GENERIC_ERI_BUILD 1
201 # if !USE_GENERIC_ERI_BUILD
202 static void build_TwoPRep_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
203 SafePtr<Libint2Iface>& iface);
204 # else
205 static void build_TwoPRep_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
206 SafePtr<Libint2Iface>& iface, unsigned int deriv_level);
207 # endif
208 #endif
209
210 #ifdef INCLUDE_ERI3
211 static void build_TwoPRep_1b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
212 SafePtr<Libint2Iface>& iface, unsigned int deriv_level);
213 #endif
214
215 #ifdef INCLUDE_ERI2
216 static void build_TwoPRep_1b_1k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
217 SafePtr<Libint2Iface>& iface, unsigned int deriv_level);
218 #endif
219
220 #ifdef INCLUDE_G12
221 static void build_R12kG12_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
222 SafePtr<Libint2Iface>& iface);
223 static void build_R12kG12_2b_2k_separate(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
224 SafePtr<Libint2Iface>& iface);
225 #endif
226
227 #ifdef INCLUDE_G12DKH
228 static void build_G12DKH_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
229 SafePtr<Libint2Iface>& iface);
230 #endif
231
232 #ifdef INCLUDE_ONEBODY
233
234 # if LIBINT_SUPPORT_ONEBODYINTS == 0
235 # error "change LIBINT_SUPPORT_ONEBODYINTS in global_macros.h to 1 if need 1-body ints"
236 # endif
237
238 namespace {
239 template <typename OperType> struct AuxQuantaType;
240 template <> struct AuxQuantaType<ElecPotOper> {
241 typedef mType type;
242 };
243 template <typename OperType> struct AuxQuantaType {
244 typedef EmptySet type;
245 };
246
make_descr(int,int,int=0)247 template <typename OperDescrType> OperDescrType make_descr(int, int, int = 0) {
248 return OperDescrType();
249 }
make_descr(int x,int y,int z)250 template <> CartesianMultipole_Descr<3u> make_descr<CartesianMultipole_Descr<3u>>(int x, int y, int z) {
251 CartesianMultipole_Descr<3u> result;
252 // cartesian multipole quanta
253 result.inc(0,x);
254 result.inc(1,y);
255 result.inc(2,z);
256 return result;
257 }
make_descr(int l,int m,int)258 template <> SphericalMultipole_Descr make_descr<SphericalMultipole_Descr>(int l, int m, int) {
259 SphericalMultipole_Descr result(l,m);
260 return result;
261 }
262 }
263
264 template <typename _OperType>
265 void
build_onebody_1b_1k(std::ostream & os,std::string label,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface,unsigned int deriv_level)266 build_onebody_1b_1k(std::ostream& os, std::string label, const SafePtr<CompilationParameters>& cparams,
267 SafePtr<Libint2Iface>& iface, unsigned int deriv_level)
268 {
269 // implement overlap as a special case of cartesian emultipole
270 using OperType = typename std::conditional<std::is_same<_OperType,OverlapOper>::value,CartesianMultipoleOper<3u>,_OperType>::type;
271 const std::string task = task_label(label, deriv_level);
272 typedef CGShell BFType;
273 typedef typename OperType::Descriptor OperDescrType;
274 typedef GenIntegralSet_1_1<CGShell, OperType, typename AuxQuantaType<OperType>::type> Onebody_sh_1_1;
275
276 vector<BFType*> shells;
277 unsigned int lmax = cparams->max_am(task);
278 for(unsigned int l=0; l<=lmax; l++) {
279 shells.push_back(new BFType(l));
280 }
281 ImplicitDimensions::set_default_dims(cparams);
282
283 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
284 taskmgr.current(task);
285 iface->to_params(iface->macro_define( std::string("MAX_AM_") + task,lmax));
286
287 const auto nullaux = typename Onebody_sh_1_1::AuxIndexType(0u);
288
289 // optionally skip derivative property ints
290 #ifdef DISABLE_ONEBODY_PROPERTY_DERIVS
291 const auto property_operator = !(std::is_same<_OperType,OverlapOper>::value ||
292 std::is_same<_OperType,KineticOper>::value ||
293 std::is_same<_OperType,ElecPotOper>::value);
294 if (property_operator && deriv_level > 0)
295 return;
296 #endif
297 // derivatives of spherical multipole integrals are not implemented
298 {
299 if (std::is_same<_OperType,SphericalMultipoleOper>::value && deriv_level > 0)
300 throw std::invalid_argument("derivatives of spherical multipole ints are not yet implemented");
301 }
302
303
304 //
305 // Construct graphs for each desired target integral and
306 // 1) generate source code for the found traversal path
307 // 2) extract all remaining unresolved recurrence relations and
308 // append them to the stack. Such unresolved RRs are RRs applied
309 // to sets of integrals (rather than to individual integrals).
310 // 3) at the end, for each unresolved recurrence relation generate
311 // explicit source code
312 //
313 SafePtr<DirectedGraph> dg(new DirectedGraph);
314 SafePtr<Strategy> strat(new Strategy());
315 SafePtr<CodeContext> context(new CppCodeContext(cparams));
316 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
317
318 for(unsigned int la=0; la<=lmax; la++) {
319 for(unsigned int lb=0; lb<=lmax; lb++) {
320
321 // skip s|s overlap and elecpot integrals -- no need to involve LIBINT here
322 if (deriv_level == 0 && la == 0 && lb == 0 &&
323 (std::is_same<_OperType,OverlapOper>::value || std::is_same<_OperType,ElecPotOper>::value
324 )
325 )
326 continue;
327
328 SafePtr<Tactic> tactic(new TwoCenter_OS_Tactic(la,lb));
329
330 // this will hold all target shell sets
331 std::vector< SafePtr<Onebody_sh_1_1> > targets;
332
333 /////////////////////////////////
334 // loop over operator components
335 /////////////////////////////////
336 // most important operators have 1 component ...
337 std::vector<OperDescrType> descrs(1); // operator descriptors
338 // important EXCEPTION: multipole moments
339 if (std::is_same<_OperType,CartesianMultipoleOper<3u>>::value) {
340 // reset descriptors array
341 descrs.resize(0);
342
343 // parse the label ... 1emultipole means include multipoles of order 0 (overlap) and 1 (dipole)
344 unsigned int max_multipole_order = 0;
345 assert(label != "overlap");
346 auto key_pos = label.find("emultipole");
347 assert(key_pos != std::string::npos);
348 std::string tmp = label; tmp.erase(key_pos,std::string::npos);
349 istringstream iss(tmp);
350 iss >> max_multipole_order;
351 assert(max_multipole_order > 0);
352
353 // iterate over operators and construct their descriptors
354 for(int multipole_order=0; multipole_order<=max_multipole_order; ++multipole_order) {
355 // we iterate over them same way as over cartesian Gaussian shells
356 int x, y, z;
357 FOR_CART(x,y,z,multipole_order)
358 descrs.push_back(make_descr<OperDescrType>(x,y,z));
359 END_FOR_CART
360 }
361 }
362 if (std::is_same<_OperType,SphericalMultipoleOper>::value) {
363 // reset descriptors array
364 descrs.resize(0);
365 // iterate over operators and construct their descriptors
366 for(int l=0; l<=MULTIPOLE_MAX_ORDER; ++l) {
367 // we iterate over them same way as over solid harmonic Gaussian shells
368 int m;
369 FOR_SOLIDHARM(l,m)
370 descrs.push_back(make_descr<OperDescrType>(l,m));
371 END_FOR_SOLIDHARM
372 }
373 }
374
375 // derivative index is the outermost (slowest running)
376 // operator component is second slowest
377
378 ////////////
379 // loop over unique derivative index combinations
380 ////////////
381 // NB translational invariance is now handled by CR_DerivGauss
382 CartesianDerivIterator<2> diter(deriv_level);
383 bool last_deriv = false;
384 do {
385 BFType a(la);
386 BFType b(lb);
387
388 for(unsigned int c=0; c!=2; ++c) {
389 const unsigned int ndir = std::is_same<BFType,CGShell>::value ? 3 : 1;
390 for(unsigned int xyz=0; xyz<ndir; ++xyz) {
391 if (c == 0) a.deriv().inc(xyz, (*diter).at(xyz));
392 if (c == 1) b.deriv().inc(xyz, (*diter).at(3 + xyz));
393 }
394 }
395
396 // operator component loop
397 for(unsigned int op=0; op!=descrs.size(); ++op) {
398 OperType oper(descrs[op]);
399
400 SafePtr<Onebody_sh_1_1> target = Onebody_sh_1_1::Instance(a,b,nullaux,oper);
401 targets.push_back(target);
402 } // loop over operator components
403
404 last_deriv = diter.last();
405 if (!last_deriv) diter.next();
406 } while (!last_deriv); // loop over derivatives
407
408 // unroll only if max_am <= cparams->max_am_opt(task)
409 using std::max;
410 const unsigned int max_am = max(la,lb);
411 const auto nopers = descrs.size();
412 const bool need_to_optimize = (max_am <= cparams->max_am_opt(task));
413 // decide whether to unroll based on the aggregate size of undifferentiated quartets
414 const bool need_to_unroll = nopers * l_to_cgshellsize(la)*l_to_cgshellsize(lb) <= cparams->unroll_threshold();
415 const unsigned int unroll_threshold = need_to_optimize && need_to_unroll ? std::numeric_limits<unsigned int>::max() : 0;
416
417 dg->registry()->unroll_threshold(unroll_threshold);
418 dg->registry()->do_cse(need_to_optimize);
419 dg->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
420 // Need to accumulate integrals?
421 dg->registry()->accumulate_targets(cparams->accumulate_targets());
422
423 // shove all targets on the graph, IN ORDER
424 for(auto t = targets.begin(); t!=targets.end(); ++t) {
425 SafePtr<DGVertex> t_ptr = dynamic_pointer_cast<DGVertex,Onebody_sh_1_1>(*t);
426 dg->append_target(t_ptr);
427 }
428
429 // make label that characterizes this set of targets
430 std::string eval_label;
431 {
432 std::ostringstream oss;
433 oss << cparams->api_prefix() << "_" << label;
434 if (deriv_level > 0)
435 oss << "deriv" << deriv_level;
436 BFType a(la);
437 BFType b(lb);
438 oss << "_" << a.label() << "_" << b.label();
439 eval_label = oss.str();
440 }
441
442 std::cout << "working on " << eval_label << " ... "; std::cout.flush();
443
444 std::string prefix(cparams->source_directory());
445 std::deque<std::string> decl_filenames;
446 std::deque<std::string> def_filenames;
447
448 // this will generate code for this targets, and potentially generate code for its prerequisites
449 GenerateCode(dg, context, cparams, strat, tactic, memman,
450 decl_filenames, def_filenames,
451 prefix, eval_label, false);
452
453 // update max stack size and # of targets
454 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
455 tparams->max_stack_size(max_am, memman->max_memory_used());
456 tparams->max_ntarget(targets.size());
457 //os << " Max memory used = " << memman->max_memory_used() << std::endl;
458
459 // set pointer to the top-level evaluator function
460 ostringstream oss;
461 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_" << task << "[" << la << "][" << lb << "] = "
462 << context->label_to_name(label_to_funcname(eval_label))
463 << context->end_of_stat() << endl;
464 iface->to_static_init(oss.str());
465
466 // need to declare this function internally
467 for(std::deque<std::string>::const_iterator i=decl_filenames.begin();
468 i != decl_filenames.end();
469 ++i) {
470 oss.str("");
471 oss << "#include <" << *i << ">" << endl;
472 iface->to_int_iface(oss.str());
473 }
474
475 #if DEBUG
476 os << "Max memory used = " << memman->max_memory_used() << endl;
477 #endif
478 dg->reset();
479 memman->reset();
480
481 std::cout << "done" << std::endl;
482
483 } // end of b loop
484 } // end of a loop
485 }
486 #endif
487
try_main(int argc,char * argv[])488 void try_main (int argc, char* argv[])
489 {
490 std::ostream& os = cout;
491
492 // First must declare the tasks
493 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
494 taskmgr.add("default");
495 #if defined(LIBINT_MAX_AM_LIST)
496 for(unsigned int d=1; d<ntokens(LIBINT_MAX_AM_LIST,','); ++d) {
497 taskmgr.add( task_label("default", d) );
498 }
499 #endif
500 #ifdef INCLUDE_ONEBODY
501
502 // overlap, kinetic, elecpot cannot be omitted
503 #define BOOST_PP_ONEBODY_TASK_TUPLE (overlap, \
504 kinetic, \
505 elecpot, \
506 1emultipole, \
507 2emultipole, \
508 3emultipole, \
509 sphemultipole \
510 )
511 #define BOOST_PP_ONEBODY_TASK_OPER_TUPLE (OverlapOper, \
512 KineticOper, \
513 ElecPotOper, \
514 CartesianMultipoleOper<3u>, \
515 CartesianMultipoleOper<3u>, \
516 CartesianMultipoleOper<3u>, \
517 SphericalMultipoleOper \
518 )
519 #define BOOST_PP_ONEBODY_TASK_LIST BOOST_PP_TUPLE_TO_LIST( BOOST_PP_ONEBODY_TASK_TUPLE )
520 #define BOOST_PP_ONEBODY_TASK_OPER_LIST BOOST_PP_TUPLE_TO_LIST( BOOST_PP_ONEBODY_TASK_OPER_TUPLE )
521
522 for(unsigned int d=0; d<=INCLUDE_ONEBODY; ++d) {
523 #define BOOST_PP_ONEBODY_MCR1(r,data,elem) \
524 taskmgr.add( task_label(BOOST_PP_STRINGIZE(elem),d) );
525
526 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_ONEBODY_MCR1, _, BOOST_PP_ONEBODY_TASK_LIST)
527 #undef BOOST_PP_ONEBODY_MCR1
528 }
529 #endif
530 #ifdef INCLUDE_ERI
531 for(unsigned int d=0; d<=INCLUDE_ERI; ++d) {
532 taskmgr.add( task_label("eri",d) );
533 }
534 #endif
535 #ifdef INCLUDE_ERI3
536 for(unsigned int d=0; d<=INCLUDE_ERI3; ++d) {
537 taskmgr.add( task_label("3eri",d) );
538 }
539 #endif
540 #ifdef INCLUDE_ERI2
541 for(unsigned int d=0; d<=INCLUDE_ERI2; ++d) {
542 taskmgr.add( task_label("2eri",d) );
543 }
544 #endif
545 #ifdef INCLUDE_G12
546 taskmgr.add("r12kg12");
547 # if !LIBINT_USE_COMPOSITE_EVALUATORS
548 taskmgr.add("r12_0_g12");
549 taskmgr.add("r12_2_g12");
550 # endif
551 #endif
552 #ifdef INCLUDE_GENG12
553 taskmgr.add("geng12");
554 #endif
555 #ifdef INCLUDE_G12DKH
556 taskmgr.add("g12dkh");
557 #endif
558
559 // use default parameters
560 SafePtr<CompilationParameters> cparams(new CompilationParameters);
561
562 cparams->max_am("default",LIBINT_MAX_AM);
563 cparams->max_am_opt("default",LIBINT_OPT_AM);
564 #if defined(LIBINT_MAX_AM_LIST)
565 for(unsigned int d=0; d<ntokens(LIBINT_MAX_AM_LIST,','); ++d) {
566 cparams->max_am( task_label("default", d), token<unsigned int>(LIBINT_MAX_AM_LIST,',',d));
567 }
568 #endif
569 #if defined(LIBINT_OPT_AM_LIST)
570 for(unsigned int d=0; d<ntokens(LIBINT_OPT_AM_LIST,','); ++d) {
571 cparams->max_am_opt( task_label("default", d), token<unsigned int>(LIBINT_OPT_AM_LIST,',',d));
572 }
573 #endif
574 cparams->num_bf("default",4);
575
576 #ifdef INCLUDE_ONEBODY
577 for(unsigned int d=0; d<=INCLUDE_ONEBODY; ++d) {
578
579 #if defined(ONEBODY_MAX_AM_LIST)
580 # define BOOST_PP_ONEBODY_MCR2(r,data,elem) \
581 cparams->max_am( task_label(elem, d), token<unsigned int>(ONEBODY_MAX_AM_LIST,',',d));
582 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_ONEBODY_MCR2, _, BOOST_PP_ONEBODY_TASK_LIST)
583 # undef BOOST_PP_ONEBODY_MCR2
584 #elif defined(ONEBODY_MAX_AM)
585 # define BOOST_PP_ONEBODY_MCR3(r,data,elem) \
586 cparams->max_am( task_label(elem, d), ONEBODY_MAX_AM);
587 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_ONEBODY_MCR3, _, BOOST_PP_ONEBODY_TASK_LIST)
588 # undef BOOST_PP_ONEBODY_MCR3
589 #endif
590 #if defined(ONEBODY_OPT_AM_LIST)
591 # define BOOST_PP_ONEBODY_MCR4(r,data,elem) \
592 cparams->max_am_opt( task_label(elem, d) ,token<unsigned int>(ONEBODY_OPT_AM_LIST,',',d));
593 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_ONEBODY_MCR4, _, BOOST_PP_ONEBODY_TASK_LIST)
594 # undef BOOST_PP_ONEBODY_MCR4
595 #elif defined(ONEBODY_OPT_AM)
596 # define BOOST_PP_ONEBODY_MCR5(r,data,elem) \
597 cparams->max_am_opt( task_label(elem, d) , ONEBODY_OPT_AM);
598 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_ONEBODY_MCR5, _, BOOST_PP_ONEBODY_TASK_LIST)
599 # undef BOOST_PP_ONEBODY_MCR5
600 #endif
601 }
602 for(unsigned int d=0; d<=INCLUDE_ONEBODY; ++d) {
603 #define BOOST_PP_ONEBODY_MCR6(r,data,elem) \
604 cparams->num_bf(task_label(BOOST_PP_STRINGIZE(elem), d), 2);
605 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_ONEBODY_MCR6, _, BOOST_PP_ONEBODY_TASK_LIST)
606 # undef BOOST_PP_ONEBODY_MCR6
607 }
608 #endif // INCLUDE_ONEBODY
609
610 #ifdef INCLUDE_ERI
611 for(unsigned int d=0; d<=INCLUDE_ERI; ++d) {
612 #if defined(ERI_MAX_AM_LIST)
613 cparams->max_am( task_label("eri", d), token<unsigned int>(ERI_MAX_AM_LIST,',',d));
614 #elif defined(ERI_MAX_AM)
615 cparams->max_am( task_label("eri", d), ERI_MAX_AM);
616 #endif
617 #if defined(ERI_OPT_AM_LIST)
618 cparams->max_am_opt( task_label("eri", d) ,token<unsigned int>(ERI_OPT_AM_LIST,',',d));
619 #elif defined(ERI_OPT_AM)
620 cparams->max_am_opt( task_label("eri", d) , ERI_OPT_AM);
621 #endif
622 }
623 for(unsigned int d=0; d<=INCLUDE_ERI; ++d) {
624 cparams->num_bf(task_label("eri", d), 4);
625 }
626 #endif
627 #ifdef INCLUDE_ERI3
628 for(unsigned int d=0; d<=INCLUDE_ERI3; ++d) {
629 #if defined(ERI3_MAX_AM_LIST)
630 cparams->max_am( task_label("3eri", d), token<unsigned int>(ERI3_MAX_AM_LIST,',',d));
631 #elif defined(ERI3_MAX_AM)
632 cparams->max_am( task_label("3eri", d), ERI3_MAX_AM);
633 #endif
634 #if defined(ERI3_OPT_AM_LIST)
635 cparams->max_am_opt( task_label("3eri", d) ,token<unsigned int>(ERI3_OPT_AM_LIST,',',d));
636 #elif defined(ERI3_OPT_AM)
637 cparams->max_am_opt( task_label("3eri", d) , ERI3_OPT_AM);
638 #endif
639
640 #if defined(LIBINT_MAX_AM_LIST)
641 cparams->max_am( task_label("3eri", d), cparams->max_am(task_label("default", d)), 1 );
642 cparams->max_am( task_label("3eri", d), cparams->max_am(task_label("default", d)), 2 );
643 #else
644 cparams->max_am( task_label("3eri", d), cparams->max_am("default"), 1 );
645 cparams->max_am( task_label("3eri", d), cparams->max_am("default"), 2 );
646 #endif
647 }
648 for(unsigned int d=0; d<=INCLUDE_ERI3; ++d) {
649 cparams->num_bf( task_label("3eri", d) ,3);
650 }
651 #endif
652 #ifdef INCLUDE_ERI2
653 for(unsigned int d=0; d<=INCLUDE_ERI2; ++d) {
654 #if defined(ERI2_MAX_AM_LIST)
655 cparams->max_am( task_label("2eri", d), token<unsigned int>(ERI2_MAX_AM_LIST,',',d));
656 #elif defined(ERI2_MAX_AM)
657 cparams->max_am( task_label("2eri", d), ERI2_MAX_AM);
658 #endif
659 #if defined(ERI2_OPT_AM_LIST)
660 cparams->max_am_opt( task_label("2eri", d) ,token<unsigned int>(ERI2_OPT_AM_LIST,',',d));
661 #elif defined(ERI2_OPT_AM)
662 cparams->max_am_opt( task_label("2eri", d) , ERI2_OPT_AM);
663 #endif
664 }
665 for(unsigned int d=0; d<=INCLUDE_ERI2; ++d) {
666 cparams->num_bf( task_label("2eri", d) ,2);
667 }
668 #endif
669 #ifdef INCLUDE_G12
670 # ifndef G12_MAX_AM
671 # define LIBINT_MAX_AM G12_MAX_AM
672 # endif
673 # ifndef G12_OPT_AM
674 # define LIBINT_OPT_AM G12_OPT_AM
675 # endif
676 cparams->max_am("r12kg12",G12_MAX_AM);
677 cparams->max_am_opt("r12kg12",G12_OPT_AM);
678 cparams->num_bf("r12kg12", 4);
679 # if !LIBINT_USE_COMPOSITE_EVALUATORS
680 cparams->max_am("r12_0_g12",G12_MAX_AM);
681 cparams->max_am_opt("r12_0_g12",G12_OPT_AM);
682 cparams->num_bf("r12_0_g12", 4);
683 cparams->max_am("r12_2_g12",G12_MAX_AM);
684 cparams->max_am_opt("r12_2_g12",G12_OPT_AM);
685 cparams->num_bf("r12_2_g12", 4);
686 # endif
687 #endif
688 #ifdef INCLUDE_G12DKH
689 # ifndef G12DKH_MAX_AM
690 # define LIBINT_MAX_AM G12DKH_MAX_AM
691 # endif
692 # ifndef G12DKH_OPT_AM
693 # define LIBINT_OPT_AM G12DKH_OPT_AM
694 # endif
695 cparams->max_am("g12dkh",G12DKH_MAX_AM);
696 cparams->max_am_opt("g12dkh",G12DKH_OPT_AM);
697 #endif
698 #if LIBINT_ENABLE_UNROLLING
699 cparams->unroll_threshold(LIBINT_ENABLE_UNROLLING);
700 #endif
701 #ifdef LIBINT_VECTOR_LENGTH
702 cparams->max_vector_length(LIBINT_VECTOR_LENGTH);
703 #endif
704 #ifdef LIBINT_VECTOR_METHOD
705 {
706 const std::string token("line");
707 const bool vectorize_by_line = (token == LIBINT_VECTOR_METHOD);
708 cparams->vectorize_by_line(vectorize_by_line);
709 }
710 #endif
711 #ifdef LIBINT_ALIGN_SIZE
712 cparams->align_size(LIBINT_ALIGN_SIZE);
713 #endif
714 #if LIBINT_FLOP_COUNT
715 cparams->count_flops(true);
716 #endif
717 #if LIBINT_PROFILE
718 cparams->profile(true);
719 #endif
720 #if LIBINT_ACCUM_INTS
721 cparams->accumulate_targets(true);
722 #else
723 cparams->accumulate_targets(false);
724 #endif
725 #if LIBINT_CONTRACTED_INTS
726 cparams->contracted_targets(true);
727 CGShell::set_contracted_default_value(true);
728 #else
729 cparams->contracted_targets(false);
730 CGShell::set_contracted_default_value(false);
731 #endif
732 #ifdef LIBINT_USER_DEFINED_REAL
733 {
734 const std::string realtype(LIBINT_USER_DEFINED_REAL);
735 cparams->realtype(realtype);
736 }
737 #endif
738 #ifdef LIBINT_API_PREFIX
739 {
740 const std::string api_prefix(LIBINT_API_PREFIX);
741 cparams->api_prefix(api_prefix);
742 }
743 #endif
744 #ifdef LIBINT_SINGLE_EVALTYPE
745 {
746 cparams->single_evaltype(true);
747 }
748 #else
749 throw std::runtime_error("Cannot generate specialized evaluator types yet");
750 #endif
751
752 // initialize code context to produce library API
753 SafePtr<CodeContext> icontext(new CppCodeContext(cparams));
754 // initialize object to generate interface
755 SafePtr<Libint2Iface> iface(new Libint2Iface(cparams,icontext));
756
757 print_header(os);
758 print_config(os);
759 // transfer some configuration parameters to the generated library API
760 iface->to_params(iface->macro_define("USE_COMPOSITE_EVALUATORS",LIBINT_USE_COMPOSITE_EVALUATORS));
761 iface->to_params(iface->macro_define("CARTGAUSS_MAX_AM",LIBINT_CARTGAUSS_MAX_AM));
762 iface->to_params(iface->macro_define("CGSHELL_ORDERING",LIBINT_CGSHELL_ORDERING));
763 iface->to_params(iface->macro_define("CGSHELL_ORDERING_STANDARD",LIBINT_CGSHELL_ORDERING_STANDARD));
764 iface->to_params(iface->macro_define("CGSHELL_ORDERING_INTV3",LIBINT_CGSHELL_ORDERING_INTV3));
765 iface->to_params(iface->macro_define("CGSHELL_ORDERING_GAMESS",LIBINT_CGSHELL_ORDERING_GAMESS));
766 iface->to_params(iface->macro_define("CGSHELL_ORDERING_ORCA",LIBINT_CGSHELL_ORDERING_ORCA));
767 iface->to_params(iface->macro_define("CGSHELL_ORDERING_BAGEL",LIBINT_CGSHELL_ORDERING_BAGEL));
768 iface->to_params(iface->macro_define("SHELLQUARTET_SET",LIBINT_SHELL_SET));
769 iface->to_params(iface->macro_define("SHELLQUARTET_SET_STANDARD",LIBINT_SHELL_SET_STANDARD));
770 iface->to_params(iface->macro_define("SHELLQUARTET_SET_ORCA",LIBINT_SHELL_SET_ORCA));
771 #if defined(LIBINT_MAX_AM_LIST)
772 for(unsigned int d=0; d<ntokens(LIBINT_MAX_AM_LIST,','); ++d) {
773 {
774 std::ostringstream oss;
775 oss << "MAX_AM";
776 if (d > 0) oss << d;
777 iface->to_params(iface->macro_define(oss.str(),token<unsigned int>(LIBINT_MAX_AM_LIST,',',d)));
778 }
779 {
780 std::ostringstream oss;
781 oss << "MAX_AM_default";
782 if (d > 0) oss << d;
783 iface->to_params(iface->macro_define(oss.str(),token<unsigned int>(LIBINT_MAX_AM_LIST,',',d)));
784 }
785 }
786 #else
787 iface->to_params(iface->macro_define("MAX_AM",LIBINT_MAX_AM));
788 for(unsigned int d=0; d<=INCLUDE_ERI; ++d) {
789 std::ostringstream oss;
790 oss << "MAX_AM_default";
791 if (d > 0) oss << d;
792 iface->to_params(iface->macro_define(oss.str(),LIBINT_MAX_AM));
793 }
794 #endif
795 cparams->print(os);
796
797 #ifdef INCLUDE_ONEBODY
798 for(unsigned int d=0; d<=INCLUDE_ONEBODY; ++d) {
799 # define BOOST_PP_ONEBODY_MCR7(r,data,i,elem) \
800 build_onebody_1b_1k< BOOST_PP_LIST_AT (BOOST_PP_ONEBODY_TASK_OPER_LIST, i) >(os,BOOST_PP_STRINGIZE(elem),cparams,iface,d);
801 BOOST_PP_LIST_FOR_EACH_I ( BOOST_PP_ONEBODY_MCR7, _, BOOST_PP_ONEBODY_TASK_LIST)
802 # undef BOOST_PP_ONEBODY_MCR7
803 }
804 #endif
805 #ifdef INCLUDE_ERI
806 # if !USE_GENERIC_ERI_BUILD
807 build_TwoPRep_2b_2k(os,cparams,iface);
808 # else
809 for(unsigned int d=0; d<=INCLUDE_ERI; ++d) {
810 build_TwoPRep_2b_2k(os,cparams,iface,d);
811 }
812 # endif
813 #endif
814 #ifdef INCLUDE_ERI3
815 for(unsigned int d=0; d<=INCLUDE_ERI3; ++d) {
816 build_TwoPRep_1b_2k(os,cparams,iface,d);
817 }
818 # if ERI3_PURE_SH
819 iface->to_params(iface->macro_define("ERI3_PURE_SH",1));
820 # endif
821 #endif
822 #ifdef INCLUDE_ERI2
823 for(unsigned int d=0; d<=INCLUDE_ERI2; ++d) {
824 build_TwoPRep_1b_1k(os,cparams,iface,d);
825 }
826 # if ERI2_PURE_SH
827 iface->to_params(iface->macro_define("ERI2_PURE_SH",1));
828 # endif
829 #endif
830 #ifdef INCLUDE_G12
831 # if LIBINT_USE_COMPOSITE_EVALUATORS
832 build_R12kG12_2b_2k(os,cparams,iface);
833 # else
834 build_R12kG12_2b_2k_separate(os,cparams,iface);
835 # endif
836 #endif
837 #ifdef INCLUDE_G12DKH
838 build_G12DKH_2b_2k(os,cparams,iface);
839 #endif
840
841 // Generate code for the set-level RRs
842 std::deque<std::string> decl_filenames, def_filenames;
843 generate_rr_code(os,cparams, decl_filenames, def_filenames);
844
845 #if DEBUG
846 // print out the external symbols found for each task
847 typedef LibraryTaskManager::TasksCIter tciter;
848 const tciter tend = taskmgr.plast();
849 for(tciter t=taskmgr.first(); t!=tend; ++t) {
850 const SafePtr<TaskExternSymbols> tsymbols = t->symbols();
851 typedef TaskExternSymbols::SymbolList SymbolList;
852 const SymbolList& symbols = tsymbols->symbols();
853 // print out the labels
854 std::cout << "Recovered labels for task " << t->label() << std::endl;
855 typedef SymbolList::const_iterator citer;
856 citer end = symbols.end();
857 for(citer s=symbols.begin(); s!=end; ++s)
858 std::cout << *s << std::endl;
859 }
860 #endif
861
862 // transfer some library configuration to library API
863 config_to_api(cparams,iface);
864
865 os << "Compilation finished. Goodbye." << endl;
866 }
867
868 void
print_header(std::ostream & os)869 print_header(std::ostream& os)
870 {
871 os << "----------------------------------------------" << endl;
872 os << " build_libint2: optimizing integrals compiler " << endl;
873 os << " by Edward F. Valeev " << endl;
874 os << " and ideas by Justin Fermann " << endl;
875 os << " and Curtis Janssen " << endl;
876 os << "----------------------------------------------" << endl << endl;
877 }
878
879 void
print_config(std::ostream & os)880 print_config(std::ostream& os)
881 {
882 #ifdef INCLUDE_ONEBODY
883 os << "Will support one-body integrals";
884 if (INCLUDE_ONEBODY > 0)
885 os << "(deriv order = " << INCLUDE_ONEBODY << ")";
886 os << endl;
887 #endif
888 #ifdef INCLUDE_ERI
889 os << "Will support ERI";
890 if (INCLUDE_ERI > 0)
891 os << "(deriv order = " << INCLUDE_ERI << ")";
892 os << endl;
893 #endif
894 #ifdef INCLUDE_ERI3
895 os << "Will support 3-center ERI";
896 if (INCLUDE_ERI3 > 0)
897 os << "(deriv order = " << INCLUDE_ERI3 << ")";
898 os << endl;
899 #endif
900 #ifdef INCLUDE_ERI2
901 os << "Will support 2-center ERI";
902 if (INCLUDE_ERI2 > 0)
903 os << "(deriv order = " << INCLUDE_ERI2 << ")";
904 os << endl;
905 #endif
906 #ifdef INCLUDE_G12
907 os << "Will support G12 (commutators = ";
908 # if SUPPORT_T1G12
909 os << "yes";
910 # else
911 os << "false";
912 # endif
913 os << ")" << endl;
914 #endif
915 #ifdef INCLUDE_G12DKH
916 os << "Will support G12DKH" << endl;
917 #endif
918 }
919
920
921 #ifdef INCLUDE_ERI
922 void
build_TwoPRep_2b_2k(std::ostream & os,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface,unsigned int deriv_level)923 build_TwoPRep_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
924 SafePtr<Libint2Iface>& iface, unsigned int deriv_level)
925 {
926 const std::string task = task_label("eri", deriv_level);
927 typedef TwoPRep_11_11_sq TwoPRep_sh_11_11;
928 vector<CGShell*> shells;
929 unsigned int lmax = cparams->max_am(task);
930 for(unsigned int l=0; l<=lmax; l++) {
931 shells.push_back(new CGShell(l));
932 }
933 ImplicitDimensions::set_default_dims(cparams);
934
935 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
936 taskmgr.current(task);
937 iface->to_params(iface->macro_define( std::string("MAX_AM_") + task,lmax));
938
939 //
940 // Construct graphs for each desired target integral and
941 // 1) generate source code for the found traversal path
942 // 2) extract all remaining unresolved recurrence relations and
943 // append them to the stack. Such unresolved RRs are RRs applied
944 // to sets of integrals (rather than to individual integrals).
945 // 3) at the end, for each unresolved recurrence relation generate
946 // explicit source code
947 //
948 SafePtr<DirectedGraph> dg_xxxx(new DirectedGraph);
949 SafePtr<Strategy> strat(new Strategy());
950 SafePtr<CodeContext> context(new CppCodeContext(cparams));
951 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
952
953 for(unsigned int la=0; la<=lmax; la++) {
954 for(unsigned int lb=0; lb<=lmax; lb++) {
955 for(unsigned int lc=0; lc<=lmax; lc++) {
956 for(unsigned int ld=0; ld<=lmax; ld++) {
957
958 if (!ShellQuartetSetPredicate<static_cast<ShellSetType>(LIBINT_SHELL_SET)>::value(la,lb,lc,ld))
959 continue;
960
961 //SafePtr<Tactic> tactic(new ParticleDirectionTactic(la+lb > lc+ld ? false : true));
962 SafePtr<Tactic> tactic(new FourCenter_OS_Tactic(la, lb, lc, ld));
963
964 #if STUDY_MEMORY_USAGE
965 const int lim = 1;
966 if (! (la == lim && lb == lim && lc == lim && ld == lim) )
967 continue;
968 #endif
969
970 // unroll only if max_am <= cparams->max_am_opt(task)
971 using std::max;
972 const unsigned int max_am = max(max(la,lb),max(lc,ld));
973 const bool need_to_optimize = (max_am <= cparams->max_am_opt(task));
974 const bool need_to_unroll = l_to_cgshellsize(la)*l_to_cgshellsize(lb)*
975 l_to_cgshellsize(lc)*l_to_cgshellsize(ld) <= cparams->unroll_threshold();
976 const unsigned int unroll_threshold = need_to_optimize && need_to_unroll ? std::numeric_limits<unsigned int>::max() : 0;
977 dg_xxxx->registry()->unroll_threshold(unroll_threshold);
978 dg_xxxx->registry()->do_cse(need_to_optimize);
979 dg_xxxx->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
980 //dg_xxxx->registry()->condense_expr(true);
981 // Need to accumulate integrals?
982 dg_xxxx->registry()->accumulate_targets(cparams->accumulate_targets());
983 // need to profile?
984 if (cparams->profile()) {
985 dg_xxxx->registry()->current_timer(0);
986 }
987
988 ////////////
989 // loop over unique derivative index combinations
990 ////////////
991 // NB translational invariance is now handled by CR_DerivGauss
992 CartesianDerivIterator<4> diter(deriv_level);
993 std::vector< SafePtr<TwoPRep_sh_11_11> > targets;
994 bool last_deriv = false;
995 do {
996 CGShell a(la);
997 CGShell b(lb);
998 CGShell c(lc);
999 CGShell d(ld);
1000
1001 for(unsigned int i=0; i<4; ++i) {
1002 for(unsigned int xyz=0; xyz<3; ++xyz) {
1003 if (i == 0) a.deriv().inc(xyz, (*diter).at(3 * i + xyz));
1004 if (i == 1) b.deriv().inc(xyz, (*diter).at(3 * i + xyz));
1005 if (i == 2) c.deriv().inc(xyz, (*diter).at(3 * i + xyz));
1006 if (i == 3) d.deriv().inc(xyz, (*diter).at(3 * i + xyz));
1007 }
1008 }
1009
1010 SafePtr<TwoPRep_sh_11_11> abcd = TwoPRep_sh_11_11::Instance(a,b,c,d,mType(0u));
1011 targets.push_back(abcd);
1012 last_deriv = diter.last();
1013 if (!last_deriv) diter.next();
1014 } while (!last_deriv);
1015 // append all derivatives as targets to the graph
1016 for(std::vector< SafePtr<TwoPRep_sh_11_11> >::const_iterator t=targets.begin();
1017 t != targets.end();
1018 ++t) {
1019 SafePtr<DGVertex> t_ptr = dynamic_pointer_cast<DGVertex,TwoPRep_sh_11_11>(*t);
1020 dg_xxxx->append_target(t_ptr);
1021 }
1022
1023 // make label that characterizes this set of targets
1024 // use the label of the nondifferentiated integral as a base
1025 std::string abcd_label;
1026 {
1027 CGShell a(la);
1028 CGShell b(lb);
1029 CGShell c(lc);
1030 CGShell d(ld);
1031 SafePtr<TwoPRep_sh_11_11> abcd = TwoPRep_sh_11_11::Instance(a,b,c,d,mType(0u));
1032 abcd_label = abcd->label();
1033 }
1034 // + derivative level (if deriv_level > 0)
1035 std::string label;
1036 {
1037 label = cparams->api_prefix();
1038 if (deriv_level != 0) {
1039 std::ostringstream oss;
1040 oss << "deriv" << deriv_level;
1041 label += oss.str();
1042 }
1043 label += abcd_label;
1044 }
1045
1046 std::cout << "working on " << label << " ... "; std::cout.flush();
1047
1048 std::string prefix(cparams->source_directory());
1049 std::deque<std::string> decl_filenames;
1050 std::deque<std::string> def_filenames;
1051
1052 // this will generate code for these targets, and potentially generate code for its prerequisites
1053 GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman,
1054 decl_filenames, def_filenames,
1055 prefix, label, false);
1056
1057 // update max stack size and # of targets
1058 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
1059 tparams->max_stack_size(max_am, memman->max_memory_used());
1060 tparams->max_ntarget(targets.size());
1061 //os << " Max memory used = " << memman->max_memory_used() << std::endl;
1062
1063 // set pointer to the top-level evaluator function
1064 ostringstream oss;
1065 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_" << task << "[" << la << "][" << lb << "][" << lc << "]["
1066 << ld <<"] = " << context->label_to_name(label_to_funcname(label))
1067 << context->end_of_stat() << endl;
1068 iface->to_static_init(oss.str());
1069
1070 // need to declare this function internally
1071 for(std::deque<std::string>::const_iterator i=decl_filenames.begin();
1072 i != decl_filenames.end();
1073 ++i) {
1074 oss.str("");
1075 oss << "#include <" << *i << ">" << endl;
1076 iface->to_int_iface(oss.str());
1077 }
1078
1079 #if DEBUG
1080 os << "Max memory used = " << memman->max_memory_used() << endl;
1081 #endif
1082 dg_xxxx->reset();
1083 memman->reset();
1084
1085 std::cout << "done" << std::endl;
1086
1087 } // end of d loop
1088 } // end of c loop
1089 } // end of b loop
1090 } // end of a loop
1091 }
1092
1093 #endif // INCLUDE_ERI
1094
1095 #ifdef INCLUDE_ERI3
1096
1097 void
build_TwoPRep_1b_2k(std::ostream & os,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface,unsigned int deriv_level)1098 build_TwoPRep_1b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
1099 SafePtr<Libint2Iface>& iface, unsigned int deriv_level)
1100 {
1101 const std::string task = task_label("3eri", deriv_level);
1102 typedef TwoPRep_11_11_sq TwoPRep_sh_11_11;
1103 vector<CGShell*> shells;
1104 const unsigned int lmax = cparams->max_am(task);
1105 const unsigned int lmax_default = const_cast<const CompilationParameters*>(cparams.get())->max_am(task, 1);
1106 if (lmax != lmax_default)
1107 iface->to_params(iface->macro_define( std::string("CENTER_DEPENDENT_MAX_AM_") + task, 1));
1108
1109 for(unsigned int l=0; l<=lmax; l++) {
1110 shells.push_back(new CGShell(l));
1111 }
1112 ImplicitDimensions::set_default_dims(cparams);
1113
1114 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
1115 taskmgr.current(task);
1116 iface->to_params(iface->macro_define( std::string("MAX_AM_") + task,lmax));
1117
1118 //
1119 // Construct graphs for each desired target integral and
1120 // 1) generate source code for the found traversal path
1121 // 2) extract all remaining unresolved recurrence relations and
1122 // append them to the stack. Such unresolved RRs are RRs applied
1123 // to sets of integrals (rather than to individual integrals).
1124 // 3) at the end, for each unresolved recurrence relation generate
1125 // explicit source code
1126 //
1127 SafePtr<DirectedGraph> dg_xxx(new DirectedGraph);
1128 SafePtr<Strategy> strat(new Strategy());
1129 SafePtr<CodeContext> context(new CppCodeContext(cparams));
1130 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
1131
1132 for(unsigned int lbra=0; lbra<=lmax; lbra++) {
1133 for(unsigned int lc=0; lc<=lmax_default; lc++) {
1134 for(unsigned int ld=0; ld<=lmax_default; ld++) {
1135
1136 // eliminate some cases depending on the desired convention
1137 if (!ShellTripletSetPredicate<static_cast<ShellSetType>(LIBINT_SHELL_SET)>::value(lbra,lc,ld))
1138 continue;
1139
1140 // I will use 4-center recurrence relations and integrals, and have one center carry an s function
1141 // unfortunately, depending on the direction in which the build goes it must be A(0) or B(1)
1142 const unsigned int dummy_center = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 0 : 1;
1143
1144 //SafePtr<Tactic> tactic(new ParticleDirectionTactic(lbra > lc+ld ? false : true));
1145 SafePtr<Tactic> tactic(new FourCenter_OS_Tactic(dummy_center==0?0:lbra,
1146 dummy_center==1?0:lbra, lc, ld));
1147
1148 #if STUDY_MEMORY_USAGE
1149 const int lim = 1;
1150 if (! (lbra == lim && lc == lim && ld == lim) )
1151 continue;
1152 #endif
1153
1154 // unroll only if max_am <= cparams->max_am_opt(task)
1155 using std::max;
1156 const unsigned int max_am = max(max(lc,ld),lbra);
1157 const bool need_to_optimize = (max_am <= cparams->max_am_opt(task));
1158 const bool need_to_unroll = l_to_cgshellsize(lbra)*
1159 l_to_cgshellsize(lc)*
1160 l_to_cgshellsize(ld) <= cparams->unroll_threshold();
1161 const unsigned int unroll_threshold = need_to_optimize && need_to_unroll ? std::numeric_limits<unsigned int>::max() : 0;
1162 dg_xxx->registry()->unroll_threshold(unroll_threshold);
1163 dg_xxx->registry()->do_cse(need_to_optimize);
1164 dg_xxx->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
1165 //dg_xxx->registry()->condense_expr(true);
1166 // Need to accumulate integrals?
1167 dg_xxx->registry()->accumulate_targets(cparams->accumulate_targets());
1168
1169 ////////////
1170 // loop over unique derivative index combinations
1171 ////////////
1172 // NB translational invariance is now handled by CR_DerivGauss
1173 CartesianDerivIterator<3> diter(deriv_level);
1174 std::vector< SafePtr<TwoPRep_sh_11_11> > targets;
1175 bool last_deriv = false;
1176 do {
1177 CGShell a = (dummy_center == 0) ? CGShell::unit() : CGShell(lbra);
1178 CGShell b = (dummy_center == 1) ? CGShell::unit() : CGShell(lbra);
1179 CGShell c(lc);
1180 CGShell d(ld);
1181 #if ERI3_PURE_SH
1182 if (dummy_center == 1 && deriv_level == 0) a.pure_sh(true);
1183 if (dummy_center == 0 && deriv_level == 0) b.pure_sh(true);
1184 #endif
1185
1186 unsigned int center = 0;
1187 for(unsigned int i=0; i<4; ++i) {
1188 if (i == dummy_center)
1189 continue;
1190 for(unsigned int xyz=0; xyz<3; ++xyz) {
1191 if (i == 0) a.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1192 if (i == 1) b.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1193 if (i == 2) c.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1194 if (i == 3) d.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1195 }
1196 ++center;
1197 }
1198
1199 // use 4-center integrals
1200 SafePtr<TwoPRep_sh_11_11> abcd = TwoPRep_sh_11_11::Instance(a,b,c,d,mType(0u));
1201 targets.push_back(abcd);
1202 last_deriv = diter.last();
1203 if (!last_deriv) diter.next();
1204 } while (!last_deriv);
1205 // append all derivatives as targets to the graph
1206 for(std::vector< SafePtr<TwoPRep_sh_11_11> >::const_iterator t=targets.begin();
1207 t != targets.end();
1208 ++t) {
1209 SafePtr<DGVertex> t_ptr = dynamic_pointer_cast<DGVertex,TwoPRep_sh_11_11>(*t);
1210 dg_xxx->append_target(t_ptr);
1211 }
1212
1213 // make label that characterizes this set of targets
1214 // use the label of the nondifferentiated integral as a base
1215 std::string abcd_label;
1216 {
1217 CGShell a = (dummy_center == 0) ? CGShell::unit() : CGShell(lbra);
1218 CGShell b = (dummy_center == 1) ? CGShell::unit() : CGShell(lbra);
1219 CGShell c(lc);
1220 CGShell d(ld);
1221 #if ERI3_PURE_SH
1222 if (dummy_center == 1 && deriv_level == 0) a.pure_sh(true);
1223 if (dummy_center == 0 && deriv_level == 0) b.pure_sh(true);
1224 #endif
1225 SafePtr<TwoPRep_sh_11_11> abcd = TwoPRep_sh_11_11::Instance(a,b,c,d,mType(0u));
1226 abcd_label = abcd->label();
1227 }
1228 // + derivative level (if deriv_level > 0)
1229 std::string label;
1230 {
1231 label = cparams->api_prefix();
1232 if (deriv_level != 0) {
1233 std::ostringstream oss;
1234 oss << "deriv" << deriv_level;
1235 label += oss.str();
1236 }
1237 label += "eri3";
1238 label += abcd_label;
1239 }
1240
1241 std::cout << "working on " << label << " ... "; std::cout.flush();
1242
1243 std::string prefix(cparams->source_directory());
1244 std::deque<std::string> decl_filenames;
1245 std::deque<std::string> def_filenames;
1246
1247 // this will generate code for this targets, and potentially generate code for its prerequisites
1248 GenerateCode(dg_xxx, context, cparams, strat, tactic, memman,
1249 decl_filenames, def_filenames,
1250 prefix, label, false);
1251
1252 // update max stack size and # of targets
1253 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
1254 tparams->max_stack_size(max_am, memman->max_memory_used());
1255 tparams->max_ntarget(targets.size());
1256 //os << " Max memory used = " << memman->max_memory_used() << std::endl;
1257
1258 // set pointer to the top-level evaluator function
1259 ostringstream oss;
1260 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_" << task << "[" << lbra << "][" << lc << "][" << ld << "] = "
1261 << context->label_to_name(label_to_funcname(label))
1262 << context->end_of_stat() << endl;
1263 iface->to_static_init(oss.str());
1264
1265 // need to declare this function internally
1266 for(std::deque<std::string>::const_iterator i=decl_filenames.begin();
1267 i != decl_filenames.end();
1268 ++i) {
1269 oss.str("");
1270 oss << "#include <" << *i << ">" << endl;
1271 iface->to_int_iface(oss.str());
1272 }
1273
1274 #if DEBUG
1275 os << "Max memory used = " << memman->max_memory_used() << endl;
1276 #endif
1277 dg_xxx->reset();
1278 memman->reset();
1279
1280 } // end of d loop
1281 } // end of c loop
1282 } // end of bra loop
1283 }
1284 #endif // INCLUDE_ERI3
1285
1286 #ifdef INCLUDE_ERI2
1287
1288 void
build_TwoPRep_1b_1k(std::ostream & os,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface,unsigned int deriv_level)1289 build_TwoPRep_1b_1k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
1290 SafePtr<Libint2Iface>& iface, unsigned int deriv_level)
1291 {
1292 const std::string task = task_label("2eri", deriv_level);
1293 typedef TwoPRep_11_11_sq TwoPRep_sh_11_11;
1294 vector<CGShell*> shells;
1295 unsigned int lmax = cparams->max_am(task);
1296 for(unsigned int l=0; l<=lmax; l++) {
1297 shells.push_back(new CGShell(l));
1298 }
1299 ImplicitDimensions::set_default_dims(cparams);
1300
1301 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
1302 taskmgr.current(task);
1303 iface->to_params(iface->macro_define( std::string("MAX_AM_") + task,lmax));
1304
1305 //
1306 // Construct graphs for each desired target integral and
1307 // 1) generate source code for the found traversal path
1308 // 2) extract all remaining unresolved recurrence relations and
1309 // append them to the stack. Such unresolved RRs are RRs applied
1310 // to sets of integrals (rather than to individual integrals).
1311 // 3) at the end, for each unresolved recurrence relation generate
1312 // explicit source code
1313 //
1314 SafePtr<DirectedGraph> dg_xxx(new DirectedGraph);
1315 SafePtr<Strategy> strat(new Strategy());
1316 SafePtr<CodeContext> context(new CppCodeContext(cparams));
1317 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
1318
1319 for(unsigned int lbra=0; lbra<=lmax; lbra++) {
1320 for(unsigned int lket=0; lket<=lmax; lket++) {
1321
1322 // I will use 4-center recurrence relations and integrals, and have two centers carry an s function
1323 // unfortunately, depending on the direction in which the build goes it must be A(0) and C(2) or B(1) and D(3)
1324 const unsigned int dummy_center1 = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 0 : 1;
1325 const unsigned int dummy_center2 = (LIBINT_SHELL_SET == LIBINT_SHELL_SET_ORCA) ? 2 : 3;
1326
1327 //SafePtr<Tactic> tactic(new ParticleDirectionTactic(lbra > lket ? false : true));
1328 SafePtr<Tactic> tactic(new FourCenter_OS_Tactic(dummy_center1==0?0:lbra,
1329 dummy_center1==1?0:lbra,
1330 dummy_center2==2?0:lket,
1331 dummy_center2==3?0:lket));
1332
1333 #if STUDY_MEMORY_USAGE
1334 const int lim = 1;
1335 if (! (lbra == lim && lket == lim) )
1336 continue;
1337 #endif
1338
1339 // unroll only if max_am <= cparams->max_am_opt(task)
1340 using std::max;
1341 const unsigned int max_am = max(lbra,lket);
1342 const bool need_to_optimize = (max_am <= cparams->max_am_opt(task));
1343 const bool need_to_unroll = l_to_cgshellsize(lbra)*
1344 l_to_cgshellsize(lket) <= cparams->unroll_threshold();
1345 const unsigned int unroll_threshold = need_to_optimize && need_to_unroll ? std::numeric_limits<unsigned int>::max() : 0;
1346 dg_xxx->registry()->unroll_threshold(unroll_threshold);
1347 dg_xxx->registry()->do_cse(need_to_optimize);
1348 dg_xxx->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
1349 // Need to accumulate integrals?
1350 dg_xxx->registry()->accumulate_targets(cparams->accumulate_targets());
1351
1352 ////////////
1353 // loop over unique derivative index combinations
1354 ////////////
1355 // NB translational invariance is now handled by CR_DerivGauss
1356 CartesianDerivIterator<2> diter(deriv_level);
1357 std::vector< SafePtr<TwoPRep_sh_11_11> > targets;
1358 bool last_deriv = false;
1359 do {
1360 CGShell a = (dummy_center1 == 0) ? CGShell::unit() : CGShell(lbra);
1361 CGShell b = (dummy_center1 == 1) ? CGShell::unit() : CGShell(lbra);
1362 CGShell c = (dummy_center2 == 2) ? CGShell::unit() : CGShell(lket);
1363 CGShell d = (dummy_center2 == 3) ? CGShell::unit() : CGShell(lket);
1364 #if ERI2_PURE_SH
1365 if (dummy_center1 == 1 && deriv_level == 0) a.pure_sh(true);
1366 if (dummy_center1 == 0 && deriv_level == 0) b.pure_sh(true);
1367 if (dummy_center2 == 3 && deriv_level == 0) c.pure_sh(true);
1368 if (dummy_center2 == 2 && deriv_level == 0) d.pure_sh(true);
1369 #endif
1370
1371 unsigned int center = 0;
1372 for(unsigned int i=0; i<4; ++i) {
1373 if (i == dummy_center1 || i == dummy_center2)
1374 continue;
1375 for(unsigned int xyz=0; xyz<3; ++xyz) {
1376 if (i == 0) a.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1377 if (i == 1) b.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1378 if (i == 2) c.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1379 if (i == 3) d.deriv().inc(xyz, (*diter).at(3 * center + xyz));
1380 }
1381 ++center;
1382 }
1383
1384 // use 4-center integrals
1385 SafePtr<TwoPRep_sh_11_11> abcd = TwoPRep_sh_11_11::Instance(a,b,c,d,mType(0u));
1386 targets.push_back(abcd);
1387 last_deriv = diter.last();
1388 if (!last_deriv) diter.next();
1389 } while (!last_deriv);
1390 // append all derivatives as targets to the graph
1391 for(std::vector< SafePtr<TwoPRep_sh_11_11> >::const_iterator t=targets.begin();
1392 t != targets.end();
1393 ++t) {
1394 SafePtr<DGVertex> t_ptr = dynamic_pointer_cast<DGVertex,TwoPRep_sh_11_11>(*t);
1395 dg_xxx->append_target(t_ptr);
1396 }
1397
1398 // make label that characterizes this set of targets
1399 // use the label of the nondifferentiated integral as a base
1400 std::string abcd_label;
1401 {
1402 CGShell a = (dummy_center1 == 0) ? CGShell::unit() : CGShell(lbra);
1403 CGShell b = (dummy_center1 == 1) ? CGShell::unit() : CGShell(lbra);
1404 CGShell c = (dummy_center2 == 2) ? CGShell::unit() : CGShell(lket);
1405 CGShell d = (dummy_center2 == 3) ? CGShell::unit() : CGShell(lket);
1406 #if ERI2_PURE_SH
1407 if (dummy_center1 == 1 && deriv_level == 0) a.pure_sh(true);
1408 if (dummy_center1 == 0 && deriv_level == 0) b.pure_sh(true);
1409 if (dummy_center2 == 3 && deriv_level == 0) c.pure_sh(true);
1410 if (dummy_center2 == 2 && deriv_level == 0) d.pure_sh(true);
1411 #endif
1412 SafePtr<TwoPRep_sh_11_11> abcd = TwoPRep_sh_11_11::Instance(a,b,c,d,mType(0u));
1413 abcd_label = abcd->label();
1414 }
1415 // + derivative level (if deriv_level > 0)
1416 std::string label;
1417 {
1418 label = cparams->api_prefix();
1419 if (deriv_level != 0) {
1420 std::ostringstream oss;
1421 oss << "deriv" << deriv_level;
1422 label += oss.str();
1423 }
1424 label += "eri2";
1425 label += abcd_label;
1426 }
1427
1428 std::cout << "working on " << label << " ... "; std::cout.flush();
1429
1430 std::string prefix(cparams->source_directory());
1431 std::deque<std::string> decl_filenames;
1432 std::deque<std::string> def_filenames;
1433
1434 // this will generate code for this targets, and potentially generate code for its prerequisites
1435 GenerateCode(dg_xxx, context, cparams, strat, tactic, memman,
1436 decl_filenames, def_filenames,
1437 prefix, label, false);
1438
1439 // update max stack size and # of targets
1440 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
1441 tparams->max_stack_size(max_am, memman->max_memory_used());
1442 tparams->max_ntarget(targets.size());
1443 //os << " Max memory used = " << memman->max_memory_used() << std::endl;
1444
1445 // set pointer to the top-level evaluator function
1446 ostringstream oss;
1447 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_" << task << "[" << lbra << "][" << lket << "] = "
1448 << context->label_to_name(label_to_funcname(label))
1449 << context->end_of_stat() << endl;
1450 iface->to_static_init(oss.str());
1451
1452 // need to declare this function internally
1453 for(std::deque<std::string>::const_iterator i=decl_filenames.begin();
1454 i != decl_filenames.end();
1455 ++i) {
1456 oss.str("");
1457 oss << "#include <" << *i << ">" << endl;
1458 iface->to_int_iface(oss.str());
1459 }
1460
1461 #if DEBUG
1462 os << "Max memory used = " << memman->max_memory_used() << endl;
1463 #endif
1464 dg_xxx->reset();
1465 memman->reset();
1466
1467 } // end of ket loop
1468 } // end of bra loop
1469
1470 }
1471 #endif // INCLUDE_ERI2
1472
1473 #ifdef INCLUDE_G12
1474 void
build_R12kG12_2b_2k(std::ostream & os,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface)1475 build_R12kG12_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
1476 SafePtr<Libint2Iface>& iface)
1477 {
1478 const std::string task("r12kg12");
1479 vector<CGShell*> shells;
1480 unsigned int lmax = cparams->max_am(task);
1481 for(unsigned int l=0; l<=lmax; l++) {
1482 shells.push_back(new CGShell(l));
1483 }
1484 ImplicitDimensions::set_default_dims(cparams);
1485
1486 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
1487 taskmgr.current(task);
1488 iface->to_params(iface->macro_define("MAX_AM_R12kG12",lmax));
1489 #if SUPPORT_T1G12
1490 iface->to_params(iface->macro_define("SUPPORT_T1G12",1));
1491 #else
1492 iface->to_params(iface->macro_define("SUPPORT_T1G12",0));
1493 #endif
1494
1495 //
1496 // Construct graphs for each desired target integral and
1497 // 1) generate source code for the found traversal path
1498 // 2) extract all remaining unresolved recurrence relations and
1499 // append them to the stack. Such unresolved RRs are RRs applied
1500 // to sets of integrals (rather than to individual integrals).
1501 // 3) at the end, for each unresolved recurrence relation generate
1502 // explicit source code
1503 //
1504 SafePtr<DirectedGraph> dg_xxxx(new DirectedGraph);
1505 SafePtr<Strategy> strat(new Strategy);
1506 SafePtr<Tactic> tactic(new FirstChoiceTactic<DummyRandomizePolicy>);
1507 //SafePtr<Tactic> tactic(new RandomChoiceTactic());
1508 //SafePtr<Tactic> tactic(new FewestNewVerticesTactic(dg_xxxx));
1509 SafePtr<CodeContext> context(new CppCodeContext(cparams));
1510 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
1511
1512 for(unsigned int la=0; la<=lmax; la++) {
1513 for(unsigned int lb=0; lb<=lmax; lb++) {
1514 for(unsigned int lc=0; lc<=lmax; lc++) {
1515 for(unsigned int ld=0; ld<=lmax; ld++) {
1516
1517 if (la < lb || lc < ld || la+lb > lc+ld)
1518 continue;
1519 bool ssss = false;
1520 if (la+lb+lc+ld == 0)
1521 ssss = true;
1522 #if !SUPPORT_T1G12
1523 if (ssss) continue;
1524 #endif
1525
1526 // unroll only if max_am <= cparams->max_am_opt(task)
1527 using std::max;
1528 const unsigned int max_am = max(max(la,lb),max(lc,ld));
1529 const bool need_to_optimize = (max_am <= cparams->max_am_opt(task));
1530 const unsigned int unroll_threshold = need_to_optimize ? cparams->unroll_threshold() : 0;
1531 dg_xxxx->registry()->unroll_threshold(unroll_threshold);
1532 dg_xxxx->registry()->do_cse(need_to_optimize);
1533 dg_xxxx->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
1534 // Need to accumulate integrals?
1535 dg_xxxx->registry()->accumulate_targets(cparams->accumulate_targets());
1536
1537 typedef R12kG12_11_11_sq int_type;
1538 typedef R12kG12 oper_type;
1539 // k=0
1540 if (!ssss) {
1541 typedef oper_type::Descriptor oper_descr;
1542 oper_type oper(oper_descr(0));
1543 #if LIBINT_CONTRACTED_INTS
1544 oper.descr().contract();
1545 #endif
1546 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1547 os << "building " << abcd->description() << endl;
1548 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1549 dg_xxxx->append_target(abcd_ptr);
1550 }
1551
1552 // k=-1
1553 if (!ssss) {
1554 typedef oper_type::Descriptor oper_descr;
1555 oper_type oper(oper_descr(-1));
1556 #if LIBINT_CONTRACTED_INTS
1557 oper.descr().contract();
1558 #endif
1559 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1560 os << "building " << abcd->description() << endl;
1561 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1562 dg_xxxx->append_target(abcd_ptr);
1563 }
1564
1565 #if SUPPORT_T1G12
1566 // [T_1,G12]
1567 if (true) {
1568 typedef TiG12_11_11_sq int_type;
1569 typedef int_type::OperType oper_type;
1570 typedef oper_type::Descriptor oper_descr;
1571 oper_type oper(oper_descr(0));
1572 #if LIBINT_CONTRACTED_INTS
1573 oper.descr().contract();
1574 #endif
1575 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1576 os << "building " << abcd->description() << endl;
1577 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1578 dg_xxxx->append_target(abcd_ptr);
1579 }
1580
1581 // [T_2,G12]
1582 if (true) {
1583 typedef TiG12_11_11_sq int_type;
1584 typedef int_type::OperType oper_type;
1585 typedef oper_type::Descriptor oper_descr;
1586 oper_type oper(oper_descr(1));
1587 #if LIBINT_CONTRACTED_INTS
1588 oper.descr().contract();
1589 #endif
1590 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1591 os << "building " << abcd->description() << endl;
1592 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1593 dg_xxxx->append_target(abcd_ptr);
1594 }
1595 #endif
1596
1597 // [G12,[T1,G12]]
1598 if (!ssss) {
1599 typedef G12TiG12_11_11_sq int_type;
1600 typedef int_type::OperType oper_type;
1601 typedef oper_type::Descriptor oper_descr;
1602 oper_type oper(oper_descr(0)); // doesn't matter whether T1 or T2 here
1603 #if LIBINT_CONTRACTED_INTS
1604 oper.descr().contract();
1605 #endif
1606 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1607 os << "building " << abcd->description() << endl;
1608 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1609 dg_xxxx->append_target(abcd_ptr);
1610 }
1611
1612 std::string _label;
1613 {
1614 ostringstream os;
1615 os << "(" << shells[la]->label() << " "
1616 << shells[lb]->label()
1617 << " | r_{12}^K * exp(-g*r_{12}^2) | "
1618 << shells[lc]->label() << " "
1619 << shells[ld]->label() << ")";
1620 _label = os.str();
1621 }
1622
1623 std::string prefix(cparams->source_directory());
1624 std::string label(cparams->api_prefix() + _label);
1625 std::deque<std::string> decl_filenames;
1626 std::deque<std::string> def_filenames;
1627
1628 // this will generate code for this targets, and potentially generate code for its prerequisites
1629 GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman,
1630 decl_filenames, def_filenames,
1631 prefix, label, false);
1632
1633 // update max stack size
1634 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
1635 tparams->max_stack_size(max_am, memman->max_memory_used());
1636 tparams->max_ntarget(5);
1637
1638 ostringstream oss;
1639 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_r12kg12[" << la << "][" << lb << "][" << lc << "]["
1640 << ld <<"] = " << context->label_to_name(label_to_funcname(label))
1641 << context->end_of_stat() << endl;
1642 iface->to_static_init(oss.str());
1643
1644 // need to declare this function internally
1645 for(std::deque<std::string>::const_iterator i=decl_filenames.begin();
1646 i != decl_filenames.end();
1647 ++i) {
1648 oss.str("");
1649 oss << "#include <" << *i << ">" << endl;
1650 iface->to_int_iface(oss.str());
1651 }
1652
1653 #if DEBUG
1654 os << "Max memory used = " << memman->max_memory_used() << endl;
1655 #endif
1656 dg_xxxx->reset();
1657 memman->reset();
1658 } // end of d loop
1659 } // end of c loop
1660 } // end of b loop
1661 } // end of a loop
1662 }
1663
1664 #endif // INCLUDE_G12
1665
1666 #ifdef INCLUDE_G12
1667 void
build_R12kG12_2b_2k_separate(std::ostream & os,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface)1668 build_R12kG12_2b_2k_separate(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
1669 SafePtr<Libint2Iface>& iface)
1670 {
1671 // do not support this if the commutator integrals are needed
1672 #if SUPPORT_T1G12
1673 assert(false);
1674 #endif
1675
1676 // Note that because r12_-1_g12 integrals are evaluated using the same RR as ERI, no need to generate their code at all
1677 const int ntasks = 2;
1678 const char* task_names[] = {"r12_0_g12", "g12_T1_g12"};
1679 const char* task_NAMES[] = {"R12_0_R12", "G12_T1_G12"};
1680
1681 vector<CGShell*> shells;
1682 unsigned int lmax = cparams->max_am("r12kg12");
1683 for(unsigned int l=0; l<=lmax; l++) {
1684 shells.push_back(new CGShell(l));
1685 }
1686 ImplicitDimensions::set_default_dims(cparams);
1687
1688 for(int task=0; task<ntasks; ++task) {
1689
1690 const std::string task_name(task_names[task]);
1691
1692 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
1693 taskmgr.current(task_name);
1694 iface->to_params(iface->macro_define(std::string("MAX_AM_") + task_NAMES[task],lmax));
1695 iface->to_params(iface->macro_define("SUPPORT_T1G12",0));
1696
1697 SafePtr<DirectedGraph> dg_xxxx(new DirectedGraph);
1698 SafePtr<Strategy> strat(new Strategy);
1699 SafePtr<Tactic> tactic(new FirstChoiceTactic<DummyRandomizePolicy>);
1700 SafePtr<CodeContext> context(new CppCodeContext(cparams));
1701 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
1702
1703 for(unsigned int la=0; la<=lmax; la++) {
1704 for(unsigned int lb=0; lb<=lmax; lb++) {
1705 for(unsigned int lc=0; lc<=lmax; lc++) {
1706 for(unsigned int ld=0; ld<=lmax; ld++) {
1707
1708 if (la+lb+lc+ld == 0)
1709 continue;
1710
1711 if (!ShellQuartetSetPredicate<static_cast<ShellSetType>(LIBINT_SHELL_SET)>::value(la,lb,lc,ld))
1712 continue;
1713
1714 using std::max;
1715 const unsigned int max_am = max(max(la,lb),max(lc,ld));
1716 const bool need_to_optimize = (max_am <= cparams->max_am_opt("r12kg12"));
1717 const unsigned int unroll_threshold = need_to_optimize ? cparams->unroll_threshold() : 0;
1718 dg_xxxx->registry()->unroll_threshold(unroll_threshold);
1719 dg_xxxx->registry()->do_cse(need_to_optimize);
1720 dg_xxxx->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
1721 // Need to accumulate integrals?
1722 dg_xxxx->registry()->accumulate_targets(cparams->accumulate_targets());
1723
1724 std::string _label;
1725 // k=0
1726 if (task == 0) {
1727 typedef R12kG12_11_11_sq int_type;
1728 typedef R12kG12 oper_type;
1729 typedef oper_type::Descriptor oper_descr;
1730 oper_type oper(oper_descr(0));
1731 #if LIBINT_CONTRACTED_INTS
1732 oper.descr().contract();
1733 #endif
1734 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1735 os << "building " << abcd->description() << endl;
1736 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1737 dg_xxxx->append_target(abcd_ptr);
1738 _label = abcd_ptr->label();
1739 }
1740
1741 // [G12,[T1,G12]]
1742 if (task == 1) {
1743 typedef G12TiG12_11_11_sq int_type;
1744 typedef int_type::OperType oper_type;
1745 typedef oper_type::Descriptor oper_descr;
1746 oper_type oper(oper_descr(0)); // doesn't matter whether T1 or T2 here
1747 #if LIBINT_CONTRACTED_INTS
1748 oper.descr().contract();
1749 #endif
1750 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper);
1751 os << "building " << abcd->description() << endl;
1752 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1753 dg_xxxx->append_target(abcd_ptr);
1754 _label = abcd_ptr->label();
1755 }
1756
1757 std::string prefix(cparams->source_directory());
1758 std::string label(cparams->api_prefix() + _label);
1759 std::deque<std::string> decl_filenames;
1760 std::deque<std::string> def_filenames;
1761
1762 // this will generate code for this targets, and potentially generate code for its prerequisites
1763 GenerateCode(dg_xxxx, context, cparams, strat, tactic, memman,
1764 decl_filenames, def_filenames,
1765 prefix, label, false);
1766
1767 // update max stack size
1768 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
1769 tparams->max_stack_size(max_am, memman->max_memory_used());
1770 tparams->max_ntarget(1);
1771
1772 ostringstream oss;
1773 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_" << task_names[task]
1774 << "[" << la << "][" << lb << "][" << lc << "]["
1775 << ld <<"] = " << context->label_to_name(label_to_funcname(label))
1776 << context->end_of_stat() << endl;
1777 iface->to_static_init(oss.str());
1778
1779 // need to declare this function internally
1780 for(std::deque<std::string>::const_iterator i=decl_filenames.begin();
1781 i != decl_filenames.end();
1782 ++i) {
1783 oss.str("");
1784 oss << "#include <" << *i << ">" << endl;
1785 iface->to_int_iface(oss.str());
1786 }
1787
1788 #if DEBUG
1789 os << "Max memory used = " << memman->max_memory_used() << endl;
1790 #endif
1791 dg_xxxx->reset();
1792 memman->reset();
1793 } // end of d loop
1794 } // end of c loop
1795 } // end of b loop
1796 } // end of a loop
1797 } // end of task loop
1798 }
1799
1800 #endif // INCLUDE_G12
1801
1802 #ifdef INCLUDE_G12DKH
1803 void
build_G12DKH_2b_2k(std::ostream & os,const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface)1804 build_G12DKH_2b_2k(std::ostream& os, const SafePtr<CompilationParameters>& cparams,
1805 SafePtr<Libint2Iface>& iface)
1806 {
1807 const std::string task("g12dkh");
1808 vector<CGShell*> shells;
1809 unsigned int lmax = cparams->max_am(task);
1810 for(int l=0; l<=lmax; l++) {
1811 shells.push_back(new CGShell(l));
1812 }
1813 ImplicitDimensions::set_default_dims(cparams);
1814
1815 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
1816 taskmgr.current(task);
1817 iface->to_params(iface->macro_define("MAX_AM_G12DKH",lmax));
1818
1819 //
1820 // Construct graphs for each desired target integral and
1821 // 1) generate source code for the found traversal path
1822 // 2) extract all remaining unresolved recurrence relations and
1823 // append them to the stack. Such unresolved RRs are RRs applied
1824 // to sets of integrals (rather than to individual integrals).
1825 // 3) at the end, for each unresolved recurrence relation generate
1826 // explicit source code
1827 //
1828 SafePtr<DirectedGraph> dg_xxxx(new DirectedGraph);
1829 SafePtr<Strategy> strat(new Strategy);
1830 SafePtr<Tactic> tactic(new FirstChoiceTactic<DummyRandomizePolicy>);
1831 for(int la=0; la<=lmax; la++) {
1832 for(int lb=0; lb<=lmax; lb++) {
1833 for(int lc=0; lc<=lmax; lc++) {
1834 for(int ld=0; ld<=lmax; ld++) {
1835
1836 if (la < lb || lc < ld || la+lb > lc+ld)
1837 continue;
1838 bool ssss = false;
1839 if (la+lb+lc+ld == 0)
1840 ssss = true;
1841
1842 #if STUDY_MEMORY_USAGE
1843 const int lim = 5;
1844 if (! (la == lim && lb == lim && lc == lim && ld == lim) )
1845 continue;
1846 #endif
1847
1848 // unroll only if max_am <= cparams->max_am_opt(task)
1849 using std::max;
1850 const unsigned int max_am = max(max(la,lb),max(lc,ld));
1851 const bool need_to_optimize = (max_am <= cparams->max_am_opt(task));
1852 const unsigned int unroll_threshold = need_to_optimize ? cparams->unroll_threshold() : 0;
1853 dg_xxxx->registry()->unroll_threshold(unroll_threshold);
1854 dg_xxxx->registry()->do_cse(need_to_optimize);
1855 dg_xxxx->registry()->condense_expr(condense_expr(cparams->unroll_threshold(),cparams->max_vector_length()>1));
1856 // Need to accumulate integrals?
1857 dg_xxxx->registry()->accumulate_targets(cparams->accumulate_targets());
1858
1859 // k=0
1860 if (!ssss) {
1861 typedef R12kG12_11_11_sq int_type;
1862 typedef R12kG12 oper_type;
1863 typedef oper_type::Descriptor oper_descr;
1864 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper_type(oper_descr(0)));
1865 os << "building " << abcd->description() << endl;
1866 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1867 dg_xxxx->append_target(abcd_ptr);
1868 }
1869 // k=2
1870 if (!ssss) {
1871 typedef R12kG12_11_11_sq int_type;
1872 typedef R12kG12 oper_type;
1873 typedef oper_type::Descriptor oper_descr;
1874 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper_type(oper_descr(2)));
1875 os << "building " << abcd->description() << endl;
1876 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1877 dg_xxxx->append_target(abcd_ptr);
1878 }
1879 // k=4
1880 if (!ssss) {
1881 typedef R12kG12_11_11_sq int_type;
1882 typedef R12kG12 oper_type;
1883 typedef oper_type::Descriptor oper_descr;
1884 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper_type(oper_descr(4)));
1885 os << "building " << abcd->description() << endl;
1886 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1887 dg_xxxx->append_target(abcd_ptr);
1888 }
1889 // (G12prime.Div1)^2
1890 if (true) {
1891 typedef DivG12prime_xTx_11_11_sq int_type;
1892 typedef int_type::OperType oper_type;
1893 typedef oper_type::Descriptor oper_descr;
1894 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper_type(oper_descr(0)));
1895 os << "building " << abcd->description() << endl;
1896 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1897 dg_xxxx->append_target(abcd_ptr);
1898 }
1899 // (G12prime.Div2)^2
1900 if (true) {
1901 typedef DivG12prime_xTx_11_11_sq int_type;
1902 typedef int_type::OperType oper_type;
1903 typedef oper_type::Descriptor oper_descr;
1904 SafePtr<int_type> abcd = int_type::Instance(*shells[la],*shells[lb],*shells[lc],*shells[ld],0u,oper_type(oper_descr(1)));
1905 os << "building " << abcd->description() << endl;
1906 SafePtr<DGVertex> abcd_ptr = dynamic_pointer_cast<DGVertex,int_type>(abcd);
1907 dg_xxxx->append_target(abcd_ptr);
1908 }
1909
1910 SafePtr<CodeContext> context(new CppCodeContext(cparams));
1911 SafePtr<MemoryManager> memman(new WorstFitMemoryManager());
1912 dg_xxxx->apply(strat,tactic);
1913 dg_xxxx->optimize_rr_out(context);
1914 dg_xxxx->traverse();
1915 #if DEBUG
1916 os << "The number of vertices = " << dg_xxxx->num_vertices() << endl;
1917 #endif
1918
1919 std::string label;
1920 {
1921 ostringstream os;
1922 os << "(" << shells[la]->label() << " "
1923 << shells[lb]->label()
1924 << " | r_{12}^(0,2,4) * exp(-g*r_{12}^2) | "
1925 << shells[lc]->label() << " "
1926 << shells[ld]->label() << ")";
1927 label = os.str();
1928 }
1929 std::string prefix(cparams->source_directory());
1930 std::string decl_filename(prefix + context->label_to_name(label)); decl_filename += ".h";
1931 std::string src_filename(prefix + context->label_to_name(label)); src_filename += ".cc";
1932 std::basic_ofstream<char> declfile(decl_filename.c_str());
1933 std::basic_ofstream<char> srcfile(src_filename.c_str());
1934 dg_xxxx->generate_code(context,memman,ImplicitDimensions::default_dims(),SafePtr<CodeSymbols>(new CodeSymbols),label,declfile,srcfile);
1935
1936 // update max stack size
1937 const SafePtr<TaskParameters>& tparams = taskmgr.current().params();
1938 tparams->max_stack_size(max_am, memman->max_memory_used());
1939 tparams->max_ntarget(3);
1940
1941 ostringstream oss;
1942 oss << context->label_to_name(cparams->api_prefix()) << "libint2_build_g12dkh[" << la << "][" << lb << "][" << lc << "]["
1943 << ld <<"] = " << context->label_to_name(label_to_funcname(label))
1944 << context->end_of_stat() << endl;
1945 iface->to_static_init(oss.str());
1946
1947 oss.str("");
1948 oss << "#include <" << decl_filename << ">" << endl;
1949 iface->to_int_iface(oss.str());
1950
1951 // For the most expensive (i.e. presumably complete) graph extract all precomputed quantities -- these will be members of the evaluator structure
1952 // also extract all RRs -- need to keep track of these to figure out which external symbols appearing in RR code belong to this task also
1953 if (la == lmax &&
1954 lb == lmax &&
1955 lc == lmax &&
1956 ld == lmax)
1957 extract_symbols(dg_xxxx);
1958
1959 #if DEBUG
1960 os << "Max memory used = " << memman->max_memory_used() << endl;
1961 #endif
1962 dg_xxxx->reset();
1963 declfile.close();
1964 srcfile.close();
1965 } // end of d loop
1966 } // end of c loop
1967 } // end of b loop
1968 } // end of a loop
1969 }
1970
1971 #endif // INCLUDE_G12DKH
1972
1973 void
config_to_api(const SafePtr<CompilationParameters> & cparams,SafePtr<Libint2Iface> & iface)1974 config_to_api(const SafePtr<CompilationParameters>& cparams, SafePtr<Libint2Iface>& iface)
1975 {
1976 int max_deriv_order = 0;
1977 #ifdef INCLUDE_ONEBODY
1978 iface->to_params(iface->macro_define("SUPPORT_ONEBODY",1));
1979 iface->to_params(iface->macro_define("DERIV_ONEBODY_ORDER",INCLUDE_ONEBODY));
1980 #ifdef DISABLE_ONEBODY_PROPERTY_DERIVS
1981 iface->to_params(iface->macro_define("DERIV_ONEBODY_PROPERTY_ORDER",0));
1982 #else
1983 iface->to_params(iface->macro_define("DERIV_ONEBODY_PROPERTY_ORDER",INCLUDE_ONEBODY));
1984 #endif
1985 max_deriv_order = std::max(max_deriv_order,INCLUDE_ONEBODY);
1986 #endif
1987 #ifdef INCLUDE_ERI
1988 iface->to_params(iface->macro_define("SUPPORT_ERI",1));
1989 iface->to_params(iface->macro_define("DERIV_ERI_ORDER",INCLUDE_ERI));
1990 max_deriv_order = std::max(max_deriv_order,INCLUDE_ERI);
1991 #endif
1992 #ifdef INCLUDE_ERI3
1993 iface->to_params(iface->macro_define("SUPPORT_ERI3",1));
1994 iface->to_params(iface->macro_define("DERIV_ERI3_ORDER",INCLUDE_ERI3));
1995 max_deriv_order = std::max(max_deriv_order,INCLUDE_ERI3);
1996 #endif
1997 #ifdef INCLUDE_ERI2
1998 iface->to_params(iface->macro_define("SUPPORT_ERI2",1));
1999 iface->to_params(iface->macro_define("DERIV_ERI2_ORDER",INCLUDE_ERI2));
2000 max_deriv_order = std::max(max_deriv_order,INCLUDE_ERI2);
2001 #endif
2002 #ifdef INCLUDE_G12
2003 iface->to_params(iface->macro_define("SUPPORT_G12",1));
2004 iface->to_params(iface->macro_define("DERIV_G12_ORDER",INCLUDE_G12));
2005 max_deriv_order = std::max(max_deriv_order,INCLUDE_G12);
2006 #endif
2007 #ifdef INCLUDE_G12DKH
2008 iface->to_params(iface->macro_define("SUPPORT_G12DKH",1));
2009 iface->to_params(iface->macro_define("DERIV_G12DKH_ORDER",INCLUDE_G12DKH));
2010 max_deriv_order = std::max(max_deriv_order,INCLUDE_G12DKH);
2011 #endif
2012 iface->to_params(iface->macro_define("MAX_DERIV_ORDER",max_deriv_order));
2013
2014 // this is only needed for preprocessor-based generic processing of all generated tasks
2015 // declare all tasks in a range of valid tasks as defined or not
2016 LibraryTaskManager& taskmgr = LibraryTaskManager::Instance();
2017 // the range is defined by max # of centers, max deriv order, and operator set
2018 const size_t max_ncenter = 4;
2019 for(unsigned int ncenter=0; ncenter<=max_ncenter; ++ncenter) {
2020
2021 std::stringstream oss;
2022 oss << ncenter;
2023
2024 for(unsigned int d=0; d<=max_deriv_order; ++d) {
2025 std::string abbrv_label, full_label;
2026
2027 { // 1-body ints
2028 std::string ncenter_str = oss.str();
2029 std::string ncenter_str_abbrv = ncenter == 2 ? std::string("") : oss.str();
2030 #define BOOST_PP_MCR1(r,data,elem) \
2031 abbrv_label = task_label(ncenter_str_abbrv + BOOST_PP_STRINGIZE(elem),d); \
2032 full_label = task_label(ncenter_str + BOOST_PP_STRINGIZE(elem),d); \
2033 iface->to_params(iface->macro_define(std::string("TASK_EXISTS_") + full_label,taskmgr.exists(abbrv_label) ? 1 : 0));
2034
2035 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_MCR1, _, BOOST_PP_ONEBODY_TASK_LIST)
2036 #undef BOOST_PP_MCR1
2037 }
2038
2039 { // 2-body ints
2040
2041 #define BOOST_PP_TWOBODY_TASKOPER_TUPLE ("eri", \
2042 "r12kg12", \
2043 "r12_0_g12", \
2044 "r12_2_g12", \
2045 "g12_T1_g12", \
2046 "g12dkh" \
2047 )
2048 #define BOOST_PP_TWOBODY_TASKOPER_LIST BOOST_PP_TUPLE_TO_LIST( BOOST_PP_TWOBODY_TASKOPER_TUPLE )
2049
2050 std::string ncenter_str = oss.str();
2051 std::string ncenter_str_abbrv = ncenter == 4 ? std::string("") : oss.str();
2052 #define BOOST_PP_MCR1(r,data,elem) \
2053 abbrv_label = task_label(ncenter_str_abbrv + elem,d); \
2054 full_label = task_label(ncenter_str + elem,d); \
2055 iface->to_params(iface->macro_define(std::string("TASK_EXISTS_") + full_label,taskmgr.exists(abbrv_label) ? 1 : 0));
2056
2057 BOOST_PP_LIST_FOR_EACH ( BOOST_PP_MCR1, _, BOOST_PP_TWOBODY_TASKOPER_LIST)
2058 #undef BOOST_PP_MCR1
2059 }
2060 }
2061 }
2062
2063 }
2064
2065