1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: asd_dmrg/block_ops_2.cc
4 // Copyright (C) 2014 Toru Shiozaki
5 //
6 // Author: Shane Parker <shane.parker@u.northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 //
24 
25 #include <src/asd/dmrg/block_operators.h>
26 #include <src/asd/dmrg/dmrg_block.h>
27 #include <src/util/prim_op.h>
28 
29 using namespace std;
30 using namespace bagel;
31 
BlockOperators2(shared_ptr<const DMRG_Block2> blocks,shared_ptr<DimerJop> jop,const double thresh)32 BlockOperators2::BlockOperators2(shared_ptr<const DMRG_Block2> blocks, shared_ptr<DimerJop> jop, const double thresh) : BlockOperators(thresh), blocks_(blocks), jop_(jop) {
33   const int norb = jop->nocc();
34   Matrix mo1e = *jop->mo1e()->matrix();
35   const btas::TensorView4<double> mo2e = btas::make_view(btas::CRange<4>(norb, norb, norb, norb), jop->mo2e()->storage());
36 
37   const int rasnorb = norb - (blocks->left_block()->norb() + blocks->right_block()->norb());
38   const int lnorb = blocks->left_block()->norb();
39   const int rnorb = blocks->right_block()->norb();
40 
41   { // make left jop to be used for left BlockOperators1
42     const int leftsize = rasnorb+lnorb;
43     auto left1e = make_shared<CSymMatrix>(mo1e.get_submatrix(0, 0, leftsize, leftsize));
44     auto left2e = make_shared<Matrix>(leftsize*leftsize, leftsize*leftsize);
45     auto low = {0, 0, 0, 0};
46     auto up = {leftsize, leftsize, leftsize, leftsize};
47     const btas::TensorView4<double> mo2eslice = btas::make_view(mo2e.range().slice(low, up), mo2e.storage());
48     copy(mo2eslice.begin(), mo2eslice.end(), left2e->begin());
49     auto leftjop = make_shared<DimerJop>(rasnorb, lnorb, left1e, left2e);
50     left_ops_ = make_shared<BlockOperators1>(blocks->left_block(), leftjop);
51   }
52 
53   { // make double block one to be used for intra BlockOperators1
54     const int blocksize = lnorb + rnorb;
55     auto block1e = make_shared<CSymMatrix>(mo1e.get_submatrix(rasnorb, rasnorb, blocksize, blocksize));
56     auto block2e = make_shared<Matrix>(blocksize*blocksize, blocksize*blocksize);
57     auto low = {rasnorb, rasnorb, rasnorb, rasnorb};
58     auto up = {norb, norb, norb, norb};
59     const btas::TensorView4<double> mo2eslice = btas::make_view(mo2e.range().slice(low, up), mo2e.storage());
60     copy(mo2eslice.begin(), mo2eslice.end(), block2e->begin());
61     auto intrajop = make_shared<DimerJop>(lnorb, rnorb, block1e, block2e);
62     intra_ops_ = make_shared<BlockOperators1>(blocks->right_block(), intrajop);
63   }
64 
65   { // make right jop to be used for right BlockOperators1
66     const int rightsize = rasnorb + rnorb;
67     auto right1efull = mo1e.get_submatrix(0, 0, rightsize, rightsize); // keeps the rasnorb x rasnorb terms in there
68     right1efull->copy_block(0, rasnorb, rasnorb, rnorb, mo1e.get_submatrix(0, rasnorb+lnorb, rasnorb, rnorb));
69     right1efull->copy_block(rasnorb, 0, rnorb, rasnorb, mo1e.get_submatrix(rasnorb+lnorb, 0, rnorb, rasnorb));
70     right1efull->copy_block(rasnorb, rasnorb, rnorb, rnorb, mo1e.get_submatrix(rasnorb+lnorb, rasnorb+lnorb, rnorb, rnorb));
71     auto right1e = make_shared<CSymMatrix>(right1efull);
72 
73     auto right2e = make_shared<Matrix>(rightsize*rightsize, rightsize*rightsize);
74     btas::TensorView4<double> right2eView = btas::make_rwview(btas::CRange<4>(rightsize, rightsize, rightsize, rightsize), right2e->storage());
75     for (int i = 0; i < rightsize; ++i) {
76       const int ii = i < rasnorb ? i : i + lnorb;
77       for (int j = 0; j < rightsize; ++j) {
78         const int jj = j < rasnorb ? j : j + lnorb;
79         for (int k = 0; k < rightsize; ++k) {
80           const int kk = k < rasnorb ? k : k + lnorb;
81           for (int l = 0; l < rightsize; ++l) {
82             const int ll = l < rasnorb ? l : l + lnorb;
83             right2eView(l, k, j, i) = mo2e(ll, kk, jj, ii);
84           }
85         }
86       }
87     }
88     auto rightjop = make_shared<DimerJop>(rasnorb, rnorb, right1e, right2e);
89     right_ops_ = make_shared<BlockOperators1>(blocks->right_block(), rightjop);
90   }
91 }
92 
93 // TODO: use sparsity
gamma_a(const BlockKey bk,int i) const94 shared_ptr<BlockSparseMatrix> BlockOperators2::gamma_a(const BlockKey bk, int i) const {
95   const int lnorb = blocks_->left_block()->norb();
96   const BlockInfo binfo = blocks_->blockinfo(bk);
97   const BlockKey target_bk(bk.nelea+1, bk.neleb);
98   assert(blocks_->contains(target_bk));
99   const BlockInfo tinfo = blocks_->blockinfo(target_bk);
100 
101   auto out = make_shared<Matrix>(tinfo.nstates, binfo.nstates, true);
102 
103   const vector<DMRG::BlockPair>& target_pairs = blocks_->blockpairs(target_bk);
104   if (i < lnorb) { // i in L block
105     for (auto& spair : blocks_->blockpairs(bk)) {
106       BlockInfo rblock = spair.right;
107       BlockInfo lsource = spair.left;
108 
109       auto iter = find_if(target_pairs.begin(), target_pairs.end(), [&rblock, &lsource] (DMRG::BlockPair bp)
110         { return make_pair(BlockKey(lsource.nelea+1, lsource.neleb), rblock.key())==make_pair(bp.left.key(), bp.right.key()); } );
111       if (iter != target_pairs.end()) {
112         Matrix Runit(rblock.nstates, rblock.nstates); Runit.unit();
113         Matrix Lgamma = *left_ops_->gamma_a_as_matrix(lsource.key(), i);
114 
115         out->add_block(1.0, iter->offset, spair.offset, iter->nstates(), spair.nstates(), kronecker_product(false, Runit, false, Lgamma));
116       }
117     }
118   }
119   else {
120     for (auto& spair : blocks_->blockpairs(bk)) {
121       BlockInfo lblock = spair.left;
122       BlockInfo rsource = spair.right;
123 
124       const int phase = 1 - (((lblock.nelea + lblock.neleb)%2) << 1);
125 
126       auto iter = find_if(target_pairs.begin(), target_pairs.end(), [&lblock, &rsource] (DMRG::BlockPair bp)
127         { return make_pair(lblock.key(), BlockKey(rsource.nelea+1, rsource.neleb))==make_pair(bp.left.key(), bp.right.key()); } );
128       if (iter != target_pairs.end()) {
129         Matrix Lunit(lblock.nstates, lblock.nstates); Lunit.unit();
130         Matrix Rgamma = *right_ops_->gamma_a_as_matrix(rsource.key(), i - lnorb);
131 
132         out->add_block(phase, iter->offset, spair.offset, iter->nstates(), spair.nstates(), kronecker_product(false, Rgamma, false, Lunit));
133       }
134     }
135   }
136 
137   return make_shared<BlockSparseMatrix>(out);
138 }
139 
140 // TODO: use sparsity
gamma_b(const BlockKey bk,int i) const141 shared_ptr<BlockSparseMatrix> BlockOperators2::gamma_b(const BlockKey bk, int i) const {
142   const int lnorb = blocks_->left_block()->norb();
143   const BlockInfo binfo = blocks_->blockinfo(bk);
144   const BlockKey target_bk(bk.nelea, bk.neleb+1);
145   assert(blocks_->contains(target_bk));
146   const BlockInfo tinfo = blocks_->blockinfo(target_bk);
147 
148   auto out = make_shared<Matrix>(tinfo.nstates, binfo.nstates, true);
149 
150   const vector<DMRG::BlockPair>& target_pairs = blocks_->blockpairs(target_bk);
151   if (i < lnorb) { // i in L block
152     for (auto& spair : blocks_->blockpairs(bk)) {
153       BlockInfo rblock = spair.right;
154       BlockInfo lsource = spair.left;
155 
156       auto iter = find_if(target_pairs.begin(), target_pairs.end(), [&rblock, &lsource] (DMRG::BlockPair bp)
157         { return make_pair(BlockKey(lsource.nelea, lsource.neleb+1), rblock.key())==make_pair(bp.left.key(), bp.right.key()); } );
158       if (iter != target_pairs.end()) {
159         Matrix Runit(rblock.nstates, rblock.nstates); Runit.unit();
160         Matrix Lgamma = *left_ops_->gamma_b_as_matrix(lsource.key(), i);
161 
162         out->add_block(1.0, iter->offset, spair.offset, iter->nstates(), spair.nstates(), kronecker_product(false, Runit, false, Lgamma));
163       }
164     }
165   }
166   else {
167     for (auto& spair : blocks_->blockpairs(bk)) {
168       BlockInfo lblock = spair.left;
169       BlockInfo rsource = spair.right;
170 
171       const int phase = 1 - (((lblock.nelea + lblock.neleb)%2) << 1);
172 
173       auto iter = find_if(target_pairs.begin(), target_pairs.end(), [&lblock, &rsource] (DMRG::BlockPair bp)
174         { return make_pair(lblock.key(), BlockKey(rsource.nelea, rsource.neleb+1))==make_pair(bp.left.key(), bp.right.key()); } );
175       if (iter != target_pairs.end()) {
176         Matrix Lunit(lblock.nstates, lblock.nstates); Lunit.unit();
177         Matrix Rgamma = *right_ops_->gamma_b_as_matrix(rsource.key(), i - lnorb);
178 
179         out->add_block(phase, iter->offset, spair.offset, iter->nstates(), spair.nstates(), kronecker_product(false, Rgamma, false, Lunit));
180       }
181     }
182   }
183 
184   return make_shared<BlockSparseMatrix>(out);
185 }
186 
D_a(const BlockKey bk,int brastate,int ketstate,int i,int j,int k) const187 double BlockOperators2::D_a(const BlockKey bk, int brastate, int ketstate, int i, int j, int k) const {
188   const DMRG::BlockPair source_pair = *find_if(blocks_->blockpairs(bk).begin(), blocks_->blockpairs(bk).end(), [ketstate] (const DMRG::BlockPair bp)
189     { return (ketstate >= bp.offset && ketstate < bp.offset+bp.nstates()); } );
190 
191   const BlockKey target_key(bk.nelea+1, bk.neleb);
192   const DMRG::BlockPair target_pair = *find_if(blocks_->blockpairs(target_key).begin(), blocks_->blockpairs(target_key).end(), [brastate] (const DMRG::BlockPair bp)
193     { return (brastate >= bp.offset && brastate < bp.offset+bp.nstates()); } );
194 
195   const int left_ketstate = (ketstate - source_pair.offset) % source_pair.left.nstates;
196   const int right_ketstate = (ketstate - source_pair.offset) / source_pair.left.nstates;
197 
198   const int left_brastate = (brastate - target_pair.offset) % target_pair.left.nstates;
199   const int right_brastate = (brastate - target_pair.offset) / target_pair.left.nstates;
200 
201   double out = 0.0;
202 
203   if (source_pair.left.key() == target_pair.left.key()) {
204     if (left_ketstate==left_brastate)
205       out += right_ops_->D_a(source_pair.right.key(), right_brastate, right_ketstate, i, j, k);
206   }
207   else if (source_pair.right.key() == target_pair.right.key()) {
208     if (right_ketstate==right_brastate)
209       out += left_ops_->D_a(source_pair.left.key(), left_brastate, left_ketstate, i, j, k);
210   }
211   return out;
212 }
213 
D_b(const BlockKey bk,int brastate,int ketstate,int i,int j,int k) const214 double BlockOperators2::D_b(const BlockKey bk, int brastate, int ketstate, int i, int j, int k) const {
215   const DMRG::BlockPair source_pair = *find_if(blocks_->blockpairs(bk).begin(), blocks_->blockpairs(bk).end(), [ketstate] (const DMRG::BlockPair bp)
216     { return (ketstate >= bp.offset && ketstate < bp.offset+bp.nstates()); } );
217 
218   const BlockKey target_key(bk.nelea, bk.neleb+1);
219   const DMRG::BlockPair target_pair = *find_if(blocks_->blockpairs(target_key).begin(), blocks_->blockpairs(target_key).end(), [brastate] (const DMRG::BlockPair bp)
220     { return (brastate >= bp.offset && brastate < bp.offset+bp.nstates()); } );
221 
222   const int left_ketstate = (ketstate - source_pair.offset) % source_pair.left.nstates;
223   const int right_ketstate = (ketstate - source_pair.offset) / source_pair.left.nstates;
224 
225   const int left_brastate = (brastate - target_pair.offset) % target_pair.left.nstates;
226   const int right_brastate = (brastate - target_pair.offset) / target_pair.left.nstates;
227 
228   double out = 0.0;
229 
230   if (source_pair.left.key() == target_pair.left.key()) {
231     if (left_ketstate==left_brastate)
232       out += right_ops_->D_b(source_pair.right.key(), right_brastate, right_ketstate, i, j, k);
233   }
234   else if (source_pair.right.key() == target_pair.right.key()) {
235     if (right_ketstate==right_brastate)
236       out += left_ops_->D_b(source_pair.left.key(), left_brastate, left_ketstate, i, j, k);
237   }
238   return out;
239 }
240 
241 // all of the offdiagonal blocks get an extra factor of two so I can call H = 0.5 * (H + H^t) at the end
ham(const BlockKey bk) const242 shared_ptr<Matrix> BlockOperators2::ham(const BlockKey bk) const {
243   const vector<DMRG::BlockPair>& block_pairs = blocks_->blockpairs(bk);
244 
245   BlockInfo binfo = blocks_->blockinfo(bk);
246 
247   auto out = make_shared<Matrix>(binfo.nstates, binfo.nstates);
248 
249   const int lnorb = blocks_->left_block()->norb();
250 
251   for (auto& source_pair : block_pairs) {
252     { // diag parts
253       // H_A (x) I_B + I_A (x) H_B
254       shared_ptr<const Matrix> leftham = left_ops_->ham(source_pair.left);
255       Matrix leftunit = *leftham->clone(); leftunit.unit();
256       shared_ptr<const Matrix> rightham = right_ops_->ham(source_pair.right);
257       Matrix rightunit = *rightham->clone(); rightunit.unit();
258 
259       Matrix diag = kronecker_product(false, rightunit, false, *leftham) + kronecker_product(false, *rightham, false, leftunit);
260 
261       // Q_aa (x) A^t   A
262       const MatView Q_aa_view = intra_ops_->Q_aa_as_matview(source_pair.right.key());
263       const MatView gamma_aa_view(btas::make_view(btas::CRange<2>(source_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb),
264                                blocks_->left_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({source_pair.left.key(),source_pair.left.key()}).data->storage()), true);
265 
266       const MatView Q_bb_view = intra_ops_->Q_bb_as_matview(source_pair.right.key());
267       const MatView gamma_bb_view(btas::make_view(btas::CRange<2>(source_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb),
268                                blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({source_pair.left.key(),source_pair.left.key()}).data->storage()), true);
269 
270       Matrix ham_block = (gamma_aa_view ^ Q_aa_view) + (gamma_bb_view ^ Q_bb_view);
271 
272       sort_indices<0,2,1,3,1,1,1,1>(ham_block.data(), diag.data(), source_pair.left.nstates, source_pair.left.nstates, source_pair.right.nstates, source_pair.right.nstates);
273 
274       out->add_block(1.0, source_pair.offset, source_pair.offset, source_pair.nstates(), source_pair.nstates(), diag);
275     }
276 
277     { // Q_ab (x) A^t   B
278       const BlockKey right_target(source_pair.right.nelea-1, source_pair.right.neleb+1);
279       const BlockKey left_target(source_pair.left.nelea+1, source_pair.left.neleb-1);
280 
281       auto iter = find_if(block_pairs.begin(), block_pairs.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
282         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
283       );
284 
285       if (iter != block_pairs.end()) {
286         DMRG::BlockPair target_pair = *iter;
287 
288         const MatView Q_ab_view = intra_ops_->Q_ab_as_matview(source_pair.right.key());
289         const MatView gamma_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb),
290                                  blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateAlpha}).at({source_pair.left.key(),left_target}).data->storage()), true);
291 
292         // TODO maybe it would make more sense to reorder the block_ops in the first place?
293         Matrix Qab(Q_ab_view);
294         sort_indices<0,2,1,0,1,1,1>(Q_ab_view.data(), Qab.data(), Qab.ndim(), lnorb, lnorb);
295 
296         Matrix ham_block = gamma_view ^ Qab;
297 
298         Matrix tmp(target_pair.left.nstates*target_pair.right.nstates, source_pair.left.nstates*source_pair.right.nstates);
299         sort_indices<1,2,0,3,0,1,1,1>(ham_block.data(), tmp.data(), source_pair.left.nstates, target_pair.left.nstates, target_pair.right.nstates, source_pair.right.nstates);
300 
301         out->add_block(2.0, target_pair.offset, source_pair.offset, tmp.ndim(), tmp.mdim(), tmp);
302       }
303     }
304 
305     { // P_aa (x) A^t A^t
306       const BlockKey right_target(source_pair.right.nelea-2, source_pair.right.neleb);
307       const BlockKey left_target(source_pair.left.nelea+2, source_pair.left.neleb);
308 
309       auto iter = find_if(block_pairs.begin(), block_pairs.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
310         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
311       );
312 
313       if (iter != block_pairs.end()) {
314         DMRG::BlockPair target_pair = *iter;
315 
316         const MatView P_aa_view = intra_ops_->P_aa_as_matview(source_pair.right.key());
317         const MatView gamma_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb),
318                                  blocks_->left_block()->coupling({GammaSQ::AnnihilateAlpha, GammaSQ::AnnihilateAlpha}).at({source_pair.left.key(),left_target}).data->storage()), true);
319 
320         Matrix ham_block = gamma_view ^ P_aa_view;
321         Matrix tmp(target_pair.nstates(), source_pair.nstates());
322 
323         sort_indices<1,2,0,3,0,1,-1,1>(ham_block.data(), tmp.data() , source_pair.left.nstates, target_pair.left.nstates, target_pair.right.nstates, source_pair.right.nstates);
324         out->add_block(2.0, target_pair.offset, source_pair.offset, tmp.ndim(), tmp.mdim(), tmp);
325       }
326     }
327 
328     { // P_bb (x) B^t B^t
329       const BlockKey right_target(source_pair.right.nelea, source_pair.right.neleb-2);
330       const BlockKey left_target(source_pair.left.nelea, source_pair.left.neleb+2);
331 
332       auto iter = find_if(block_pairs.begin(), block_pairs.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
333         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
334       );
335 
336       if (iter != block_pairs.end()) {
337         DMRG::BlockPair target_pair = *iter;
338 
339         const MatView P_bb_view = intra_ops_->P_bb_as_matview(source_pair.right.key());
340         const MatView gamma_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb),
341                                  blocks_->left_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateBeta}).at({source_pair.left.key(),left_target}).data->storage()), true);
342 
343         Matrix ham_block = P_bb_view ^ gamma_view;
344         Matrix tmp(target_pair.nstates(), source_pair.nstates());
345 
346         sort_indices<1,2,0,3,0,1,-1,1>(ham_block.data(), tmp.data() , source_pair.left.nstates, target_pair.left.nstates, target_pair.right.nstates, source_pair.right.nstates);
347         out->add_block(2.0, target_pair.offset, source_pair.offset, tmp.ndim(), tmp.mdim(), tmp);
348       }
349     }
350 
351     { // P_ab (x) A^t B^t
352       const BlockKey right_target(source_pair.right.nelea-1, source_pair.right.neleb-1);
353       const BlockKey left_target(source_pair.left.nelea+1, source_pair.left.neleb+1);
354 
355       auto iter = find_if(block_pairs.begin(), block_pairs.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
356         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
357       );
358 
359       if (iter != block_pairs.end()) {
360         DMRG::BlockPair target_pair = *iter;
361 
362         const MatView P_ab_view = intra_ops_->P_ab_as_matview(source_pair.right.key());
363         const MatView gamma_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb),
364                                  blocks_->left_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateAlpha}).at({source_pair.left.key(),left_target}).data->storage()), true);
365 
366         Matrix Pab(P_ab_view);
367         sort_indices<0,2,1,0,1,1,1>(P_ab_view.data(), Pab.data(), Pab.ndim(), lnorb, lnorb);
368 
369         Matrix ham_block = gamma_view ^ Pab;
370 
371         Matrix tmp(target_pair.nstates(), source_pair.nstates());
372         sort_indices<1,2,0,3,0,1,1,1>(ham_block.data(), tmp.data(), source_pair.left.nstates, target_pair.left.nstates, target_pair.right.nstates, source_pair.right.nstates);
373 
374         out->add_block(2.0, target_pair.offset, source_pair.offset, tmp.ndim(), tmp.mdim(), tmp);
375       }
376     }
377 
378     { // S_a (x)   A   and    D_a (x)   A^t A A + B^t B A
379       const BlockKey left_target(source_pair.left.nelea-1, source_pair.left.neleb);
380       const BlockKey right_target(source_pair.right.nelea+1, source_pair.right.neleb);
381       auto iter = find_if(block_pairs.begin(), block_pairs.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
382         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
383       );
384 
385       if (iter != block_pairs.end()) {
386         DMRG::BlockPair target_pair = *iter;
387 
388         const MatView S_a_view = intra_ops_->S_a_as_matview(source_pair.right.key());
389         const MatView gamma_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb),
390                                  blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({source_pair.left.key(),left_target}).data->storage()), true);
391 
392 
393         const MatView D_a_view = intra_ops_->D_a_as_matview(source_pair.right.key());
394         shared_ptr<const btas::Tensor3<double>> gamma_aaa = blocks_->left_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({source_pair.left.key(),left_target}).data;
395         shared_ptr<const btas::Tensor3<double>> gamma_abb = blocks_->left_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({source_pair.left.key(),left_target}).data;
396         const MatView gamma_aaa_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb*lnorb), gamma_aaa->storage()), true);
397         const MatView gamma_abb_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb*lnorb), gamma_abb->storage()), true);
398 
399         Matrix gamma_akk(gamma_aaa_view + gamma_abb_view);
400 
401         Matrix Da(D_a_view);
402         sort_indices<0,3,2,1,0,1,1,1>(D_a_view.data(), Da.data(), Da.ndim(), lnorb, lnorb, lnorb);
403 
404         Matrix ham_block = (gamma_view ^ S_a_view) + (gamma_akk ^ Da);
405 
406         Matrix tmp(target_pair.nstates(), source_pair.nstates());
407         sort_indices<1,2,0,3,0,1,1,1>(ham_block.data(), tmp.data(), source_pair.left.nstates, target_pair.left.nstates, target_pair.right.nstates, source_pair.right.nstates);
408 
409         const double fac = -2.0 * static_cast<int>(1 - (((source_pair.left.nelea+source_pair.left.neleb)%2) << 1));
410         out->add_block(fac, target_pair.offset, source_pair.offset, tmp.ndim(), tmp.mdim(), tmp);
411       }
412     }
413 
414     { // S_b (x)   B   and    D_b (x)   B^t B B + A^t A B
415       const BlockKey left_target(source_pair.left.nelea, source_pair.left.neleb-1);
416       const BlockKey right_target(source_pair.right.nelea, source_pair.right.neleb+1);
417       auto iter = find_if(block_pairs.begin(), block_pairs.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
418         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
419       );
420 
421       if (iter != block_pairs.end()) {
422         DMRG::BlockPair target_pair = *iter;
423 
424         const MatView S_b_view = intra_ops_->S_b_as_matview(source_pair.right.key());
425         const MatView gamma_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb),
426                                  blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({source_pair.left.key(),left_target}).data->storage()), true);
427 
428 
429         const MatView D_b_view = intra_ops_->D_b_as_matview(source_pair.right.key());
430         shared_ptr<const btas::Tensor3<double>> gamma_bbb = blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({source_pair.left.key(),left_target}).data;
431         shared_ptr<const btas::Tensor3<double>> gamma_baa = blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({source_pair.left.key(),left_target}).data;
432         const MatView gamma_bbb_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb*lnorb), gamma_bbb->storage()), true);
433         const MatView gamma_baa_view(btas::make_view(btas::CRange<2>(target_pair.left.nstates*source_pair.left.nstates, lnorb*lnorb*lnorb), gamma_baa->storage()), true);
434 
435         Matrix gamma_bkk(gamma_bbb_view + gamma_baa_view);
436 
437         Matrix Db(D_b_view);
438         sort_indices<0,3,2,1,0,1,1,1>(D_b_view.data(), Db.data(), Db.ndim(), lnorb, lnorb, lnorb);
439 
440         Matrix ham_block = (gamma_view ^ S_b_view) + (gamma_bkk ^ Db);
441 
442         Matrix tmp(target_pair.nstates(), source_pair.nstates());
443         sort_indices<1,2,0,3,0,1,1,1>(ham_block.data(), tmp.data(), source_pair.left.nstates, target_pair.left.nstates, target_pair.right.nstates, source_pair.right.nstates);
444 
445         const double fac = -2.0 * static_cast<int>(1 - (((source_pair.left.nelea+source_pair.left.neleb)%2) << 1));
446         out->add_block(fac, target_pair.offset, source_pair.offset, tmp.ndim(), tmp.mdim(), tmp);
447       }
448     }
449   }
450 
451   // because symmetrize is actually very slow right now
452   const size_t dim = out->ndim();
453   double* data = out->data();
454   for (size_t i = 0; i < dim; ++i)
455     for (size_t j = 0; j < i; ++j)
456       data[j + i*dim] = data[i + j*dim] = 0.5 * (data[j+i*dim] + data[i + j*dim]);
457 
458   return out;
459 }
460 
461 /*-----------------------------------------------------------------------------------------------------
462   All code below this point was generated by gen/blockops.py
463 -------------------------------------------------------------------------------------------------------*/
464 
S_a(BlockKey bk,const int i) const465 shared_ptr<BlockSparseMatrix> BlockOperators2::S_a(BlockKey bk, const int i) const {
466   BlockKey target_bk(bk.nelea+1,bk.neleb);
467   assert(blocks_->contains(target_bk));
468 
469   const vector<DMRG::BlockPair>& source_pvec = blocks_->blockpairs(bk);
470   const vector<DMRG::BlockPair>& target_pvec = blocks_->blockpairs(target_bk);
471 
472   const BlockInfo source_info = blocks_->blockinfo(bk);
473   const BlockInfo target_info = blocks_->blockinfo(target_bk);
474 
475   const int norb = jop_->nocc();
476   const int lnorb = blocks_->left_block()->norb();
477   const int rnorb = blocks_->right_block()->norb();
478   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
479   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
480 
481   const double* mo2e = jop_->mo2e()->data();
482 
483   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
484                                    [] (const BlockInfo& a, const BlockInfo& b)
485                                      { return a.nstates < b.nstates; })->nstates;
486   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
487                                    [] (const BlockInfo& a, const BlockInfo& b)
488                                      { return a.nstates < b.nstates; })->nstates;
489   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
490   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
491   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
492   unique_ptr<double[]> scratch(new double[max_intermediate]);
493 
494   const size_t max_coulomb_size = lnorb < rnorb ? rnorb*rnorb*lnorb : lnorb*lnorb*rnorb;
495   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
496   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
497 
498   for (auto& spair : source_pvec) {
499     // phase accumulated by moving an operator past the whole left ket block
500     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
501 
502     { // - 1.0 <L'| A^t B |L> (x) <R'| B^t |R>
503       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb-1);
504       const BlockKey right_target(spair.right.nelea, spair.right.neleb+1);
505 
506       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
507         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
508       );
509       if(iter!=target_pvec.end()) {
510         DMRG::BlockPair tpair = *iter;
511 
512         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
513 
514         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
515         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({tpair.right.key(),spair.right.key()}).data;
516 
517         const int Lndim = Lgamma->extent(0);
518         const int Lmdim = Lgamma->extent(1);
519         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
520 
521         const int Rndim = Rgamma->extent(0);
522         const int Rmdim = Rgamma->extent(1);
523         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
524 
525         // fill in coulomb part
526         assert(max_coulomb_size >= rnorb*lnorb*lnorb);
527         fill_n(scratch.get(), Lsize * rnorb, 0.0);
528         for (int p = 0; p < rnorb; ++p) {
529           for (int a = 0; a < lnorb; ++a) {
530             for (int b = 0; b < lnorb; ++b) {
531               coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(p+roffset)*norb+(b+loffset)*norb*norb+(a+loffset)*norb*norb*norb];
532             }
533           }
534         }
535 
536         dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
537         for (int p = 0; p < rnorb; ++p) {
538           const double* ldata = scratch.get() + Lsize * p;
539           const double* rdata = Rgamma->data() + Rsize * p;
540           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
541         }
542 
543         // add to map if large enough
544         if (out_block->rms() > thresh_)
545           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
546       }
547     }
548 
549     { // + 1.0 <L'| B |L> (x) <R'| A^t B^t |R>
550       const BlockKey left_target(spair.left.nelea, spair.left.neleb-1);
551       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb+1);
552 
553       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
554         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
555       );
556       if(iter!=target_pvec.end()) {
557         DMRG::BlockPair tpair = *iter;
558 
559         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
560 
561         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({spair.left.key(),tpair.left.key()}).data;
562         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
563 
564         const int Lndim = Lgamma->extent(0);
565         const int Lmdim = Lgamma->extent(1);
566         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
567 
568         const int Rndim = Rgamma->extent(0);
569         const int Rmdim = Rgamma->extent(1);
570         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
571 
572         // fill in coulomb part
573         assert(max_coulomb_size >= lnorb*rnorb*rnorb);
574         fill_n(scratch.get(), Rsize * lnorb, 0.0);
575         for (int a = 0; a < lnorb; ++a) {
576           for (int p = 0; p < rnorb; ++p) {
577             for (int q = 0; q < rnorb; ++q) {
578               coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(p+roffset)*norb+(q+roffset)*norb*norb+(a+loffset)*norb*norb*norb];
579             }
580           }
581         }
582 
583         dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
584         for (int a = 0; a < lnorb; ++a) {
585           const double* ldata = Lgamma->data() + Lsize * a;
586           const double* rdata = scratch.get() + Rsize * a;
587           kronecker_product(1.0, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
588         }
589 
590         // add to map if large enough
591         if (out_block->rms() > thresh_)
592           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
593       }
594     }
595 
596     { // + 1.0 <L'| A |L> (x) <R'| A^t A^t |R>
597       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
598       const BlockKey right_target(spair.right.nelea+2, spair.right.neleb);
599 
600       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
601         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
602       );
603       if(iter!=target_pvec.end()) {
604         DMRG::BlockPair tpair = *iter;
605 
606         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
607 
608         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
609         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::AnnihilateAlpha, GammaSQ::AnnihilateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
610 
611         const int Lndim = Lgamma->extent(0);
612         const int Lmdim = Lgamma->extent(1);
613         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
614 
615         const int Rndim = Rgamma->extent(0);
616         const int Rmdim = Rgamma->extent(1);
617         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
618 
619         // fill in coulomb part
620         assert(max_coulomb_size >= lnorb*rnorb*rnorb);
621         fill_n(scratch.get(), Rsize * lnorb, 0.0);
622         for (int a = 0; a < lnorb; ++a) {
623           for (int p = 0; p < rnorb; ++p) {
624             for (int q = 0; q < rnorb; ++q) {
625               coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(p+roffset)*norb+(q+roffset)*norb*norb+(a+loffset)*norb*norb*norb];
626             }
627           }
628         }
629 
630         dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
631         for (int a = 0; a < lnorb; ++a) {
632           const double* ldata = Lgamma->data() + Lsize * a;
633           const double* rdata = scratch.get() + Rsize * a;
634           kronecker_product(1.0, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
635         }
636 
637         // add to map if large enough
638         if (out_block->rms() > thresh_)
639           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
640       }
641     }
642 
643     { // S_a (x) I  + 1.0 <L'| A^t |L> (x) <R'| A^t A |R> - 1.0 <L'| A^t |L> (x) <R'| A^t A |R> + 1.0 <L'| A^t |L> (x) <R'| B^t B |R>
644       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb);
645       const BlockKey right_target = spair.right.key();
646 
647       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
648         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
649       );
650       if(iter!=target_pvec.end()) {
651         DMRG::BlockPair tpair = *iter;
652 
653         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
654 
655         // S_a (x) I
656         left_ops_->S_a_copy_to(scratch.get(), spair.left.key(), i);
657         kronecker_product_I_B(1.0, spair.right.nstates, false, tpair.left.nstates, spair.left.nstates, scratch.get(), tpair.left.nstates, out_block->data(), out_block->ndim());
658 
659         { // ["1.0 <L'| A^t |L> (x) <R'| A^t A |R>", "1.0 <L'| A^t |L> (x) <R'| B^t B |R>"]
660           shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
661           shared_ptr<const btas::Tensor3<double>> Rgamma1 = blocks_->right_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
662           shared_ptr<const btas::Tensor3<double>> Rgamma2 = blocks_->right_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({tpair.right.key(),spair.right.key()}).data;
663 
664           const int Lndim = Lgamma->extent(0);
665           const int Lmdim = Lgamma->extent(1);
666           const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
667 
668           const int Rndim = Rgamma1->extent(0);
669           const int Rmdim = Rgamma1->extent(1);
670           const int Rsize = Rgamma1->extent(0) * Rgamma1->extent(1);
671 
672           // fill in coulomb part
673           assert(max_coulomb_size >= lnorb*rnorb*rnorb);
674           fill_n(scratch.get(), Rsize * lnorb, 0.0);
675           for (int a = 0; a < lnorb; ++a) {
676             for (int p = 0; p < rnorb; ++p) {
677               for (int q = 0; q < rnorb; ++q) {
678                 coulomb[p + q*rnorb + a*rnorb*rnorb] = (mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(q+roffset)*norb*norb*norb] - mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(q+roffset)*norb*norb*norb]);
679               }
680             }
681           }
682 
683           dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma1->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
684           for (int a = 0; a < lnorb; ++a) {
685             for (int p = 0; p < rnorb; ++p) {
686               for (int q = 0; q < rnorb; ++q) {
687                 coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(q+roffset)*norb*norb*norb];
688               }
689             }
690           }
691 
692           dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma2->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
693           for (int a = 0; a < lnorb; ++a) {
694             const double* ldata = Lgamma->data() + Lsize * a;
695             const double* rdata = scratch.get() + Rsize * a;
696             kronecker_product(1.0, false, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
697           }
698         }
699 
700         // add to map if large enough
701         if (out_block->rms() > thresh_)
702           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
703       }
704     }
705 
706     { // + 1.0 <L'| A^t A^t |L> (x) <R'| A |R>
707       const BlockKey left_target(spair.left.nelea+2, spair.left.neleb);
708       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
709 
710       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
711         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
712       );
713       if(iter!=target_pvec.end()) {
714         DMRG::BlockPair tpair = *iter;
715 
716         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
717 
718         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::AnnihilateAlpha, GammaSQ::AnnihilateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
719         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
720 
721         const int Lndim = Lgamma->extent(0);
722         const int Lmdim = Lgamma->extent(1);
723         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
724 
725         const int Rndim = Rgamma->extent(0);
726         const int Rmdim = Rgamma->extent(1);
727         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
728 
729         // fill in coulomb part
730         assert(max_coulomb_size >= rnorb*lnorb*lnorb);
731         fill_n(scratch.get(), Lsize * rnorb, 0.0);
732         for (int p = 0; p < rnorb; ++p) {
733           for (int a = 0; a < lnorb; ++a) {
734             for (int b = 0; b < lnorb; ++b) {
735               coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(a+loffset)*norb+(b+loffset)*norb*norb+(p+roffset)*norb*norb*norb];
736             }
737           }
738         }
739 
740         dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
741         for (int p = 0; p < rnorb; ++p) {
742           const double* ldata = scratch.get() + Lsize * p;
743           const double* rdata = Rgamma->data() + Rsize * p;
744           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
745         }
746 
747         // add to map if large enough
748         if (out_block->rms() > thresh_)
749           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
750       }
751     }
752 
753     { // I (x) S_a - 1.0 <L'| A^t A |L> (x) <R'| A^t |R> + 1.0 <L'| A^t A |L> (x) <R'| A^t |R> + 1.0 <L'| B^t B |L> (x) <R'| A^t |R>
754       const BlockKey left_target = spair.left.key();
755       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb);
756 
757       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
758         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
759       );
760       if(iter!=target_pvec.end()) {
761         DMRG::BlockPair tpair = *iter;
762 
763         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
764 
765         // I (x) S_a
766         right_ops_->S_a_copy_to(scratch.get(), spair.right.key(), i);
767         kronecker_product_A_I(left_phase, false, tpair.right.nstates, spair.right.nstates, scratch.get(), tpair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
768 
769         { // ["-1.0 <L'| A^t A |L> (x) <R'| A^t |R>", "1.0 <L'| B^t B |L> (x) <R'| A^t |R>"]
770           shared_ptr<const btas::Tensor3<double>> Lgamma1 = blocks_->left_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
771           shared_ptr<const btas::Tensor3<double>> Lgamma2 = blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({tpair.left.key(),spair.left.key()}).data;
772           shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
773 
774           const int Lndim = Lgamma1->extent(0);
775           const int Lmdim = Lgamma1->extent(1);
776           const int Lsize = Lgamma1->extent(0) * Lgamma1->extent(1);
777 
778           const int Rndim = Rgamma->extent(0);
779           const int Rmdim = Rgamma->extent(1);
780           const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
781 
782           // fill in coulomb part
783           assert(max_coulomb_size >= rnorb*lnorb*lnorb);
784           fill_n(scratch.get(), Lsize * rnorb, 0.0);
785           for (int p = 0; p < rnorb; ++p) {
786             for (int a = 0; a < lnorb; ++a) {
787               for (int b = 0; b < lnorb; ++b) {
788                 coulomb[a + b*lnorb + p*lnorb*lnorb] = (mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(b+loffset)*norb*norb*norb] - mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(b+loffset)*norb*norb*norb]);
789               }
790             }
791           }
792 
793           dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, -1.0, Lgamma1->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
794           for (int p = 0; p < rnorb; ++p) {
795             for (int a = 0; a < lnorb; ++a) {
796               for (int b = 0; b < lnorb; ++b) {
797                 coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(b+loffset)*norb*norb*norb];
798               }
799             }
800           }
801 
802           dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, 1.0, Lgamma2->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
803           for (int p = 0; p < rnorb; ++p) {
804             const double* ldata = scratch.get() + Lsize * p;
805             const double* rdata = Rgamma->data() + Rsize * p;
806             kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
807           }
808         }
809 
810         // add to map if large enough
811         if (out_block->rms() > thresh_)
812           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
813       }
814     }
815 
816     { // - 1.0 <L'| B^t |L> (x) <R'| A^t B |R>
817       const BlockKey left_target(spair.left.nelea, spair.left.neleb+1);
818       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb-1);
819 
820       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
821         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
822       );
823       if(iter!=target_pvec.end()) {
824         DMRG::BlockPair tpair = *iter;
825 
826         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
827 
828         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({tpair.left.key(),spair.left.key()}).data;
829         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
830 
831         const int Lndim = Lgamma->extent(0);
832         const int Lmdim = Lgamma->extent(1);
833         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
834 
835         const int Rndim = Rgamma->extent(0);
836         const int Rmdim = Rgamma->extent(1);
837         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
838 
839         // fill in coulomb part
840         assert(max_coulomb_size >= lnorb*rnorb*rnorb);
841         fill_n(scratch.get(), Rsize * lnorb, 0.0);
842         for (int a = 0; a < lnorb; ++a) {
843           for (int p = 0; p < rnorb; ++p) {
844             for (int q = 0; q < rnorb; ++q) {
845               coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(a+loffset)*norb+(q+roffset)*norb*norb+(p+roffset)*norb*norb*norb];
846             }
847           }
848         }
849 
850         dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, -1.0, Rgamma->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
851         for (int a = 0; a < lnorb; ++a) {
852           const double* ldata = Lgamma->data() + Lsize * a;
853           const double* rdata = scratch.get() + Rsize * a;
854           kronecker_product(1.0, true, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
855         }
856 
857         // add to map if large enough
858         if (out_block->rms() > thresh_)
859           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
860       }
861     }
862 
863     { // + 1.0 <L'| A^t B^t |L> (x) <R'| B |R>
864       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb+1);
865       const BlockKey right_target(spair.right.nelea, spair.right.neleb-1);
866 
867       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
868         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
869       );
870       if(iter!=target_pvec.end()) {
871         DMRG::BlockPair tpair = *iter;
872 
873         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
874 
875         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
876         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({spair.right.key(),tpair.right.key()}).data;
877 
878         const int Lndim = Lgamma->extent(0);
879         const int Lmdim = Lgamma->extent(1);
880         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
881 
882         const int Rndim = Rgamma->extent(0);
883         const int Rmdim = Rgamma->extent(1);
884         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
885 
886         // fill in coulomb part
887         assert(max_coulomb_size >= rnorb*lnorb*lnorb);
888         fill_n(scratch.get(), Lsize * rnorb, 0.0);
889         for (int p = 0; p < rnorb; ++p) {
890           for (int a = 0; a < lnorb; ++a) {
891             for (int b = 0; b < lnorb; ++b) {
892               coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(a+loffset)*norb+(b+loffset)*norb*norb+(p+roffset)*norb*norb*norb];
893             }
894           }
895         }
896 
897         dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
898         for (int p = 0; p < rnorb; ++p) {
899           const double* ldata = scratch.get() + Lsize * p;
900           const double* rdata = Rgamma->data() + Rsize * p;
901           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
902         }
903 
904         // add to map if large enough
905         if (out_block->rms() > thresh_)
906           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
907       }
908     }
909   }
910 
911   return make_shared<BlockSparseMatrix>(target_info.nstates, source_info.nstates, out);
912 }
913 
914 
S_b(BlockKey bk,const int i) const915 shared_ptr<BlockSparseMatrix> BlockOperators2::S_b(BlockKey bk, const int i) const {
916   BlockKey target_bk(bk.nelea,bk.neleb+1);
917   assert(blocks_->contains(target_bk));
918 
919   const vector<DMRG::BlockPair>& source_pvec = blocks_->blockpairs(bk);
920   const vector<DMRG::BlockPair>& target_pvec = blocks_->blockpairs(target_bk);
921 
922   const BlockInfo source_info = blocks_->blockinfo(bk);
923   const BlockInfo target_info = blocks_->blockinfo(target_bk);
924 
925   const int norb = jop_->nocc();
926   const int lnorb = blocks_->left_block()->norb();
927   const int rnorb = blocks_->right_block()->norb();
928   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
929   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
930 
931   const double* mo2e = jop_->mo2e()->data();
932 
933   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
934                                    [] (const BlockInfo& a, const BlockInfo& b)
935                                      { return a.nstates < b.nstates; })->nstates;
936   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
937                                    [] (const BlockInfo& a, const BlockInfo& b)
938                                      { return a.nstates < b.nstates; })->nstates;
939   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
940   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
941   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
942   unique_ptr<double[]> scratch(new double[max_intermediate]);
943 
944   const size_t max_coulomb_size = lnorb < rnorb ? rnorb*rnorb*lnorb : lnorb*lnorb*rnorb;
945   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
946   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
947 
948   for (auto& spair : source_pvec) {
949     // phase accumulated by moving an operator past the whole left ket block
950     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
951 
952     { // - 1.0 <L'| B^t A |L> (x) <R'| A^t |R>
953       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb+1);
954       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb);
955 
956       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
957         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
958       );
959       if(iter!=target_pvec.end()) {
960         DMRG::BlockPair tpair = *iter;
961 
962         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
963 
964         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
965         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
966 
967         const int Lndim = Lgamma->extent(0);
968         const int Lmdim = Lgamma->extent(1);
969         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
970 
971         const int Rndim = Rgamma->extent(0);
972         const int Rmdim = Rgamma->extent(1);
973         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
974 
975         // fill in coulomb part
976         assert(max_coulomb_size >= rnorb*lnorb*lnorb);
977         fill_n(scratch.get(), Lsize * rnorb, 0.0);
978         for (int p = 0; p < rnorb; ++p) {
979           for (int a = 0; a < lnorb; ++a) {
980             for (int b = 0; b < lnorb; ++b) {
981               coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(b+loffset)*norb*norb*norb];
982             }
983           }
984         }
985 
986         dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
987         for (int p = 0; p < rnorb; ++p) {
988           const double* ldata = scratch.get() + Lsize * p;
989           const double* rdata = Rgamma->data() + Rsize * p;
990           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
991         }
992 
993         // add to map if large enough
994         if (out_block->rms() > thresh_)
995           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
996       }
997     }
998 
999     { // + 1.0 <L'| B |L> (x) <R'| B^t B^t |R>
1000       const BlockKey left_target(spair.left.nelea, spair.left.neleb-1);
1001       const BlockKey right_target(spair.right.nelea, spair.right.neleb+2);
1002 
1003       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1004         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1005       );
1006       if(iter!=target_pvec.end()) {
1007         DMRG::BlockPair tpair = *iter;
1008 
1009         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1010 
1011         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({spair.left.key(),tpair.left.key()}).data;
1012         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateBeta}).at({spair.right.key(),tpair.right.key()}).data;
1013 
1014         const int Lndim = Lgamma->extent(0);
1015         const int Lmdim = Lgamma->extent(1);
1016         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1017 
1018         const int Rndim = Rgamma->extent(0);
1019         const int Rmdim = Rgamma->extent(1);
1020         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1021 
1022         // fill in coulomb part
1023         assert(max_coulomb_size >= lnorb*rnorb*rnorb);
1024         fill_n(scratch.get(), Rsize * lnorb, 0.0);
1025         for (int a = 0; a < lnorb; ++a) {
1026           for (int p = 0; p < rnorb; ++p) {
1027             for (int q = 0; q < rnorb; ++q) {
1028               coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(p+roffset)*norb+(q+roffset)*norb*norb+(a+loffset)*norb*norb*norb];
1029             }
1030           }
1031         }
1032 
1033         dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
1034         for (int a = 0; a < lnorb; ++a) {
1035           const double* ldata = Lgamma->data() + Lsize * a;
1036           const double* rdata = scratch.get() + Rsize * a;
1037           kronecker_product(1.0, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1038         }
1039 
1040         // add to map if large enough
1041         if (out_block->rms() > thresh_)
1042           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1043       }
1044     }
1045 
1046     { // - 1.0 <L'| B^t A^t |L> (x) <R'| A |R>
1047       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb+1);
1048       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
1049 
1050       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1051         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1052       );
1053       if(iter!=target_pvec.end()) {
1054         DMRG::BlockPair tpair = *iter;
1055 
1056         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1057 
1058         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
1059         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
1060 
1061         const int Lndim = Lgamma->extent(0);
1062         const int Lmdim = Lgamma->extent(1);
1063         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1064 
1065         const int Rndim = Rgamma->extent(0);
1066         const int Rmdim = Rgamma->extent(1);
1067         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1068 
1069         // fill in coulomb part
1070         assert(max_coulomb_size >= rnorb*lnorb*lnorb);
1071         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1072         for (int p = 0; p < rnorb; ++p) {
1073           for (int a = 0; a < lnorb; ++a) {
1074             for (int b = 0; b < lnorb; ++b) {
1075               coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(b+loffset)*norb+(a+loffset)*norb*norb+(p+roffset)*norb*norb*norb];
1076             }
1077           }
1078         }
1079 
1080         dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
1081         for (int p = 0; p < rnorb; ++p) {
1082           const double* ldata = scratch.get() + Lsize * p;
1083           const double* rdata = Rgamma->data() + Rsize * p;
1084           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1085         }
1086 
1087         // add to map if large enough
1088         if (out_block->rms() > thresh_)
1089           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1090       }
1091     }
1092 
1093     { // S_b (x) I  + 1.0 <L'| B^t |L> (x) <R'| B^t B |R> - 1.0 <L'| B^t |L> (x) <R'| B^t B |R> + 1.0 <L'| B^t |L> (x) <R'| A^t A |R>
1094       const BlockKey left_target(spair.left.nelea, spair.left.neleb+1);
1095       const BlockKey right_target = spair.right.key();
1096 
1097       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1098         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1099       );
1100       if(iter!=target_pvec.end()) {
1101         DMRG::BlockPair tpair = *iter;
1102 
1103         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1104 
1105         // S_b (x) I
1106         left_ops_->S_b_copy_to(scratch.get(), spair.left.key(), i);
1107         kronecker_product_I_B(1.0, spair.right.nstates, false, tpair.left.nstates, spair.left.nstates, scratch.get(), tpair.left.nstates, out_block->data(), out_block->ndim());
1108 
1109         { // ["1.0 <L'| B^t |L> (x) <R'| B^t B |R>", "1.0 <L'| B^t |L> (x) <R'| A^t A |R>"]
1110           shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({tpair.left.key(),spair.left.key()}).data;
1111           shared_ptr<const btas::Tensor3<double>> Rgamma1 = blocks_->right_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({tpair.right.key(),spair.right.key()}).data;
1112           shared_ptr<const btas::Tensor3<double>> Rgamma2 = blocks_->right_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
1113 
1114           const int Lndim = Lgamma->extent(0);
1115           const int Lmdim = Lgamma->extent(1);
1116           const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1117 
1118           const int Rndim = Rgamma1->extent(0);
1119           const int Rmdim = Rgamma1->extent(1);
1120           const int Rsize = Rgamma1->extent(0) * Rgamma1->extent(1);
1121 
1122           // fill in coulomb part
1123           assert(max_coulomb_size >= lnorb*rnorb*rnorb);
1124           fill_n(scratch.get(), Rsize * lnorb, 0.0);
1125           for (int a = 0; a < lnorb; ++a) {
1126             for (int p = 0; p < rnorb; ++p) {
1127               for (int q = 0; q < rnorb; ++q) {
1128                 coulomb[p + q*rnorb + a*rnorb*rnorb] = (mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(q+roffset)*norb*norb*norb] - mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(q+roffset)*norb*norb*norb]);
1129               }
1130             }
1131           }
1132 
1133           dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma1->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
1134           for (int a = 0; a < lnorb; ++a) {
1135             for (int p = 0; p < rnorb; ++p) {
1136               for (int q = 0; q < rnorb; ++q) {
1137                 coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(q+roffset)*norb*norb*norb];
1138               }
1139             }
1140           }
1141 
1142           dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, 1.0, Rgamma2->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
1143           for (int a = 0; a < lnorb; ++a) {
1144             const double* ldata = Lgamma->data() + Lsize * a;
1145             const double* rdata = scratch.get() + Rsize * a;
1146             kronecker_product(1.0, false, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1147           }
1148         }
1149 
1150         // add to map if large enough
1151         if (out_block->rms() > thresh_)
1152           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1153       }
1154     }
1155 
1156     { // - 1.0 <L'| A^t |L> (x) <R'| B^t A |R>
1157       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb);
1158       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb+1);
1159 
1160       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1161         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1162       );
1163       if(iter!=target_pvec.end()) {
1164         DMRG::BlockPair tpair = *iter;
1165 
1166         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1167 
1168         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
1169         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
1170 
1171         const int Lndim = Lgamma->extent(0);
1172         const int Lmdim = Lgamma->extent(1);
1173         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1174 
1175         const int Rndim = Rgamma->extent(0);
1176         const int Rmdim = Rgamma->extent(1);
1177         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1178 
1179         // fill in coulomb part
1180         assert(max_coulomb_size >= lnorb*rnorb*rnorb);
1181         fill_n(scratch.get(), Rsize * lnorb, 0.0);
1182         for (int a = 0; a < lnorb; ++a) {
1183           for (int p = 0; p < rnorb; ++p) {
1184             for (int q = 0; q < rnorb; ++q) {
1185               coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(q+roffset)*norb*norb*norb];
1186             }
1187           }
1188         }
1189 
1190         dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, -1.0, Rgamma->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
1191         for (int a = 0; a < lnorb; ++a) {
1192           const double* ldata = Lgamma->data() + Lsize * a;
1193           const double* rdata = scratch.get() + Rsize * a;
1194           kronecker_product(1.0, false, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1195         }
1196 
1197         // add to map if large enough
1198         if (out_block->rms() > thresh_)
1199           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1200       }
1201     }
1202 
1203     { // - 1.0 <L'| A |L> (x) <R'| B^t A^t |R>
1204       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
1205       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb+1);
1206 
1207       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1208         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1209       );
1210       if(iter!=target_pvec.end()) {
1211         DMRG::BlockPair tpair = *iter;
1212 
1213         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1214 
1215         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
1216         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
1217 
1218         const int Lndim = Lgamma->extent(0);
1219         const int Lmdim = Lgamma->extent(1);
1220         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1221 
1222         const int Rndim = Rgamma->extent(0);
1223         const int Rmdim = Rgamma->extent(1);
1224         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1225 
1226         // fill in coulomb part
1227         assert(max_coulomb_size >= lnorb*rnorb*rnorb);
1228         fill_n(scratch.get(), Rsize * lnorb, 0.0);
1229         for (int a = 0; a < lnorb; ++a) {
1230           for (int p = 0; p < rnorb; ++p) {
1231             for (int q = 0; q < rnorb; ++q) {
1232               coulomb[p + q*rnorb + a*rnorb*rnorb] = mo2e[(i)+(q+roffset)*norb+(p+roffset)*norb*norb+(a+loffset)*norb*norb*norb];
1233             }
1234           }
1235         }
1236 
1237         dgemm_("N", "N", Rsize, lnorb, rnorb*rnorb, -1.0, Rgamma->data(), Rsize, coulomb.get(), rnorb*rnorb, 1.0, scratch.get(), Rsize);
1238         for (int a = 0; a < lnorb; ++a) {
1239           const double* ldata = Lgamma->data() + Lsize * a;
1240           const double* rdata = scratch.get() + Rsize * a;
1241           kronecker_product(1.0, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1242         }
1243 
1244         // add to map if large enough
1245         if (out_block->rms() > thresh_)
1246           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1247       }
1248     }
1249 
1250     { // I (x) S_b - 1.0 <L'| B^t B |L> (x) <R'| B^t |R> + 1.0 <L'| B^t B |L> (x) <R'| B^t |R> + 1.0 <L'| A^t A |L> (x) <R'| B^t |R>
1251       const BlockKey left_target = spair.left.key();
1252       const BlockKey right_target(spair.right.nelea, spair.right.neleb+1);
1253 
1254       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1255         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1256       );
1257       if(iter!=target_pvec.end()) {
1258         DMRG::BlockPair tpair = *iter;
1259 
1260         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1261 
1262         // I (x) S_b
1263         right_ops_->S_b_copy_to(scratch.get(), spair.right.key(), i);
1264         kronecker_product_A_I(left_phase, false, tpair.right.nstates, spair.right.nstates, scratch.get(), tpair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
1265 
1266         { // ["-1.0 <L'| B^t B |L> (x) <R'| B^t |R>", "1.0 <L'| A^t A |L> (x) <R'| B^t |R>"]
1267           shared_ptr<const btas::Tensor3<double>> Lgamma1 = blocks_->left_block()->coupling({GammaSQ::CreateBeta, GammaSQ::AnnihilateBeta}).at({tpair.left.key(),spair.left.key()}).data;
1268           shared_ptr<const btas::Tensor3<double>> Lgamma2 = blocks_->left_block()->coupling({GammaSQ::CreateAlpha, GammaSQ::AnnihilateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
1269           shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({tpair.right.key(),spair.right.key()}).data;
1270 
1271           const int Lndim = Lgamma1->extent(0);
1272           const int Lmdim = Lgamma1->extent(1);
1273           const int Lsize = Lgamma1->extent(0) * Lgamma1->extent(1);
1274 
1275           const int Rndim = Rgamma->extent(0);
1276           const int Rmdim = Rgamma->extent(1);
1277           const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1278 
1279           // fill in coulomb part
1280           assert(max_coulomb_size >= rnorb*lnorb*lnorb);
1281           fill_n(scratch.get(), Lsize * rnorb, 0.0);
1282           for (int p = 0; p < rnorb; ++p) {
1283             for (int a = 0; a < lnorb; ++a) {
1284               for (int b = 0; b < lnorb; ++b) {
1285                 coulomb[a + b*lnorb + p*lnorb*lnorb] = (mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(b+loffset)*norb*norb*norb] - mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(b+loffset)*norb*norb*norb]);
1286               }
1287             }
1288           }
1289 
1290           dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, -1.0, Lgamma1->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
1291           for (int p = 0; p < rnorb; ++p) {
1292             for (int a = 0; a < lnorb; ++a) {
1293               for (int b = 0; b < lnorb; ++b) {
1294                 coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(b+loffset)*norb*norb*norb];
1295               }
1296             }
1297           }
1298 
1299           dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, 1.0, Lgamma2->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
1300           for (int p = 0; p < rnorb; ++p) {
1301             const double* ldata = scratch.get() + Lsize * p;
1302             const double* rdata = Rgamma->data() + Rsize * p;
1303             kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1304           }
1305         }
1306 
1307         // add to map if large enough
1308         if (out_block->rms() > thresh_)
1309           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1310       }
1311     }
1312 
1313     { // + 1.0 <L'| B^t B^t |L> (x) <R'| B |R>
1314       const BlockKey left_target(spair.left.nelea, spair.left.neleb+2);
1315       const BlockKey right_target(spair.right.nelea, spair.right.neleb-1);
1316 
1317       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1318         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1319       );
1320       if(iter!=target_pvec.end()) {
1321         DMRG::BlockPair tpair = *iter;
1322 
1323         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1324 
1325         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::AnnihilateBeta, GammaSQ::AnnihilateBeta}).at({spair.left.key(),tpair.left.key()}).data;
1326         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({spair.right.key(),tpair.right.key()}).data;
1327 
1328         const int Lndim = Lgamma->extent(0);
1329         const int Lmdim = Lgamma->extent(1);
1330         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1331 
1332         const int Rndim = Rgamma->extent(0);
1333         const int Rmdim = Rgamma->extent(1);
1334         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1335 
1336         // fill in coulomb part
1337         assert(max_coulomb_size >= rnorb*lnorb*lnorb);
1338         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1339         for (int p = 0; p < rnorb; ++p) {
1340           for (int a = 0; a < lnorb; ++a) {
1341             for (int b = 0; b < lnorb; ++b) {
1342               coulomb[a + b*lnorb + p*lnorb*lnorb] = mo2e[(i)+(a+loffset)*norb+(b+loffset)*norb*norb+(p+roffset)*norb*norb*norb];
1343             }
1344           }
1345         }
1346 
1347         dgemm_("N", "N", Lsize, rnorb, lnorb*lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb*lnorb, 1.0, scratch.get(), Lsize);
1348         for (int p = 0; p < rnorb; ++p) {
1349           const double* ldata = scratch.get() + Lsize * p;
1350           const double* rdata = Rgamma->data() + Rsize * p;
1351           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1352         }
1353 
1354         // add to map if large enough
1355         if (out_block->rms() > thresh_)
1356           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1357       }
1358     }
1359   }
1360 
1361   return make_shared<BlockSparseMatrix>(target_info.nstates, source_info.nstates, out);
1362 }
1363 
1364 
Q_aa(BlockKey bk,const int i,const int j) const1365 shared_ptr<BlockSparseMatrix> BlockOperators2::Q_aa(BlockKey bk, const int i, const int j) const {
1366   const vector<DMRG::BlockPair>& pvec = blocks_->blockpairs(bk);
1367 
1368   const BlockInfo binfo = blocks_->blockinfo(bk);
1369 
1370   const int norb = jop_->nocc();
1371   const int lnorb = blocks_->left_block()->norb();
1372   const int rnorb = blocks_->right_block()->norb();
1373   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
1374   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
1375 
1376   const double* mo2e = jop_->mo2e()->data();
1377 
1378   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
1379                                    [] (const BlockInfo& a, const BlockInfo& b)
1380                                      { return a.nstates < b.nstates; })->nstates;
1381   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
1382                                    [] (const BlockInfo& a, const BlockInfo& b)
1383                                      { return a.nstates < b.nstates; })->nstates;
1384   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
1385   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
1386   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
1387   unique_ptr<double[]> scratch(new double[max_intermediate]);
1388 
1389   const size_t max_coulomb_size = rnorb*lnorb; // lnorb < rnorb ? rnorb*lnorb : lnorb*rnorb;
1390   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
1391   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
1392 
1393   for (auto& spair : pvec) {
1394     // phase accumulated by moving an operator past the whole left ket block
1395     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
1396 
1397     { // I (x) Q_aa +  Q_aa (x) I
1398       auto out_block = make_shared<Matrix>(spair.nstates(), spair.nstates(), true);
1399 
1400       // I (x) Q_aa
1401       right_ops_->Q_aa_copy_to(scratch.get(), spair.right.key(), i, j);
1402       kronecker_product_A_I(1.0, false, spair.right.nstates, spair.right.nstates, scratch.get(), spair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
1403 
1404       // Q_aa (x) I
1405       left_ops_->Q_aa_copy_to(scratch.get(), spair.left.key(), i, j);
1406       kronecker_product_I_B(1.0, spair.right.nstates, false, spair.left.nstates, spair.left.nstates, scratch.get(), spair.left.nstates, out_block->data(), out_block->ndim());
1407 
1408       // add to map if large enough
1409       if (out_block->rms() > thresh_)
1410         out.emplace(make_pair<size_t, size_t>(spair.offset, spair.offset), out_block);
1411     }
1412 
1413     { // - 1.0 <L'| B |L> (x) <R'| B^t |R>
1414       const BlockKey left_target(spair.left.nelea, spair.left.neleb-1);
1415       const BlockKey right_target(spair.right.nelea, spair.right.neleb+1);
1416 
1417       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1418         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1419       );
1420       if(iter!=pvec.end()) {
1421         DMRG::BlockPair tpair = *iter;
1422 
1423         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1424 
1425         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({spair.left.key(),tpair.left.key()}).data;
1426         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({tpair.right.key(),spair.right.key()}).data;
1427 
1428         const int Lndim = Lgamma->extent(0);
1429         const int Lmdim = Lgamma->extent(1);
1430         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1431 
1432         const int Rndim = Rgamma->extent(0);
1433         const int Rmdim = Rgamma->extent(1);
1434         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1435 
1436         // fill in coulomb part
1437         assert(max_coulomb_size >= rnorb*lnorb);
1438         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1439         for (int p = 0; p < rnorb; ++p) {
1440           for (int a = 0; a < lnorb; ++a) {
1441             coulomb[a + p*lnorb] = mo2e[(p+roffset)+(i)*norb+(a+loffset)*norb*norb+(j)*norb*norb*norb];
1442           }
1443         }
1444 
1445         dgemm_("N", "N", Lsize, rnorb, lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1446         for (int p = 0; p < rnorb; ++p) {
1447           const double* ldata = scratch.get() + Lsize * p;
1448           const double* rdata = Rgamma->data() + Rsize * p;
1449           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1450         }
1451 
1452         // add to map if large enough
1453         if (out_block->rms() > thresh_)
1454           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1455       }
1456     }
1457 
1458     { // - 1.0 <L'| A |L> (x) <R'| A^t |R>
1459       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
1460       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb);
1461 
1462       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1463         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1464       );
1465       if(iter!=pvec.end()) {
1466         DMRG::BlockPair tpair = *iter;
1467 
1468         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1469 
1470         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
1471         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
1472 
1473         const int Lndim = Lgamma->extent(0);
1474         const int Lmdim = Lgamma->extent(1);
1475         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1476 
1477         const int Rndim = Rgamma->extent(0);
1478         const int Rmdim = Rgamma->extent(1);
1479         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1480 
1481         // fill in coulomb part
1482         assert(max_coulomb_size >= rnorb*lnorb);
1483         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1484         for (int p = 0; p < rnorb; ++p) {
1485           for (int a = 0; a < lnorb; ++a) {
1486             coulomb[a + p*lnorb] = (mo2e[(p+roffset)+(i)*norb+(a+loffset)*norb*norb+(j)*norb*norb*norb] - mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(j)*norb*norb*norb]);
1487           }
1488         }
1489 
1490         dgemm_("N", "N", Lsize, rnorb, lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1491         for (int p = 0; p < rnorb; ++p) {
1492           const double* ldata = scratch.get() + Lsize * p;
1493           const double* rdata = Rgamma->data() + Rsize * p;
1494           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1495         }
1496 
1497         // add to map if large enough
1498         if (out_block->rms() > thresh_)
1499           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1500       }
1501     }
1502 
1503     { // + 1.0 <L'| B^t |L> (x) <R'| B |R>
1504       const BlockKey left_target(spair.left.nelea, spair.left.neleb+1);
1505       const BlockKey right_target(spair.right.nelea, spair.right.neleb-1);
1506 
1507       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1508         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1509       );
1510       if(iter!=pvec.end()) {
1511         DMRG::BlockPair tpair = *iter;
1512 
1513         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1514 
1515         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({tpair.left.key(),spair.left.key()}).data;
1516         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({spair.right.key(),tpair.right.key()}).data;
1517 
1518         const int Lndim = Lgamma->extent(0);
1519         const int Lmdim = Lgamma->extent(1);
1520         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1521 
1522         const int Rndim = Rgamma->extent(0);
1523         const int Rmdim = Rgamma->extent(1);
1524         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1525 
1526         // fill in coulomb part
1527         assert(max_coulomb_size >= rnorb*lnorb);
1528         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1529         for (int p = 0; p < rnorb; ++p) {
1530           for (int a = 0; a < lnorb; ++a) {
1531             coulomb[a + p*lnorb] = mo2e[(a+loffset)+(i)*norb+(p+roffset)*norb*norb+(j)*norb*norb*norb];
1532           }
1533         }
1534 
1535         dgemm_("N", "N", Lsize, rnorb, lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1536         for (int p = 0; p < rnorb; ++p) {
1537           const double* ldata = scratch.get() + Lsize * p;
1538           const double* rdata = Rgamma->data() + Rsize * p;
1539           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1540         }
1541 
1542         // add to map if large enough
1543         if (out_block->rms() > thresh_)
1544           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1545       }
1546     }
1547 
1548     { // + 1.0 <L'| A^t |L> (x) <R'| A |R>
1549       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb);
1550       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
1551 
1552       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1553         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1554       );
1555       if(iter!=pvec.end()) {
1556         DMRG::BlockPair tpair = *iter;
1557 
1558         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1559 
1560         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
1561         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
1562 
1563         const int Lndim = Lgamma->extent(0);
1564         const int Lmdim = Lgamma->extent(1);
1565         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1566 
1567         const int Rndim = Rgamma->extent(0);
1568         const int Rmdim = Rgamma->extent(1);
1569         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1570 
1571         // fill in coulomb part
1572         assert(max_coulomb_size >= rnorb*lnorb);
1573         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1574         for (int p = 0; p < rnorb; ++p) {
1575           for (int a = 0; a < lnorb; ++a) {
1576             coulomb[a + p*lnorb] = (mo2e[(a+loffset)+(i)*norb+(p+roffset)*norb*norb+(j)*norb*norb*norb] - mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(j)*norb*norb*norb]);
1577           }
1578         }
1579 
1580         dgemm_("N", "N", Lsize, rnorb, lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1581         for (int p = 0; p < rnorb; ++p) {
1582           const double* ldata = scratch.get() + Lsize * p;
1583           const double* rdata = Rgamma->data() + Rsize * p;
1584           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1585         }
1586 
1587         // add to map if large enough
1588         if (out_block->rms() > thresh_)
1589           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1590       }
1591     }
1592   }
1593 
1594   return make_shared<BlockSparseMatrix>(binfo.nstates, binfo.nstates, out);
1595 }
1596 
1597 
Q_bb(BlockKey bk,const int i,const int j) const1598 shared_ptr<BlockSparseMatrix> BlockOperators2::Q_bb(BlockKey bk, const int i, const int j) const {
1599   const vector<DMRG::BlockPair>& pvec = blocks_->blockpairs(bk);
1600 
1601   const BlockInfo binfo = blocks_->blockinfo(bk);
1602 
1603   const int norb = jop_->nocc();
1604   const int lnorb = blocks_->left_block()->norb();
1605   const int rnorb = blocks_->right_block()->norb();
1606   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
1607   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
1608 
1609   const double* mo2e = jop_->mo2e()->data();
1610 
1611   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
1612                                    [] (const BlockInfo& a, const BlockInfo& b)
1613                                      { return a.nstates < b.nstates; })->nstates;
1614   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
1615                                    [] (const BlockInfo& a, const BlockInfo& b)
1616                                      { return a.nstates < b.nstates; })->nstates;
1617   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
1618   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
1619   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
1620   unique_ptr<double[]> scratch(new double[max_intermediate]);
1621 
1622   const size_t max_coulomb_size = rnorb*lnorb; // lnorb < rnorb ? rnorb*lnorb : lnorb*rnorb;
1623   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
1624   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
1625 
1626   for (auto& spair : pvec) {
1627     // phase accumulated by moving an operator past the whole left ket block
1628     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
1629 
1630     { // I (x) Q_bb +  Q_bb (x) I
1631       auto out_block = make_shared<Matrix>(spair.nstates(), spair.nstates(), true);
1632 
1633       // I (x) Q_bb
1634       right_ops_->Q_bb_copy_to(scratch.get(), spair.right.key(), i, j);
1635       kronecker_product_A_I(1.0, false, spair.right.nstates, spair.right.nstates, scratch.get(), spair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
1636 
1637       // Q_bb (x) I
1638       left_ops_->Q_bb_copy_to(scratch.get(), spair.left.key(), i, j);
1639       kronecker_product_I_B(1.0, spair.right.nstates, false, spair.left.nstates, spair.left.nstates, scratch.get(), spair.left.nstates, out_block->data(), out_block->ndim());
1640 
1641       // add to map if large enough
1642       if (out_block->rms() > thresh_)
1643         out.emplace(make_pair<size_t, size_t>(spair.offset, spair.offset), out_block);
1644     }
1645 
1646     { // - 1.0 <L'| B |L> (x) <R'| B^t |R>
1647       const BlockKey left_target(spair.left.nelea, spair.left.neleb-1);
1648       const BlockKey right_target(spair.right.nelea, spair.right.neleb+1);
1649 
1650       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1651         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1652       );
1653       if(iter!=pvec.end()) {
1654         DMRG::BlockPair tpair = *iter;
1655 
1656         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1657 
1658         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({spair.left.key(),tpair.left.key()}).data;
1659         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({tpair.right.key(),spair.right.key()}).data;
1660 
1661         const int Lndim = Lgamma->extent(0);
1662         const int Lmdim = Lgamma->extent(1);
1663         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1664 
1665         const int Rndim = Rgamma->extent(0);
1666         const int Rmdim = Rgamma->extent(1);
1667         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1668 
1669         // fill in coulomb part
1670         assert(max_coulomb_size >= rnorb*lnorb);
1671         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1672         for (int p = 0; p < rnorb; ++p) {
1673           for (int a = 0; a < lnorb; ++a) {
1674             coulomb[a + p*lnorb] = (mo2e[(p+roffset)+(i)*norb+(a+loffset)*norb*norb+(j)*norb*norb*norb] - mo2e[(i)+(p+roffset)*norb+(a+loffset)*norb*norb+(j)*norb*norb*norb]);
1675           }
1676         }
1677 
1678         dgemm_("N", "N", Lsize, rnorb, lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1679         for (int p = 0; p < rnorb; ++p) {
1680           const double* ldata = scratch.get() + Lsize * p;
1681           const double* rdata = Rgamma->data() + Rsize * p;
1682           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1683         }
1684 
1685         // add to map if large enough
1686         if (out_block->rms() > thresh_)
1687           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1688       }
1689     }
1690 
1691     { // - 1.0 <L'| A |L> (x) <R'| A^t |R>
1692       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
1693       const BlockKey right_target(spair.right.nelea+1, spair.right.neleb);
1694 
1695       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1696         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1697       );
1698       if(iter!=pvec.end()) {
1699         DMRG::BlockPair tpair = *iter;
1700 
1701         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1702 
1703         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
1704         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.right.key(),spair.right.key()}).data;
1705 
1706         const int Lndim = Lgamma->extent(0);
1707         const int Lmdim = Lgamma->extent(1);
1708         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1709 
1710         const int Rndim = Rgamma->extent(0);
1711         const int Rmdim = Rgamma->extent(1);
1712         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1713 
1714         // fill in coulomb part
1715         assert(max_coulomb_size >= rnorb*lnorb);
1716         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1717         for (int p = 0; p < rnorb; ++p) {
1718           for (int a = 0; a < lnorb; ++a) {
1719             coulomb[a + p*lnorb] = mo2e[(p+roffset)+(i)*norb+(a+loffset)*norb*norb+(j)*norb*norb*norb];
1720           }
1721         }
1722 
1723         dgemm_("N", "N", Lsize, rnorb, lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1724         for (int p = 0; p < rnorb; ++p) {
1725           const double* ldata = scratch.get() + Lsize * p;
1726           const double* rdata = Rgamma->data() + Rsize * p;
1727           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1728         }
1729 
1730         // add to map if large enough
1731         if (out_block->rms() > thresh_)
1732           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1733       }
1734     }
1735 
1736     { // + 1.0 <L'| A^t |L> (x) <R'| A |R>
1737       const BlockKey left_target(spair.left.nelea+1, spair.left.neleb);
1738       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
1739 
1740       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1741         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1742       );
1743       if(iter!=pvec.end()) {
1744         DMRG::BlockPair tpair = *iter;
1745 
1746         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1747 
1748         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({tpair.left.key(),spair.left.key()}).data;
1749         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
1750 
1751         const int Lndim = Lgamma->extent(0);
1752         const int Lmdim = Lgamma->extent(1);
1753         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1754 
1755         const int Rndim = Rgamma->extent(0);
1756         const int Rmdim = Rgamma->extent(1);
1757         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1758 
1759         // fill in coulomb part
1760         assert(max_coulomb_size >= rnorb*lnorb);
1761         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1762         for (int p = 0; p < rnorb; ++p) {
1763           for (int a = 0; a < lnorb; ++a) {
1764             coulomb[a + p*lnorb] = mo2e[(a+loffset)+(i)*norb+(p+roffset)*norb*norb+(j)*norb*norb*norb];
1765           }
1766         }
1767 
1768         dgemm_("N", "N", Lsize, rnorb, lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1769         for (int p = 0; p < rnorb; ++p) {
1770           const double* ldata = scratch.get() + Lsize * p;
1771           const double* rdata = Rgamma->data() + Rsize * p;
1772           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1773         }
1774 
1775         // add to map if large enough
1776         if (out_block->rms() > thresh_)
1777           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1778       }
1779     }
1780 
1781     { // + 1.0 <L'| B^t |L> (x) <R'| B |R>
1782       const BlockKey left_target(spair.left.nelea, spair.left.neleb+1);
1783       const BlockKey right_target(spair.right.nelea, spair.right.neleb-1);
1784 
1785       auto iter = find_if(pvec.begin(), pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1786         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1787       );
1788       if(iter!=pvec.end()) {
1789         DMRG::BlockPair tpair = *iter;
1790 
1791         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1792 
1793         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({tpair.left.key(),spair.left.key()}).data;
1794         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({spair.right.key(),tpair.right.key()}).data;
1795 
1796         const int Lndim = Lgamma->extent(0);
1797         const int Lmdim = Lgamma->extent(1);
1798         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1799 
1800         const int Rndim = Rgamma->extent(0);
1801         const int Rmdim = Rgamma->extent(1);
1802         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1803 
1804         // fill in coulomb part
1805         assert(max_coulomb_size >= rnorb*lnorb);
1806         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1807         for (int p = 0; p < rnorb; ++p) {
1808           for (int a = 0; a < lnorb; ++a) {
1809             coulomb[a + p*lnorb] = (mo2e[(a+loffset)+(i)*norb+(p+roffset)*norb*norb+(j)*norb*norb*norb] - mo2e[(i)+(a+loffset)*norb+(p+roffset)*norb*norb+(j)*norb*norb*norb]);
1810           }
1811         }
1812 
1813         dgemm_("N", "N", Lsize, rnorb, lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1814         for (int p = 0; p < rnorb; ++p) {
1815           const double* ldata = scratch.get() + Lsize * p;
1816           const double* rdata = Rgamma->data() + Rsize * p;
1817           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1818         }
1819 
1820         // add to map if large enough
1821         if (out_block->rms() > thresh_)
1822           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1823       }
1824     }
1825   }
1826 
1827   return make_shared<BlockSparseMatrix>(binfo.nstates, binfo.nstates, out);
1828 }
1829 
1830 
Q_ab(BlockKey bk,const int i,const int j) const1831 shared_ptr<BlockSparseMatrix> BlockOperators2::Q_ab(BlockKey bk, const int i, const int j) const {
1832   BlockKey target_bk(bk.nelea-1,bk.neleb+1);
1833   assert(blocks_->contains(target_bk));
1834 
1835   const vector<DMRG::BlockPair>& source_pvec = blocks_->blockpairs(bk);
1836   const vector<DMRG::BlockPair>& target_pvec = blocks_->blockpairs(target_bk);
1837 
1838   const BlockInfo source_info = blocks_->blockinfo(bk);
1839   const BlockInfo target_info = blocks_->blockinfo(target_bk);
1840 
1841   const int norb = jop_->nocc();
1842   const int lnorb = blocks_->left_block()->norb();
1843   const int rnorb = blocks_->right_block()->norb();
1844   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
1845   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
1846 
1847   const double* mo2e = jop_->mo2e()->data();
1848 
1849   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
1850                                    [] (const BlockInfo& a, const BlockInfo& b)
1851                                      { return a.nstates < b.nstates; })->nstates;
1852   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
1853                                    [] (const BlockInfo& a, const BlockInfo& b)
1854                                      { return a.nstates < b.nstates; })->nstates;
1855   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
1856   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
1857   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
1858   unique_ptr<double[]> scratch(new double[max_intermediate]);
1859 
1860   const size_t max_coulomb_size = rnorb*lnorb; // lnorb < rnorb ? rnorb*lnorb : lnorb*rnorb;
1861   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
1862   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
1863 
1864   for (auto& spair : source_pvec) {
1865     // phase accumulated by moving an operator past the whole left ket block
1866     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
1867 
1868     { // I (x) Q_ab
1869       const BlockKey left_target = spair.left.key();
1870       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb+1);
1871 
1872       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1873         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1874       );
1875       if(iter!=target_pvec.end()) {
1876         DMRG::BlockPair tpair = *iter;
1877 
1878         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1879 
1880         // I (x) Q_ab
1881         right_ops_->Q_ab_copy_to(scratch.get(), spair.right.key(), i, j);
1882         kronecker_product_A_I(1.0, false, tpair.right.nstates, spair.right.nstates, scratch.get(), tpair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
1883 
1884         // add to map if large enough
1885         if (out_block->rms() > thresh_)
1886           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1887       }
1888     }
1889 
1890     { // + 1.0 <L'| A |L> (x) <R'| B^t |R>
1891       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
1892       const BlockKey right_target(spair.right.nelea, spair.right.neleb+1);
1893 
1894       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1895         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1896       );
1897       if(iter!=target_pvec.end()) {
1898         DMRG::BlockPair tpair = *iter;
1899 
1900         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1901 
1902         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
1903         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({tpair.right.key(),spair.right.key()}).data;
1904 
1905         const int Lndim = Lgamma->extent(0);
1906         const int Lmdim = Lgamma->extent(1);
1907         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1908 
1909         const int Rndim = Rgamma->extent(0);
1910         const int Rmdim = Rgamma->extent(1);
1911         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1912 
1913         // fill in coulomb part
1914         assert(max_coulomb_size >= rnorb*lnorb);
1915         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1916         for (int p = 0; p < rnorb; ++p) {
1917           for (int a = 0; a < lnorb; ++a) {
1918             coulomb[a + p*lnorb] = mo2e[(p+roffset)+(a+loffset)*norb+(j)*norb*norb+(i)*norb*norb*norb];
1919           }
1920         }
1921 
1922         dgemm_("N", "N", Lsize, rnorb, lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1923         for (int p = 0; p < rnorb; ++p) {
1924           const double* ldata = scratch.get() + Lsize * p;
1925           const double* rdata = Rgamma->data() + Rsize * p;
1926           kronecker_product(left_phase, false, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1927         }
1928 
1929         // add to map if large enough
1930         if (out_block->rms() > thresh_)
1931           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1932       }
1933     }
1934 
1935     { // Q_ab (x) I
1936       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb+1);
1937       const BlockKey right_target = spair.right.key();
1938 
1939       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1940         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1941       );
1942       if(iter!=target_pvec.end()) {
1943         DMRG::BlockPair tpair = *iter;
1944 
1945         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1946 
1947         // Q_ab (x) I
1948         left_ops_->Q_ab_copy_to(scratch.get(), spair.left.key(), i, j);
1949         kronecker_product_I_B(1.0, spair.right.nstates, false, tpair.left.nstates, spair.left.nstates, scratch.get(), tpair.left.nstates, out_block->data(), out_block->ndim());
1950 
1951         // add to map if large enough
1952         if (out_block->rms() > thresh_)
1953           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1954       }
1955     }
1956 
1957     { // - 1.0 <L'| B^t |L> (x) <R'| A |R>
1958       const BlockKey left_target(spair.left.nelea, spair.left.neleb+1);
1959       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
1960 
1961       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
1962         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
1963       );
1964       if(iter!=target_pvec.end()) {
1965         DMRG::BlockPair tpair = *iter;
1966 
1967         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
1968 
1969         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({tpair.left.key(),spair.left.key()}).data;
1970         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
1971 
1972         const int Lndim = Lgamma->extent(0);
1973         const int Lmdim = Lgamma->extent(1);
1974         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
1975 
1976         const int Rndim = Rgamma->extent(0);
1977         const int Rmdim = Rgamma->extent(1);
1978         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
1979 
1980         // fill in coulomb part
1981         assert(max_coulomb_size >= rnorb*lnorb);
1982         fill_n(scratch.get(), Lsize * rnorb, 0.0);
1983         for (int p = 0; p < rnorb; ++p) {
1984           for (int a = 0; a < lnorb; ++a) {
1985             coulomb[a + p*lnorb] = mo2e[(a+loffset)+(p+roffset)*norb+(j)*norb*norb+(i)*norb*norb*norb];
1986           }
1987         }
1988 
1989         dgemm_("N", "N", Lsize, rnorb, lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
1990         for (int p = 0; p < rnorb; ++p) {
1991           const double* ldata = scratch.get() + Lsize * p;
1992           const double* rdata = Rgamma->data() + Rsize * p;
1993           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, false, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
1994         }
1995 
1996         // add to map if large enough
1997         if (out_block->rms() > thresh_)
1998           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
1999       }
2000     }
2001   }
2002 
2003   return make_shared<BlockSparseMatrix>(target_info.nstates, source_info.nstates, out);
2004 }
2005 
2006 
P_aa(BlockKey bk,const int i,const int j) const2007 shared_ptr<BlockSparseMatrix> BlockOperators2::P_aa(BlockKey bk, const int i, const int j) const {
2008   BlockKey target_bk(bk.nelea-2,bk.neleb);
2009   assert(blocks_->contains(target_bk));
2010 
2011   const vector<DMRG::BlockPair>& source_pvec = blocks_->blockpairs(bk);
2012   const vector<DMRG::BlockPair>& target_pvec = blocks_->blockpairs(target_bk);
2013 
2014   const BlockInfo source_info = blocks_->blockinfo(bk);
2015   const BlockInfo target_info = blocks_->blockinfo(target_bk);
2016 
2017   const int norb = jop_->nocc();
2018   const int lnorb = blocks_->left_block()->norb();
2019   const int rnorb = blocks_->right_block()->norb();
2020   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
2021   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
2022 
2023   const double* mo2e = jop_->mo2e()->data();
2024 
2025   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
2026                                    [] (const BlockInfo& a, const BlockInfo& b)
2027                                      { return a.nstates < b.nstates; })->nstates;
2028   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
2029                                    [] (const BlockInfo& a, const BlockInfo& b)
2030                                      { return a.nstates < b.nstates; })->nstates;
2031   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
2032   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
2033   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
2034   unique_ptr<double[]> scratch(new double[max_intermediate]);
2035 
2036   const size_t max_coulomb_size = rnorb*lnorb; // lnorb < rnorb ? rnorb*lnorb : lnorb*rnorb;
2037   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
2038   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
2039 
2040   for (auto& spair : source_pvec) {
2041     // phase accumulated by moving an operator past the whole left ket block
2042     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
2043 
2044     { // + 0.5 <L'| A |L> (x) <R'| A |R> - 0.5 <L'| A |L> (x) <R'| A |R>
2045       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
2046       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
2047 
2048       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2049         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2050       );
2051       if(iter!=target_pvec.end()) {
2052         DMRG::BlockPair tpair = *iter;
2053 
2054         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2055 
2056         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
2057         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
2058 
2059         const int Lndim = Lgamma->extent(0);
2060         const int Lmdim = Lgamma->extent(1);
2061         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
2062 
2063         const int Rndim = Rgamma->extent(0);
2064         const int Rmdim = Rgamma->extent(1);
2065         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
2066 
2067         // fill in coulomb part
2068         assert(max_coulomb_size >= rnorb*lnorb);
2069         fill_n(scratch.get(), Lsize * rnorb, 0.0);
2070         for (int p = 0; p < rnorb; ++p) {
2071           for (int a = 0; a < lnorb; ++a) {
2072             coulomb[a + p*lnorb] = (mo2e[(a+loffset)+(p+roffset)*norb+(j)*norb*norb+(i)*norb*norb*norb] - mo2e[(a+loffset)+(p+roffset)*norb+(i)*norb*norb+(j)*norb*norb*norb]);
2073           }
2074         }
2075 
2076         dgemm_("N", "N", Lsize, rnorb, lnorb, 0.5, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
2077         for (int p = 0; p < rnorb; ++p) {
2078           const double* ldata = scratch.get() + Lsize * p;
2079           const double* rdata = Rgamma->data() + Rsize * p;
2080           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
2081         }
2082 
2083         // add to map if large enough
2084         if (out_block->rms() > thresh_)
2085           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2086       }
2087     }
2088 
2089     { // I (x) P_aa
2090       const BlockKey left_target = spair.left.key();
2091       const BlockKey right_target(spair.right.nelea-2, spair.right.neleb);
2092 
2093       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2094         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2095       );
2096       if(iter!=target_pvec.end()) {
2097         DMRG::BlockPair tpair = *iter;
2098 
2099         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2100 
2101         // I (x) P_aa
2102         right_ops_->P_aa_copy_to(scratch.get(), spair.right.key(), i, j);
2103         kronecker_product_A_I(1.0, false, tpair.right.nstates, spair.right.nstates, scratch.get(), tpair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
2104 
2105         // add to map if large enough
2106         if (out_block->rms() > thresh_)
2107           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2108       }
2109     }
2110 
2111     { // P_aa (x) I
2112       const BlockKey left_target(spair.left.nelea-2, spair.left.neleb);
2113       const BlockKey right_target = spair.right.key();
2114 
2115       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2116         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2117       );
2118       if(iter!=target_pvec.end()) {
2119         DMRG::BlockPair tpair = *iter;
2120 
2121         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2122 
2123         // P_aa (x) I
2124         left_ops_->P_aa_copy_to(scratch.get(), spair.left.key(), i, j);
2125         kronecker_product_I_B(1.0, spair.right.nstates, false, tpair.left.nstates, spair.left.nstates, scratch.get(), tpair.left.nstates, out_block->data(), out_block->ndim());
2126 
2127         // add to map if large enough
2128         if (out_block->rms() > thresh_)
2129           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2130       }
2131     }
2132   }
2133 
2134   return make_shared<BlockSparseMatrix>(target_info.nstates, source_info.nstates, out);
2135 }
2136 
2137 
P_bb(BlockKey bk,const int i,const int j) const2138 shared_ptr<BlockSparseMatrix> BlockOperators2::P_bb(BlockKey bk, const int i, const int j) const {
2139   BlockKey target_bk(bk.nelea,bk.neleb-2);
2140   assert(blocks_->contains(target_bk));
2141 
2142   const vector<DMRG::BlockPair>& source_pvec = blocks_->blockpairs(bk);
2143   const vector<DMRG::BlockPair>& target_pvec = blocks_->blockpairs(target_bk);
2144 
2145   const BlockInfo source_info = blocks_->blockinfo(bk);
2146   const BlockInfo target_info = blocks_->blockinfo(target_bk);
2147 
2148   const int norb = jop_->nocc();
2149   const int lnorb = blocks_->left_block()->norb();
2150   const int rnorb = blocks_->right_block()->norb();
2151   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
2152   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
2153 
2154   const double* mo2e = jop_->mo2e()->data();
2155 
2156   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
2157                                    [] (const BlockInfo& a, const BlockInfo& b)
2158                                      { return a.nstates < b.nstates; })->nstates;
2159   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
2160                                    [] (const BlockInfo& a, const BlockInfo& b)
2161                                      { return a.nstates < b.nstates; })->nstates;
2162   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
2163   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
2164   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
2165   unique_ptr<double[]> scratch(new double[max_intermediate]);
2166 
2167   const size_t max_coulomb_size = rnorb*lnorb; // lnorb < rnorb ? rnorb*lnorb : lnorb*rnorb;
2168   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
2169   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
2170 
2171   for (auto& spair : source_pvec) {
2172     // phase accumulated by moving an operator past the whole left ket block
2173     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
2174 
2175     { // + 0.5 <L'| B |L> (x) <R'| B |R> - 0.5 <L'| B |L> (x) <R'| B |R>
2176       const BlockKey left_target(spair.left.nelea, spair.left.neleb-1);
2177       const BlockKey right_target(spair.right.nelea, spair.right.neleb-1);
2178 
2179       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2180         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2181       );
2182       if(iter!=target_pvec.end()) {
2183         DMRG::BlockPair tpair = *iter;
2184 
2185         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2186 
2187         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({spair.left.key(),tpair.left.key()}).data;
2188         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({spair.right.key(),tpair.right.key()}).data;
2189 
2190         const int Lndim = Lgamma->extent(0);
2191         const int Lmdim = Lgamma->extent(1);
2192         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
2193 
2194         const int Rndim = Rgamma->extent(0);
2195         const int Rmdim = Rgamma->extent(1);
2196         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
2197 
2198         // fill in coulomb part
2199         assert(max_coulomb_size >= rnorb*lnorb);
2200         fill_n(scratch.get(), Lsize * rnorb, 0.0);
2201         for (int p = 0; p < rnorb; ++p) {
2202           for (int a = 0; a < lnorb; ++a) {
2203             coulomb[a + p*lnorb] = (mo2e[(a+loffset)+(p+roffset)*norb+(j)*norb*norb+(i)*norb*norb*norb] - mo2e[(a+loffset)+(p+roffset)*norb+(i)*norb*norb+(j)*norb*norb*norb]);
2204           }
2205         }
2206 
2207         dgemm_("N", "N", Lsize, rnorb, lnorb, 0.5, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
2208         for (int p = 0; p < rnorb; ++p) {
2209           const double* ldata = scratch.get() + Lsize * p;
2210           const double* rdata = Rgamma->data() + Rsize * p;
2211           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
2212         }
2213 
2214         // add to map if large enough
2215         if (out_block->rms() > thresh_)
2216           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2217       }
2218     }
2219 
2220     { // I (x) P_bb
2221       const BlockKey left_target = spair.left.key();
2222       const BlockKey right_target(spair.right.nelea, spair.right.neleb-2);
2223 
2224       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2225         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2226       );
2227       if(iter!=target_pvec.end()) {
2228         DMRG::BlockPair tpair = *iter;
2229 
2230         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2231 
2232         // I (x) P_bb
2233         right_ops_->P_bb_copy_to(scratch.get(), spair.right.key(), i, j);
2234         kronecker_product_A_I(1.0, false, tpair.right.nstates, spair.right.nstates, scratch.get(), tpair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
2235 
2236         // add to map if large enough
2237         if (out_block->rms() > thresh_)
2238           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2239       }
2240     }
2241 
2242     { // P_bb (x) I
2243       const BlockKey left_target(spair.left.nelea, spair.left.neleb-2);
2244       const BlockKey right_target = spair.right.key();
2245 
2246       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2247         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2248       );
2249       if(iter!=target_pvec.end()) {
2250         DMRG::BlockPair tpair = *iter;
2251 
2252         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2253 
2254         // P_bb (x) I
2255         left_ops_->P_bb_copy_to(scratch.get(), spair.left.key(), i, j);
2256         kronecker_product_I_B(1.0, spair.right.nstates, false, tpair.left.nstates, spair.left.nstates, scratch.get(), tpair.left.nstates, out_block->data(), out_block->ndim());
2257 
2258         // add to map if large enough
2259         if (out_block->rms() > thresh_)
2260           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2261       }
2262     }
2263   }
2264 
2265   return make_shared<BlockSparseMatrix>(target_info.nstates, source_info.nstates, out);
2266 }
2267 
2268 
P_ab(BlockKey bk,const int i,const int j) const2269 shared_ptr<BlockSparseMatrix> BlockOperators2::P_ab(BlockKey bk, const int i, const int j) const {
2270   BlockKey target_bk(bk.nelea-1,bk.neleb-1);
2271   assert(blocks_->contains(target_bk));
2272 
2273   const vector<DMRG::BlockPair>& source_pvec = blocks_->blockpairs(bk);
2274   const vector<DMRG::BlockPair>& target_pvec = blocks_->blockpairs(target_bk);
2275 
2276   const BlockInfo source_info = blocks_->blockinfo(bk);
2277   const BlockInfo target_info = blocks_->blockinfo(target_bk);
2278 
2279   const int norb = jop_->nocc();
2280   const int lnorb = blocks_->left_block()->norb();
2281   const int rnorb = blocks_->right_block()->norb();
2282   const int loffset = norb - (lnorb + rnorb); // convenience variable for offset of left orbitals from zero
2283   const int roffset = norb - rnorb; // convenience variable for offset of right orbitals from zero
2284 
2285   const double* mo2e = jop_->mo2e()->data();
2286 
2287   const int max_L_M = max_element(blocks_->left_block()->blocks().begin(), blocks_->left_block()->blocks().end(),
2288                                    [] (const BlockInfo& a, const BlockInfo& b)
2289                                      { return a.nstates < b.nstates; })->nstates;
2290   const int max_R_M = max_element(blocks_->right_block()->blocks().begin(), blocks_->right_block()->blocks().end(),
2291                                    [] (const BlockInfo& a, const BlockInfo& b)
2292                                      { return a.nstates < b.nstates; })->nstates;
2293   const size_t max_L_intermediate = max_L_M * max_L_M * rnorb;
2294   const size_t max_R_intermediate = max_R_M * max_R_M * lnorb;
2295   const size_t max_intermediate = max(max_L_intermediate, max_R_intermediate);
2296   unique_ptr<double[]> scratch(new double[max_intermediate]);
2297 
2298   const size_t max_coulomb_size = rnorb*lnorb; // lnorb < rnorb ? rnorb*lnorb : lnorb*rnorb;
2299   unique_ptr<double[]> coulomb(new double[max_coulomb_size]);
2300   map<pair<size_t, size_t>, shared_ptr<Matrix>> out;
2301 
2302   for (auto& spair : source_pvec) {
2303     // phase accumulated by moving an operator past the whole left ket block
2304     const int left_phase = 1 - (((spair.left.nelea+spair.left.neleb)%2) << 1);
2305 
2306     { // P_ab (x) I
2307       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb-1);
2308       const BlockKey right_target = spair.right.key();
2309 
2310       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2311         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2312       );
2313       if(iter!=target_pvec.end()) {
2314         DMRG::BlockPair tpair = *iter;
2315 
2316         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2317 
2318         // P_ab (x) I
2319         left_ops_->P_ab_copy_to(scratch.get(), spair.left.key(), i, j);
2320         kronecker_product_I_B(1.0, spair.right.nstates, false, tpair.left.nstates, spair.left.nstates, scratch.get(), tpair.left.nstates, out_block->data(), out_block->ndim());
2321 
2322         // add to map if large enough
2323         if (out_block->rms() > thresh_)
2324           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2325       }
2326     }
2327 
2328     { // - 1.0 <L'| A |L> (x) <R'| B |R>
2329       const BlockKey left_target(spair.left.nelea-1, spair.left.neleb);
2330       const BlockKey right_target(spair.right.nelea, spair.right.neleb-1);
2331 
2332       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2333         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2334       );
2335       if(iter!=target_pvec.end()) {
2336         DMRG::BlockPair tpair = *iter;
2337 
2338         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2339 
2340         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateAlpha}).at({spair.left.key(),tpair.left.key()}).data;
2341         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateBeta}).at({spair.right.key(),tpair.right.key()}).data;
2342 
2343         const int Lndim = Lgamma->extent(0);
2344         const int Lmdim = Lgamma->extent(1);
2345         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
2346 
2347         const int Rndim = Rgamma->extent(0);
2348         const int Rmdim = Rgamma->extent(1);
2349         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
2350 
2351         // fill in coulomb part
2352         assert(max_coulomb_size >= rnorb*lnorb);
2353         fill_n(scratch.get(), Lsize * rnorb, 0.0);
2354         for (int p = 0; p < rnorb; ++p) {
2355           for (int a = 0; a < lnorb; ++a) {
2356             coulomb[a + p*lnorb] = mo2e[(p+roffset)+(a+loffset)*norb+(j)*norb*norb+(i)*norb*norb*norb];
2357           }
2358         }
2359 
2360         dgemm_("N", "N", Lsize, rnorb, lnorb, -1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
2361         for (int p = 0; p < rnorb; ++p) {
2362           const double* ldata = scratch.get() + Lsize * p;
2363           const double* rdata = Rgamma->data() + Rsize * p;
2364           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
2365         }
2366 
2367         // add to map if large enough
2368         if (out_block->rms() > thresh_)
2369           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2370       }
2371     }
2372 
2373     { // I (x) P_ab
2374       const BlockKey left_target = spair.left.key();
2375       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb-1);
2376 
2377       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2378         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2379       );
2380       if(iter!=target_pvec.end()) {
2381         DMRG::BlockPair tpair = *iter;
2382 
2383         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2384 
2385         // I (x) P_ab
2386         right_ops_->P_ab_copy_to(scratch.get(), spair.right.key(), i, j);
2387         kronecker_product_A_I(1.0, false, tpair.right.nstates, spair.right.nstates, scratch.get(), tpair.right.nstates, spair.left.nstates, out_block->data(), out_block->ndim());
2388 
2389         // add to map if large enough
2390         if (out_block->rms() > thresh_)
2391           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2392       }
2393     }
2394 
2395     { // + 1.0 <L'| B |L> (x) <R'| A |R>
2396       const BlockKey left_target(spair.left.nelea, spair.left.neleb-1);
2397       const BlockKey right_target(spair.right.nelea-1, spair.right.neleb);
2398 
2399       auto iter = find_if(target_pvec.begin(), target_pvec.end(), [&left_target, &right_target] (DMRG::BlockPair bp)
2400         { return make_pair(bp.left.key(), bp.right.key())==make_pair(left_target, right_target); }
2401       );
2402       if(iter!=target_pvec.end()) {
2403         DMRG::BlockPair tpair = *iter;
2404 
2405         auto out_block = make_shared<Matrix>(tpair.nstates(), spair.nstates(), true);
2406 
2407         shared_ptr<const btas::Tensor3<double>> Lgamma = blocks_->left_block()->coupling({GammaSQ::CreateBeta}).at({spair.left.key(),tpair.left.key()}).data;
2408         shared_ptr<const btas::Tensor3<double>> Rgamma = blocks_->right_block()->coupling({GammaSQ::CreateAlpha}).at({spair.right.key(),tpair.right.key()}).data;
2409 
2410         const int Lndim = Lgamma->extent(0);
2411         const int Lmdim = Lgamma->extent(1);
2412         const int Lsize = Lgamma->extent(0) * Lgamma->extent(1);
2413 
2414         const int Rndim = Rgamma->extent(0);
2415         const int Rmdim = Rgamma->extent(1);
2416         const int Rsize = Rgamma->extent(0) * Rgamma->extent(1);
2417 
2418         // fill in coulomb part
2419         assert(max_coulomb_size >= rnorb*lnorb);
2420         fill_n(scratch.get(), Lsize * rnorb, 0.0);
2421         for (int p = 0; p < rnorb; ++p) {
2422           for (int a = 0; a < lnorb; ++a) {
2423             coulomb[a + p*lnorb] = mo2e[(a+loffset)+(p+roffset)*norb+(j)*norb*norb+(i)*norb*norb*norb];
2424           }
2425         }
2426 
2427         dgemm_("N", "N", Lsize, rnorb, lnorb, 1.0, Lgamma->data(), Lsize, coulomb.get(), lnorb, 1.0, scratch.get(), Lsize);
2428         for (int p = 0; p < rnorb; ++p) {
2429           const double* ldata = scratch.get() + Lsize * p;
2430           const double* rdata = Rgamma->data() + Rsize * p;
2431           kronecker_product(left_phase, true, Rndim, Rmdim, rdata, Rndim, true, Lndim, Lmdim, ldata, Lndim, out_block->data(), out_block->ndim());
2432         }
2433 
2434         // add to map if large enough
2435         if (out_block->rms() > thresh_)
2436           out.emplace(make_pair<size_t, size_t>(tpair.offset, spair.offset), out_block);
2437       }
2438     }
2439   }
2440 
2441   return make_shared<BlockSparseMatrix>(target_info.nstates, source_info.nstates, out);
2442 }
2443