1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: MSCASPT2_tasks12.cc
4 // Copyright (C) 2014 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 //
24 
25 #include <bagel_config.h>
26 #ifdef COMPILE_SMITH
27 
28 #include <src/smith/caspt2/MSCASPT2_tasks12.h>
29 
30 using namespace std;
31 using namespace bagel;
32 using namespace bagel::SMITH;
33 using namespace bagel::SMITH::MSCASPT2;
34 
compute()35 void Task550::Task_local::compute() {
36   const Index x2 = b(0);
37   const Index x3 = b(1);
38   const Index x0 = b(2);
39   const Index x1 = b(3);
40   // tensor label: I832
41   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
42   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
43   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
44   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
45   for (auto& c1 : *range_[0]) {
46     for (auto& a2 : *range_[2]) {
47       // tensor label: v2
48       std::unique_ptr<double[]> i0data = in(0)->get_block(c1, a2, x0, x1);
49       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c1, a2, x0, x1)]);
50       sort_indices<0,1,2,3,0,1,1,1>(i0data, i0data_sorted, c1.size(), a2.size(), x0.size(), x1.size());
51       // tensor label: l2
52       std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, x3, x2);
53       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, x3, x2)]);
54       sort_indices<0,1,2,3,0,1,1,1>(i1data, i1data_sorted, c1.size(), a2.size(), x3.size(), x2.size());
55       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), a2.size()*c1.size(),
56              1.0, i0data_sorted, a2.size()*c1.size(), i1data_sorted, a2.size()*c1.size(),
57              1.0, odata_sorted, x0.size()*x1.size());
58     }
59   }
60   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
61   out()->add_block(odata, x2, x3, x0, x1);
62 }
63 
compute()64 void Task551::Task_local::compute() {
65   const Index x2 = b(0);
66   const Index x3 = b(1);
67   const Index x0 = b(2);
68   const Index x1 = b(3);
69   // tensor label: I832
70   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
71   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
72   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
73   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
74   for (auto& a2 : *range_[2]) {
75     for (auto& c1 : *range_[0]) {
76       // tensor label: v2
77       std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a2, c1, x1);
78       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a2, c1, x1)]);
79       sort_indices<1,2,0,3,0,1,1,1>(i0data, i0data_sorted, x0.size(), a2.size(), c1.size(), x1.size());
80       // tensor label: l2
81       std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, x3, x2);
82       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, x3, x2)]);
83       sort_indices<1,0,2,3,0,1,-1,2>(i1data, i1data_sorted, c1.size(), a2.size(), x3.size(), x2.size());
84       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), a2.size()*c1.size(),
85              1.0, i0data_sorted, a2.size()*c1.size(), i1data_sorted, a2.size()*c1.size(),
86              1.0, odata_sorted, x0.size()*x1.size());
87     }
88   }
89   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
90   out()->add_block(odata, x2, x3, x0, x1);
91 }
92 
compute()93 void Task552::Task_local::compute() {
94   const Index x2 = b(0);
95   const Index x3 = b(1);
96   const Index x0 = b(2);
97   const Index x1 = b(3);
98   // tensor label: I832
99   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
100   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
101   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
102   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
103   for (auto& c1 : *range_[0]) {
104     for (auto& a2 : *range_[2]) {
105       // tensor label: v2
106       std::unique_ptr<double[]> i0data = in(0)->get_block(x0, x1, c1, a2);
107       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, x1, c1, a2)]);
108       sort_indices<2,3,0,1,0,1,1,1>(i0data, i0data_sorted, x0.size(), x1.size(), c1.size(), a2.size());
109       // tensor label: l2
110       std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, x3, x2);
111       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, x3, x2)]);
112       sort_indices<0,1,2,3,0,1,1,1>(i1data, i1data_sorted, c1.size(), a2.size(), x3.size(), x2.size());
113       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), a2.size()*c1.size(),
114              1.0, i0data_sorted, a2.size()*c1.size(), i1data_sorted, a2.size()*c1.size(),
115              1.0, odata_sorted, x0.size()*x1.size());
116     }
117   }
118   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
119   out()->add_block(odata, x2, x3, x0, x1);
120 }
121 
compute()122 void Task553::Task_local::compute() {
123   const Index x3 = b(0);
124   const Index x1 = b(1);
125   const Index x0 = b(2);
126   const Index x2 = b(3);
127   // tensor label: I835
128   std::unique_ptr<double[]> i0data = in(0)->get_block(x2, x3, x0, x1);
129   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x2, x3, x0, x1)]);
130   sort_indices<1,3,2,0,0,1,1,1>(i0data, i0data_sorted, x2.size(), x3.size(), x0.size(), x1.size());
131   // tensor label (calculated on-the-fly): Gamma132
132   {
133     if (x0 == x1) {
134       std::unique_ptr<double[]> o1data(new double[out(1)->get_size(x3, x2)]);
135       std::fill_n(o1data.get(), out(1)->get_size(x3, x2), 0.0);
136       for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
137         for (int ix1 = 0; ix1 != x1.size(); ++ix1) {
138           for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
139             o1data[ix3+x3.size()*(ix2)] +=
140               1.0 * i0data_sorted[ix3+x3.size()*(ix1+x1.size()*(ix1+x0.size()*(ix2)))];
141           }
142         }
143       }
144       out(1)->add_block(o1data, x3, x2);
145     }
146   }
147   {
148     if (x0 == x2) {
149       std::unique_ptr<double[]> o1data(new double[out(1)->get_size(x3, x1)]);
150       std::fill_n(o1data.get(), out(1)->get_size(x3, x1), 0.0);
151       for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
152         for (int ix1 = 0; ix1 != x1.size(); ++ix1) {
153           for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
154             o1data[ix3+x3.size()*(ix1)] +=
155               -2.0 * i0data_sorted[ix3+x3.size()*(ix1+x1.size()*(ix2+x0.size()*(ix2)))];
156           }
157         }
158       }
159       out(1)->add_block(o1data, x3, x1);
160     }
161   }
162   {
163     std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x3, x1, x0, x2)]);
164     std::fill_n(o2data.get(), out(2)->get_size(x3, x1, x0, x2), 0.0);
165     sort_indices<0,1,2,3,1,1,1,1>(i0data_sorted, o2data, x3.size(), x1.size(), x0.size(), x2.size());
166     out(2)->add_block(o2data, x3, x1, x0, x2);
167   }
168 }
169 
compute()170 void Task554::Task_local::compute() {
171   const Index x2 = b(0);
172   const Index x3 = b(1);
173   const Index x0 = b(2);
174   const Index x1 = b(3);
175   // tensor label: I835
176   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
177   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
178   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
179   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
180   for (auto& c2 : *range_[0]) {
181     for (auto& a1 : *range_[2]) {
182       // tensor label: v2
183       std::unique_ptr<double[]> i0data = in(0)->get_block(c2, x0, x1, a1);
184       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c2, x0, x1, a1)]);
185       sort_indices<0,3,1,2,0,1,1,1>(i0data, i0data_sorted, c2.size(), x0.size(), x1.size(), a1.size());
186       // tensor label: l2
187       std::unique_ptr<double[]> i1data = in(1)->get_block(x3, a1, c2, x2);
188       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x3, a1, c2, x2)]);
189       sort_indices<2,1,0,3,0,1,-1,2>(i1data, i1data_sorted, x3.size(), a1.size(), c2.size(), x2.size());
190       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), c2.size()*a1.size(),
191              1.0, i0data_sorted, c2.size()*a1.size(), i1data_sorted, c2.size()*a1.size(),
192              1.0, odata_sorted, x0.size()*x1.size());
193     }
194   }
195   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
196   out()->add_block(odata, x2, x3, x0, x1);
197 }
198 
compute()199 void Task555::Task_local::compute() {
200   const Index x3 = b(0);
201   const Index x0 = b(1);
202   const Index x1 = b(2);
203   const Index x2 = b(3);
204   // tensor label: I838
205   std::unique_ptr<double[]> i0data = in(0)->get_block(x2, x3, x0, x1);
206   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x2, x3, x0, x1)]);
207   sort_indices<1,2,3,0,0,1,1,1>(i0data, i0data_sorted, x2.size(), x3.size(), x0.size(), x1.size());
208   // tensor label (calculated on-the-fly): Gamma142
209   {
210     if (x1 == x2) {
211       std::unique_ptr<double[]> o1data(new double[out(1)->get_size(x3, x0)]);
212       std::fill_n(o1data.get(), out(1)->get_size(x3, x0), 0.0);
213       for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
214         for (int ix0 = 0; ix0 != x0.size(); ++ix0) {
215           for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
216             o1data[ix3+x3.size()*(ix0)] +=
217               2.0 * i0data_sorted[ix3+x3.size()*(ix0+x0.size()*(ix2+x1.size()*(ix2)))];
218           }
219         }
220       }
221       out(1)->add_block(o1data, x3, x0);
222     }
223   }
224   {
225     std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x3, x0, x1, x2)]);
226     std::fill_n(o2data.get(), out(2)->get_size(x3, x0, x1, x2), 0.0);
227     sort_indices<0,1,2,3,1,1,-1,1>(i0data_sorted, o2data, x3.size(), x0.size(), x1.size(), x2.size());
228     out(2)->add_block(o2data, x3, x0, x1, x2);
229   }
230 }
231 
compute()232 void Task556::Task_local::compute() {
233   const Index x2 = b(0);
234   const Index x3 = b(1);
235   const Index x0 = b(2);
236   const Index x1 = b(3);
237   // tensor label: I838
238   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
239   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
240   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
241   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
242   for (auto& a1 : *range_[2]) {
243     for (auto& c2 : *range_[0]) {
244       // tensor label: v2
245       std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a1, c2, x1);
246       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a1, c2, x1)]);
247       sort_indices<1,2,0,3,0,1,1,1>(i0data, i0data_sorted, x0.size(), a1.size(), c2.size(), x1.size());
248       // tensor label: l2
249       std::unique_ptr<double[]> i1data = in(1)->get_block(x3, a1, c2, x2);
250       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x3, a1, c2, x2)]);
251       sort_indices<1,2,0,3,0,1,1,2>(i1data, i1data_sorted, x3.size(), a1.size(), c2.size(), x2.size());
252       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), c2.size()*a1.size(),
253              1.0, i0data_sorted, c2.size()*a1.size(), i1data_sorted, c2.size()*a1.size(),
254              1.0, odata_sorted, x0.size()*x1.size());
255     }
256   }
257   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
258   out()->add_block(odata, x2, x3, x0, x1);
259 }
260 
compute()261 void Task557::Task_local::compute() {
262   const Index x3 = b(0);
263   const Index x2 = b(1);
264   const Index x0 = b(2);
265   const Index x1 = b(3);
266   // tensor label: I847
267   std::unique_ptr<double[]> i0data = in(0)->get_block(x2, x3, x0, x1);
268   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x2, x3, x0, x1)]);
269   sort_indices<1,0,2,3,0,1,1,1>(i0data, i0data_sorted, x2.size(), x3.size(), x0.size(), x1.size());
270   // tensor label (calculated on-the-fly): Gamma122
271   {
272     if (x0 == x1) {
273       std::unique_ptr<double[]> o1data(new double[out(1)->get_size(x3, x2)]);
274       std::fill_n(o1data.get(), out(1)->get_size(x3, x2), 0.0);
275       for (int ix1 = 0; ix1 != x1.size(); ++ix1) {
276         for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
277           for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
278             o1data[ix3+x3.size()*(ix2)] +=
279               2.0 * i0data_sorted[ix3+x3.size()*(ix2+x2.size()*(ix1+x0.size()*(ix1)))];
280           }
281         }
282       }
283       out(1)->add_block(o1data, x3, x2);
284     }
285   }
286   {
287     if (x0 == x2) {
288       std::unique_ptr<double[]> o1data(new double[out(1)->get_size(x3, x1)]);
289       std::fill_n(o1data.get(), out(1)->get_size(x3, x1), 0.0);
290       for (int ix1 = 0; ix1 != x1.size(); ++ix1) {
291         for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
292           for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
293             o1data[ix3+x3.size()*(ix1)] +=
294               -1.0 * i0data_sorted[ix3+x3.size()*(ix2+x2.size()*(ix2+x0.size()*(ix1)))];
295           }
296         }
297       }
298       out(1)->add_block(o1data, x3, x1);
299     }
300   }
301   {
302     std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x3, x2, x0, x1)]);
303     std::fill_n(o2data.get(), out(2)->get_size(x3, x2, x0, x1), 0.0);
304     sort_indices<0,1,2,3,1,1,-1,1>(i0data_sorted, o2data, x3.size(), x2.size(), x0.size(), x1.size());
305     out(2)->add_block(o2data, x3, x2, x0, x1);
306   }
307 }
308 
compute()309 void Task558::Task_local::compute() {
310   const Index x2 = b(0);
311   const Index x3 = b(1);
312   const Index x0 = b(2);
313   const Index x1 = b(3);
314   // tensor label: I847
315   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
316   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
317   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
318   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
319   for (auto& c1 : *range_[0]) {
320     for (auto& a2 : *range_[2]) {
321       // tensor label: v2
322       std::unique_ptr<double[]> i0data = in(0)->get_block(c1, x0, x1, a2);
323       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c1, x0, x1, a2)]);
324       sort_indices<0,3,1,2,0,1,1,1>(i0data, i0data_sorted, c1.size(), x0.size(), x1.size(), a2.size());
325       // tensor label: l2
326       std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, x3, x2);
327       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, x3, x2)]);
328       sort_indices<0,1,2,3,0,1,1,2>(i1data, i1data_sorted, c1.size(), a2.size(), x3.size(), x2.size());
329       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), a2.size()*c1.size(),
330              1.0, i0data_sorted, a2.size()*c1.size(), i1data_sorted, a2.size()*c1.size(),
331              1.0, odata_sorted, x0.size()*x1.size());
332     }
333   }
334   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
335   out()->add_block(odata, x2, x3, x0, x1);
336 }
337 
compute()338 void Task559::Task_local::compute() {
339   const Index x2 = b(0);
340   const Index x3 = b(1);
341   const Index x0 = b(2);
342   const Index x1 = b(3);
343   // tensor label: I847
344   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
345   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
346   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
347   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
348   for (auto& c1 : *range_[0]) {
349     // tensor label: h1
350     std::unique_ptr<double[]> i0data = in(0)->get_block(c1, x0);
351     std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c1, x0)]);
352     sort_indices<0,1,0,1,1,1>(i0data, i0data_sorted, c1.size(), x0.size());
353     // tensor label: l2
354     std::unique_ptr<double[]> i1data = in(1)->get_block(x3, x2, c1, x1);
355     std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x3, x2, c1, x1)]);
356     sort_indices<2,0,1,3,0,1,1,1>(i1data, i1data_sorted, x3.size(), x2.size(), c1.size(), x1.size());
357     dgemm_("T", "N", x0.size(), x1.size()*x2.size()*x3.size(), c1.size(),
358            1.0, i0data_sorted, c1.size(), i1data_sorted, c1.size(),
359            1.0, odata_sorted, x0.size());
360   }
361   sort_indices<2,1,0,3,1,1,1,1>(odata_sorted, odata, x0.size(), x3.size(), x2.size(), x1.size());
362   out()->add_block(odata, x2, x3, x0, x1);
363 }
364 
compute()365 void Task560::Task_local::compute() {
366   const Index x5 = b(0);
367   const Index x0 = b(1);
368   const Index x4 = b(2);
369   const Index x3 = b(3);
370   const Index x2 = b(4);
371   const Index x1 = b(5);
372   // tensor label: I856
373   std::unique_ptr<double[]> i0data = in(0)->get_block(x3, x4, x5, x0, x1, x2);
374   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x3, x4, x5, x0, x1, x2)]);
375   sort_indices<2,3,1,0,5,4,0,1,1,1>(i0data, i0data_sorted, x3.size(), x4.size(), x5.size(), x0.size(), x1.size(), x2.size());
376   // tensor label (calculated on-the-fly): Gamma169
377   {
378     if (x2 == x3) {
379       std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x5, x0, x4, x1)]);
380       std::fill_n(o2data.get(), out(2)->get_size(x5, x0, x4, x1), 0.0);
381       for (int ix1 = 0; ix1 != x1.size(); ++ix1) {
382         for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
383           for (int ix4 = 0; ix4 != x4.size(); ++ix4) {
384             for (int ix0 = 0; ix0 != x0.size(); ++ix0) {
385               for (int ix5 = 0; ix5 != x5.size(); ++ix5) {
386                 o2data[ix5+x5.size()*(ix0+x0.size()*(ix4+x4.size()*(ix1)))] +=
387                   1.0 * i0data_sorted[ix5+x5.size()*(ix0+x0.size()*(ix4+x4.size()*(ix3+x3.size()*(ix3+x2.size()*(ix1)))))];
388               }
389             }
390           }
391         }
392       }
393       out(2)->add_block(o2data, x5, x0, x4, x1);
394     }
395   }
396   {
397     std::unique_ptr<double[]> o3data(new double[out(3)->get_size(x5, x0, x4, x3, x2, x1)]);
398     std::fill_n(o3data.get(), out(3)->get_size(x5, x0, x4, x3, x2, x1), 0.0);
399     sort_indices<0,1,2,3,4,5,1,1,1,1>(i0data_sorted, o3data, x5.size(), x0.size(), x4.size(), x3.size(), x2.size(), x1.size());
400     out(3)->add_block(o3data, x5, x0, x4, x3, x2, x1);
401   }
402 }
403 
compute()404 void Task561::Task_local::compute() {
405   const Index x3 = b(0);
406   const Index x4 = b(1);
407   const Index x5 = b(2);
408   const Index x0 = b(3);
409   const Index x1 = b(4);
410   const Index x2 = b(5);
411   // tensor label: I856
412   std::unique_ptr<double[]> odata(new double[out()->get_size(x3, x4, x5, x0, x1, x2)]);
413   std::fill_n(odata.get(), out()->get_size(x3, x4, x5, x0, x1, x2), 0.0);
414   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x3, x4, x5, x0, x1, x2)]);
415   std::fill_n(odata_sorted.get(), out()->get_size(x3, x4, x5, x0, x1, x2), 0.0);
416   for (auto& a1 : *range_[2]) {
417     // tensor label: v2
418     std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a1, x1, x2);
419     std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a1, x1, x2)]);
420     sort_indices<1,0,2,3,0,1,1,1>(i0data, i0data_sorted, x0.size(), a1.size(), x1.size(), x2.size());
421     // tensor label: l2
422     std::unique_ptr<double[]> i1data = in(1)->get_block(x5, a1, x4, x3);
423     std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x5, a1, x4, x3)]);
424     sort_indices<1,0,2,3,0,1,1,2>(i1data, i1data_sorted, x5.size(), a1.size(), x4.size(), x3.size());
425     dgemm_("T", "N", x0.size()*x1.size()*x2.size(), x3.size()*x4.size()*x5.size(), a1.size(),
426            1.0, i0data_sorted, a1.size(), i1data_sorted, a1.size(),
427            1.0, odata_sorted, x0.size()*x1.size()*x2.size());
428   }
429   sort_indices<5,4,3,0,1,2,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x2.size(), x5.size(), x4.size(), x3.size());
430   out()->add_block(odata, x3, x4, x5, x0, x1, x2);
431 }
432 
compute()433 void Task562::Task_local::compute() {
434   const Index x5 = b(0);
435   const Index x2 = b(1);
436   const Index x4 = b(2);
437   const Index x3 = b(3);
438   const Index x1 = b(4);
439   const Index x0 = b(5);
440   // tensor label: I859
441   std::unique_ptr<double[]> i0data = in(0)->get_block(x3, x4, x5, x0, x1, x2);
442   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x3, x4, x5, x0, x1, x2)]);
443   sort_indices<2,5,1,0,4,3,0,1,1,1>(i0data, i0data_sorted, x3.size(), x4.size(), x5.size(), x0.size(), x1.size(), x2.size());
444   // tensor label (calculated on-the-fly): Gamma161
445   {
446     if (x1 == x2) {
447       std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x5, x0, x4, x3)]);
448       std::fill_n(o2data.get(), out(2)->get_size(x5, x0, x4, x3), 0.0);
449       for (int ix0 = 0; ix0 != x0.size(); ++ix0) {
450         for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
451           for (int ix4 = 0; ix4 != x4.size(); ++ix4) {
452             for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
453               for (int ix5 = 0; ix5 != x5.size(); ++ix5) {
454                 o2data[ix5+x5.size()*(ix0+x0.size()*(ix4+x4.size()*(ix3)))] +=
455                   1.0 * i0data_sorted[ix5+x5.size()*(ix2+x2.size()*(ix4+x4.size()*(ix3+x3.size()*(ix2+x1.size()*(ix0)))))];
456               }
457             }
458           }
459         }
460       }
461       out(2)->add_block(o2data, x5, x0, x4, x3);
462     }
463   }
464   {
465     if (x1 == x3) {
466       std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x5, x2, x4, x0)]);
467       std::fill_n(o2data.get(), out(2)->get_size(x5, x2, x4, x0), 0.0);
468       for (int ix0 = 0; ix0 != x0.size(); ++ix0) {
469         for (int ix3 = 0; ix3 != x3.size(); ++ix3) {
470           for (int ix4 = 0; ix4 != x4.size(); ++ix4) {
471             for (int ix2 = 0; ix2 != x2.size(); ++ix2) {
472               for (int ix5 = 0; ix5 != x5.size(); ++ix5) {
473                 o2data[ix5+x5.size()*(ix2+x2.size()*(ix4+x4.size()*(ix0)))] +=
474                   1.0 * i0data_sorted[ix5+x5.size()*(ix2+x2.size()*(ix4+x4.size()*(ix3+x3.size()*(ix3+x1.size()*(ix0)))))];
475               }
476             }
477           }
478         }
479       }
480       out(2)->add_block(o2data, x5, x2, x4, x0);
481     }
482   }
483   {
484     std::unique_ptr<double[]> o3data(new double[out(3)->get_size(x5, x2, x4, x3, x1, x0)]);
485     std::fill_n(o3data.get(), out(3)->get_size(x5, x2, x4, x3, x1, x0), 0.0);
486     sort_indices<0,1,2,3,4,5,1,1,1,1>(i0data_sorted, o3data, x5.size(), x2.size(), x4.size(), x3.size(), x1.size(), x0.size());
487     out(3)->add_block(o3data, x5, x2, x4, x3, x1, x0);
488   }
489 }
490 
compute()491 void Task563::Task_local::compute() {
492   const Index x3 = b(0);
493   const Index x4 = b(1);
494   const Index x5 = b(2);
495   const Index x0 = b(3);
496   const Index x1 = b(4);
497   const Index x2 = b(5);
498   // tensor label: I859
499   std::unique_ptr<double[]> odata(new double[out()->get_size(x3, x4, x5, x0, x1, x2)]);
500   std::fill_n(odata.get(), out()->get_size(x3, x4, x5, x0, x1, x2), 0.0);
501   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x3, x4, x5, x0, x1, x2)]);
502   std::fill_n(odata_sorted.get(), out()->get_size(x3, x4, x5, x0, x1, x2), 0.0);
503   for (auto& a1 : *range_[2]) {
504     // tensor label: v2
505     std::unique_ptr<double[]> i0data = in(0)->get_block(x0, x1, x2, a1);
506     std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, x1, x2, a1)]);
507     sort_indices<3,0,1,2,0,1,1,1>(i0data, i0data_sorted, x0.size(), x1.size(), x2.size(), a1.size());
508     // tensor label: l2
509     std::unique_ptr<double[]> i1data = in(1)->get_block(x5, a1, x4, x3);
510     std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x5, a1, x4, x3)]);
511     sort_indices<1,0,2,3,0,1,1,2>(i1data, i1data_sorted, x5.size(), a1.size(), x4.size(), x3.size());
512     dgemm_("T", "N", x0.size()*x1.size()*x2.size(), x3.size()*x4.size()*x5.size(), a1.size(),
513            1.0, i0data_sorted, a1.size(), i1data_sorted, a1.size(),
514            1.0, odata_sorted, x0.size()*x1.size()*x2.size());
515   }
516   sort_indices<5,4,3,0,1,2,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x2.size(), x5.size(), x4.size(), x3.size());
517   out()->add_block(odata, x3, x4, x5, x0, x1, x2);
518 }
519 
compute()520 void Task564::Task_local::compute() {
521   // tensor label: I816
522   std::unique_ptr<double[]> odata(new double[out()->get_size()]);
523   std::fill_n(odata.get(), out()->get_size(), 0.0);
524   // tensor label: I862
525   std::unique_ptr<double[]> i0data = in(0)->get_block();
526   odata[0] += -2.0 * i0data[0];
527   out()->add_block(odata);
528 }
529 
compute()530 void Task565::Task_local::compute() {
531   // tensor label: I862
532   std::unique_ptr<double[]> odata(new double[out()->get_size()]);
533   std::fill_n(odata.get(), out()->get_size(), 0.0);
534   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size()]);
535   std::fill_n(odata_sorted.get(), out()->get_size(), 0.0);
536   const Index c1 = b(0);
537   const Index a4 = b(1);
538   const Index c3 = b(2);
539   const Index a2 = b(3);
540   // tensor label: v2
541   std::unique_ptr<double[]> i0data = in(0)->get_block(c1, a4, c3, a2);
542   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c1, a4, c3, a2)]);
543   sort_indices<0,1,2,3,0,1,1,1>(i0data, i0data_sorted, c1.size(), a4.size(), c3.size(), a2.size());
544   // tensor label: l2
545   std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, c3, a4);
546   std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, c3, a4)]);
547   sort_indices<0,3,2,1,0,1,-2,1>(i1data, i1data_sorted, c1.size(), a2.size(), c3.size(), a4.size());
548   odata_sorted[0] += ddot_(a4.size()*c3.size()*a2.size()*c1.size(), i0data_sorted, 1, i1data_sorted, 1);
549   sort_indices<1,1,1,1>(odata_sorted, odata);
550   out()->add_block(odata);
551 }
552 
compute()553 void Task566::Task_local::compute() {
554   // tensor label: I816
555   std::unique_ptr<double[]> odata(new double[out()->get_size()]);
556   std::fill_n(odata.get(), out()->get_size(), 0.0);
557   // tensor label: I865
558   std::unique_ptr<double[]> i0data = in(0)->get_block();
559   odata[0] += 4.0 * i0data[0];
560   out()->add_block(odata);
561 }
562 
compute()563 void Task567::Task_local::compute() {
564   // tensor label: I865
565   std::unique_ptr<double[]> odata(new double[out()->get_size()]);
566   std::fill_n(odata.get(), out()->get_size(), 0.0);
567   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size()]);
568   std::fill_n(odata_sorted.get(), out()->get_size(), 0.0);
569   const Index c1 = b(0);
570   const Index a2 = b(1);
571   const Index c3 = b(2);
572   const Index a4 = b(3);
573   // tensor label: v2
574   std::unique_ptr<double[]> i0data = in(0)->get_block(c1, a2, c3, a4);
575   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c1, a2, c3, a4)]);
576   sort_indices<0,1,2,3,0,1,1,1>(i0data, i0data_sorted, c1.size(), a2.size(), c3.size(), a4.size());
577   // tensor label: l2
578   std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, c3, a4);
579   std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, c3, a4)]);
580   sort_indices<0,1,2,3,0,1,4,1>(i1data, i1data_sorted, c1.size(), a2.size(), c3.size(), a4.size());
581   odata_sorted[0] += ddot_(a4.size()*c3.size()*a2.size()*c1.size(), i0data_sorted, 1, i1data_sorted, 1);
582   sort_indices<1,1,1,1>(odata_sorted, odata);
583   out()->add_block(odata);
584 }
585 
compute()586 void Task568::Task_local::compute() {
587   const Index x1 = b(0);
588   const Index x0 = b(1);
589   // tensor label: I868
590   std::unique_ptr<double[]> i0data = in(0)->get_block(x1, x0);
591   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x1, x0)]);
592   sort_indices<0,1,0,1,1,1>(i0data, i0data_sorted, x1.size(), x0.size());
593   // tensor label (calculated on-the-fly): Gamma148
594   {
595     std::unique_ptr<double[]> o1data(new double[out(1)->get_size(x1, x0)]);
596     std::fill_n(o1data.get(), out(1)->get_size(x1, x0), 0.0);
597     sort_indices<0,1,1,1,1,1>(i0data_sorted, o1data, x1.size(), x0.size());
598     out(1)->add_block(o1data, x1, x0);
599   }
600 }
601 
compute()602 void Task569::Task_local::compute() {
603   const Index x1 = b(0);
604   const Index x0 = b(1);
605   // tensor label: I868
606   std::unique_ptr<double[]> odata(new double[out()->get_size(x1, x0)]);
607   std::fill_n(odata.get(), out()->get_size(x1, x0), 0.0);
608   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x1, x0)]);
609   std::fill_n(odata_sorted.get(), out()->get_size(x1, x0), 0.0);
610   for (auto& a3 : *range_[2]) {
611     for (auto& c2 : *range_[0]) {
612       for (auto& a1 : *range_[2]) {
613         // tensor label: v2
614         std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a3, c2, a1);
615         std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a3, c2, a1)]);
616         sort_indices<1,2,3,0,0,1,1,1>(i0data, i0data_sorted, x0.size(), a3.size(), c2.size(), a1.size());
617         // tensor label: l2
618         std::unique_ptr<double[]> i1data = in(1)->get_block(x1, a1, c2, a3);
619         std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x1, a1, c2, a3)]);
620         sort_indices<3,2,1,0,0,1,-1,1>(i1data, i1data_sorted, x1.size(), a1.size(), c2.size(), a3.size());
621         dgemm_("T", "N", x0.size(), x1.size(), a3.size()*c2.size()*a1.size(),
622                1.0, i0data_sorted, a3.size()*c2.size()*a1.size(), i1data_sorted, a3.size()*c2.size()*a1.size(),
623                1.0, odata_sorted, x0.size());
624       }
625     }
626   }
627   sort_indices<1,0,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size());
628   out()->add_block(odata, x1, x0);
629 }
630 
compute()631 void Task570::Task_local::compute() {
632   const Index x1 = b(0);
633   const Index x0 = b(1);
634   // tensor label: I868
635   std::unique_ptr<double[]> odata(new double[out()->get_size(x1, x0)]);
636   std::fill_n(odata.get(), out()->get_size(x1, x0), 0.0);
637   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x1, x0)]);
638   std::fill_n(odata_sorted.get(), out()->get_size(x1, x0), 0.0);
639   for (auto& a1 : *range_[2]) {
640     for (auto& c2 : *range_[0]) {
641       for (auto& a3 : *range_[2]) {
642         // tensor label: v2
643         std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a1, c2, a3);
644         std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a1, c2, a3)]);
645         sort_indices<1,2,3,0,0,1,1,1>(i0data, i0data_sorted, x0.size(), a1.size(), c2.size(), a3.size());
646         // tensor label: l2
647         std::unique_ptr<double[]> i1data = in(1)->get_block(x1, a1, c2, a3);
648         std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x1, a1, c2, a3)]);
649         sort_indices<1,2,3,0,0,1,2,1>(i1data, i1data_sorted, x1.size(), a1.size(), c2.size(), a3.size());
650         dgemm_("T", "N", x0.size(), x1.size(), a3.size()*c2.size()*a1.size(),
651                1.0, i0data_sorted, a3.size()*c2.size()*a1.size(), i1data_sorted, a3.size()*c2.size()*a1.size(),
652                1.0, odata_sorted, x0.size());
653       }
654     }
655   }
656   sort_indices<1,0,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size());
657   out()->add_block(odata, x1, x0);
658 }
659 
compute()660 void Task571::Task_local::compute() {
661   const Index x1 = b(0);
662   const Index x0 = b(1);
663   // tensor label: I868
664   std::unique_ptr<double[]> odata(new double[out()->get_size(x1, x0)]);
665   std::fill_n(odata.get(), out()->get_size(x1, x0), 0.0);
666   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x1, x0)]);
667   std::fill_n(odata_sorted.get(), out()->get_size(x1, x0), 0.0);
668   for (auto& c2 : *range_[0]) {
669     for (auto& a1 : *range_[2]) {
670       // tensor label: h1
671       std::unique_ptr<double[]> i0data = in(0)->get_block(c2, a1);
672       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c2, a1)]);
673       sort_indices<0,1,0,1,1,1>(i0data, i0data_sorted, c2.size(), a1.size());
674       // tensor label: l2
675       std::unique_ptr<double[]> i1data = in(1)->get_block(x1, a1, c2, x0);
676       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x1, a1, c2, x0)]);
677       sort_indices<2,1,0,3,0,1,-1,1>(i1data, i1data_sorted, x1.size(), a1.size(), c2.size(), x0.size());
678       dgemm_("T", "N", 1, x0.size()*x1.size(), c2.size()*a1.size(),
679              1.0, i0data_sorted, c2.size()*a1.size(), i1data_sorted, c2.size()*a1.size(),
680              1.0, odata_sorted, 1);
681     }
682   }
683   sort_indices<0,1,1,1,1,1>(odata_sorted, odata, x1.size(), x0.size());
684   out()->add_block(odata, x1, x0);
685 }
686 
compute()687 void Task572::Task_local::compute() {
688   const Index x1 = b(0);
689   const Index x0 = b(1);
690   // tensor label: I868
691   std::unique_ptr<double[]> odata(new double[out()->get_size(x1, x0)]);
692   std::fill_n(odata.get(), out()->get_size(x1, x0), 0.0);
693   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x1, x0)]);
694   std::fill_n(odata_sorted.get(), out()->get_size(x1, x0), 0.0);
695   for (auto& c1 : *range_[0]) {
696     for (auto& a2 : *range_[2]) {
697       // tensor label: h1
698       std::unique_ptr<double[]> i0data = in(0)->get_block(c1, a2);
699       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(c1, a2)]);
700       sort_indices<0,1,0,1,1,1>(i0data, i0data_sorted, c1.size(), a2.size());
701       // tensor label: l2
702       std::unique_ptr<double[]> i1data = in(1)->get_block(c1, a2, x1, x0);
703       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(c1, a2, x1, x0)]);
704       sort_indices<0,1,2,3,0,1,2,1>(i1data, i1data_sorted, c1.size(), a2.size(), x1.size(), x0.size());
705       dgemm_("T", "N", 1, x0.size()*x1.size(), a2.size()*c1.size(),
706              1.0, i0data_sorted, a2.size()*c1.size(), i1data_sorted, a2.size()*c1.size(),
707              1.0, odata_sorted, 1);
708     }
709   }
710   sort_indices<0,1,1,1,1,1>(odata_sorted, odata, x1.size(), x0.size());
711   out()->add_block(odata, x1, x0);
712 }
713 
compute()714 void Task573::Task_local::compute() {
715   const Index x3 = b(0);
716   const Index x0 = b(1);
717   const Index x2 = b(2);
718   const Index x1 = b(3);
719   // tensor label: I874
720   std::unique_ptr<double[]> i0data = in(0)->get_block(x2, x3, x0, x1);
721   std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x2, x3, x0, x1)]);
722   sort_indices<1,2,0,3,0,1,1,1>(i0data, i0data_sorted, x2.size(), x3.size(), x0.size(), x1.size());
723   // tensor label (calculated on-the-fly): Gamma170
724   {
725     std::unique_ptr<double[]> o2data(new double[out(2)->get_size(x3, x0, x2, x1)]);
726     std::fill_n(o2data.get(), out(2)->get_size(x3, x0, x2, x1), 0.0);
727     sort_indices<0,1,2,3,1,1,1,1>(i0data_sorted, o2data, x3.size(), x0.size(), x2.size(), x1.size());
728     out(2)->add_block(o2data, x3, x0, x2, x1);
729   }
730 }
731 
compute()732 void Task574::Task_local::compute() {
733   const Index x2 = b(0);
734   const Index x3 = b(1);
735   const Index x0 = b(2);
736   const Index x1 = b(3);
737   // tensor label: I874
738   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
739   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
740   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
741   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
742   for (auto& a1 : *range_[2]) {
743     for (auto& a2 : *range_[2]) {
744       // tensor label: v2
745       std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a1, x1, a2);
746       std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a1, x1, a2)]);
747       sort_indices<1,3,0,2,0,1,1,1>(i0data, i0data_sorted, x0.size(), a1.size(), x1.size(), a2.size());
748       // tensor label: l2
749       std::unique_ptr<double[]> i1data = in(1)->get_block(x3, a1, x2, a2);
750       std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x3, a1, x2, a2)]);
751       sort_indices<1,3,0,2,0,1,1,1>(i1data, i1data_sorted, x3.size(), a1.size(), x2.size(), a2.size());
752       dgemm_("T", "N", x0.size()*x1.size(), x2.size()*x3.size(), a2.size()*a1.size(),
753              1.0, i0data_sorted, a2.size()*a1.size(), i1data_sorted, a2.size()*a1.size(),
754              1.0, odata_sorted, x0.size()*x1.size());
755     }
756   }
757   sort_indices<3,2,0,1,1,1,1,1>(odata_sorted, odata, x0.size(), x1.size(), x3.size(), x2.size());
758   out()->add_block(odata, x2, x3, x0, x1);
759 }
760 
compute()761 void Task575::Task_local::compute() {
762   const Index x2 = b(0);
763   const Index x3 = b(1);
764   const Index x0 = b(2);
765   const Index x1 = b(3);
766   // tensor label: I874
767   std::unique_ptr<double[]> odata(new double[out()->get_size(x2, x3, x0, x1)]);
768   std::fill_n(odata.get(), out()->get_size(x2, x3, x0, x1), 0.0);
769   std::unique_ptr<double[]> odata_sorted(new double[out()->get_size(x2, x3, x0, x1)]);
770   std::fill_n(odata_sorted.get(), out()->get_size(x2, x3, x0, x1), 0.0);
771   for (auto& a1 : *range_[2]) {
772     // tensor label: h1
773     std::unique_ptr<double[]> i0data = in(0)->get_block(x0, a1);
774     std::unique_ptr<double[]> i0data_sorted(new double[in(0)->get_size(x0, a1)]);
775     sort_indices<1,0,0,1,1,1>(i0data, i0data_sorted, x0.size(), a1.size());
776     // tensor label: l2
777     std::unique_ptr<double[]> i1data = in(1)->get_block(x3, a1, x2, x1);
778     std::unique_ptr<double[]> i1data_sorted(new double[in(1)->get_size(x3, a1, x2, x1)]);
779     sort_indices<1,0,2,3,0,1,1,1>(i1data, i1data_sorted, x3.size(), a1.size(), x2.size(), x1.size());
780     dgemm_("T", "N", x0.size(), x1.size()*x2.size()*x3.size(), a1.size(),
781            1.0, i0data_sorted, a1.size(), i1data_sorted, a1.size(),
782            1.0, odata_sorted, x0.size());
783   }
784   sort_indices<2,1,0,3,1,1,1,1>(odata_sorted, odata, x0.size(), x3.size(), x2.size(), x1.size());
785   out()->add_block(odata, x2, x3, x0, x1);
786 }
787 
788 #endif
789