1 /*------------------------------------------------------------------------------------------------*
2  * Copyright (C) by the DBCSR developers group - All rights reserved                              *
3  * This file is part of the DBCSR library.                                                        *
4  *                                                                                                *
5  * For information on the license, see the LICENSE file.                                          *
6  * For further information please visit https://dbcsr.cp2k.org                                    *
7  * SPDX-License-Identifier: GPL-2.0+                                                              *
8  *------------------------------------------------------------------------------------------------*/
9 
10 #include <vector>
11 #include <string>
12 #include <iostream>
13 #include <algorithm>
14 #include <cstdlib>
15 #include <cstdio>
16 #include <functional>
17 #include <cstdint>
18 #include <random>
19 #include <mpi.h>
20 #include <dbcsr.h>
21 #include <tensors/dbcsr_tensor.h>
22 #include <complex.h>
23 
24 const int dbcsr_type_real_4 = 1;
25 const int dbcsr_type_real_8 = 3;
26 const int dbcsr_type_complex_4 = 5;
27 const int dbcsr_type_complex_8 = 7;
28 
29 //-------------------------------------------------------------------------------------------------!
30 // Testing the tensor contraction (13|2)x(54|21)=(3|45)
31 // and several other functions, to make sure there are not any segmentation faults
32 //-------------------------------------------------------------------------------------------------!
33 
34 std::random_device rd;
35 std::mt19937 gen(rd());
36 std::uniform_real_distribution<> dis(-1.0, 1.0);
37 
38 template <typename T>
get_rand_real()39 T get_rand_real() {
40 
41     return dis(gen);
42 
43 }
44 
45 template <typename T>
get_rand_complex()46 T get_rand_complex() {
47 
48     return dis(gen) + dis(gen) * I;
49 
50 }
51 
random_dist(int dist_size,int nbins)52 std::vector<int> random_dist(int dist_size, int nbins)
53 {
54 
55     std::vector<int> dist(dist_size);
56 
57     for(int i=0; i < dist_size; i++)
58         dist[i] = i % nbins;
59 
60     return dist;
61 }
62 
63 template <typename T>
printvec(T & v)64 void printvec(T& v) {
65 
66     for (auto i : v) {
67         std::cout << i << " ";
68     }
69     std::cout << '\n' << std::endl;
70 
71 }
72 
73 template <typename T>
fill_random(void * tensor,std::vector<std::vector<int>> nzblocks,std::function<T ()> & rand_func)74 void fill_random(void* tensor, std::vector<std::vector<int>> nzblocks,
75     std::function<T()>& rand_func) {
76 
77     int myrank, mpi_size;
78     int dim = nzblocks.size();
79 
80     MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
81     MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
82 
83     if (myrank == 0) std::cout << "Filling Tensor..." << std::endl;
84     if (myrank == 0) std::cout << "Dimension: " << dim << std::endl;
85 
86     int nblocks = nzblocks[0].size();
87     std::vector<std::vector<int>> mynzblocks(dim);
88     std::vector<int> idx(dim);
89 
90     for (int i = 0; i != nblocks; ++i) {
91 
92         // make index out of nzblocks
93         for (int j = 0; j != dim; ++j) idx[j] = nzblocks[j][i];
94 
95         int proc = -1;
96 
97         c_dbcsr_t_get_stored_coordinates(tensor, idx.data(), &proc);
98 
99         if (proc == myrank) {
100             for (int j = 0; j != dim; ++j)
101                 mynzblocks[j].push_back(idx[j]);
102         }
103 
104     }
105 
106     std::vector<int*> dataptr(4, nullptr);
107 
108     for (int i = 0; i != dim; ++i) {
109         dataptr[i] = mynzblocks[i].size() == 0 ? nullptr : &mynzblocks[i][0];
110     }
111 
112     if (myrank == 0) std::cout << "Reserving blocks..." << std::endl;
113 
114     if (mynzblocks[0].size() != 0)
115     c_dbcsr_t_reserve_blocks_index(tensor, mynzblocks[0].size(), dataptr[0], dataptr[1], dataptr[2], dataptr[3]);
116 
117     auto fill_rand = [&](std::vector<T>& blk) {
118         for (T& e : blk) {
119             e = rand_func();
120             //std::cout << e << std::endl;
121         }
122     };
123 
124     void* iter = nullptr;
125 
126     c_dbcsr_t_iterator_start(&iter, tensor);
127 
128     std::vector<int> loc_idx(dim);
129     std::vector<int> blk_sizes(dim);
130     std::vector<T> block(1);
131 
132     int blk = 0;
133     int blk_proc = 0;
134 
135     while(c_dbcsr_t_iterator_blocks_left(iter)) {
136 
137         c_dbcsr_t_iterator_next_block(iter, loc_idx.data(), &blk, &blk_proc, blk_sizes.data(), nullptr);
138 
139         int tot = 1;
140         for (int i = 0; i != dim; ++i) {
141             tot *= blk_sizes[i];
142         }
143 
144         block.resize(tot);
145 
146         fill_rand(block);
147 
148         c_dbcsr_t_put_block(tensor, loc_idx.data(), blk_sizes.data(), block.data(), nullptr, nullptr);
149 
150     }
151 
152     c_dbcsr_t_iterator_stop(&iter);
153 
154     c_dbcsr_t_finalize(tensor);
155 
156 }
157 
158 
main(int argc,char * argv[])159 int main(int argc, char* argv[])
160 {
161     MPI_Init(&argc, &argv);
162 
163     int mpi_size, mpi_rank;
164     MPI_Comm_size(MPI_COMM_WORLD, &mpi_size);
165     MPI_Comm_rank(MPI_COMM_WORLD, &mpi_rank);
166 
167     c_dbcsr_init_lib(MPI_COMM_WORLD, nullptr);
168 
169     void* pgrid_3d = nullptr;
170     void* pgrid_4d = nullptr;
171 
172     std::vector<int> dims4(4);
173     std::vector<int> dims3(3);
174 
175     MPI_Fint fcomm = MPI_Comm_c2f(MPI_COMM_WORLD);
176 
177     c_dbcsr_t_pgrid_create(&fcomm, dims3.data(), dims3.size(), &pgrid_3d, nullptr);
178 
179     c_dbcsr_t_pgrid_create(&fcomm, dims4.data(), dims4.size(), &pgrid_4d, nullptr);
180 
181     if (mpi_rank == 0) {
182 
183         std::cout << "pgrid3-dimensions:" << std::endl;
184         printvec(dims3);
185 
186         std::cout << "pgrid4-dimensions:" << std::endl;
187         printvec(dims4);
188     }
189 
190 
191     // block sizes
192     std::vector<int> blk1, blk2, blk3, blk4, blk5;
193     // blk indices of non-zero blocks
194     std::vector<int> nz11, nz12, nz13, nz21, nz22, nz24, nz25, nz33, nz34, nz35;
195 
196     blk1 = {3, 9, 12, 1};
197     blk2 = {4, 2, 3, 1, 9, 2, 32, 10, 5, 8, 7};
198     blk3 = {7, 3, 8, 7, 9, 5, 10, 23, 2};
199     blk4 = {8, 1, 4, 13, 6};
200     blk5 = {4, 2, 22};
201 
202     nz11 = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
203             0, 1, 1, 1, 1, 1, 1, 1, 2, 2,
204             2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
205             3, 3};
206     nz12 = {2, 4, 4, 4, 5, 5, 6, 7, 9,10,10,
207             0, 0, 3, 6, 6, 8, 9 ,1, 1, 4, 5,
208             7, 7, 8,10,10, 1 ,3, 4, 4, 7};
209     nz13 = {6, 2, 4, 8, 5, 7, 1, 7, 2, 1, 2,
210             0, 3, 5, 1, 6, 4, 7, 2, 6, 0, 3,
211             2, 6, 7, 4, 7, 8, 5, 0, 1, 6};
212 
213     nz21 = { 0, 0, 0, 0, 0, 1, 1, 1,  1,  1,
214              1, 1, 1, 1, 1, 1, 1, 1,  1,  1,
215              2, 2, 2, 2, 2, 2, 2, 2,  2,  2,
216              3, 3, 3, 3, 3, 3 };
217     nz22 = { 0, 2, 3, 5, 9,  1, 1, 3,  4,  4,
218              5, 5, 5, 6,  6,  8, 8, 8, 9, 10,
219              0, 2, 2, 3,  4,  5, 7, 8, 10, 10,
220              0, 2, 3, 5, 9, 10 };
221     nz24 = { 2, 4, 1, 2,  1,  2, 4, 0,  0,  3,
222              1, 2, 3, 0,  3,  2, 3, 3,  1,  0,
223              2, 0, 0, 2,  3,  2, 3, 1,  1,  2,
224              0, 0, 2, 1,  4,  4 };
225     nz25 = { 0, 2, 1, 0,  0,  1, 2,  0,  2, 0,
226              1, 2, 1, 0,  2,  1, 2,  1,  0, 1,
227              2, 0, 1, 2,  1,  1, 1,  2,  0, 1,
228              0, 2, 1, 0,  2,  1 };
229 
230     nz33 = { 1, 3, 4, 4, 4, 5, 5, 7 };
231     nz34 = { 2, 1, 0, 0, 2, 1, 3, 4 };
232     nz35 = { 2, 1, 0, 1, 2, 1, 0, 0 };
233 
234     // (13|2)x(54|21)=(3|45)
235     // distribute blocks
236     std::vector<int> dist11 = random_dist(blk1.size(), dims3[0]);
237     std::vector<int> dist12 = random_dist(blk2.size(), dims3[1]);
238     std::vector<int> dist13 = random_dist(blk3.size(), dims3[2]);
239 
240     std::vector<int> dist21 = random_dist(blk1.size(), dims4[0]);
241     std::vector<int> dist22 = random_dist(blk2.size(), dims4[1]);
242     std::vector<int> dist23 = random_dist(blk4.size(), dims4[2]);
243     std::vector<int> dist24 = random_dist(blk5.size(), dims4[3]);
244 
245     std::vector<int> dist31 = random_dist(blk3.size(), dims3[0]);
246     std::vector<int> dist32 = random_dist(blk4.size(), dims3[1]);
247     std::vector<int> dist33 = random_dist(blk5.size(), dims3[2]);
248 
249 
250     if (mpi_rank == 0) {
251 
252         std::cout << "dist11:" << std::endl;
253         printvec(dist11);
254 
255         std::cout << "dist12:" << std::endl;
256         printvec(dist12);
257 
258         std::cout << "dist13:" << std::endl;
259         printvec(dist13);
260 
261         std::cout << "dist21:" << std::endl;
262         printvec(dist21);
263 
264         std::cout << "dist22:" << std::endl;
265         printvec(dist22);
266 
267         std::cout << "dist23:" << std::endl;
268         printvec(dist23);
269 
270         std::cout << "dist24:" << std::endl;
271         printvec(dist24);
272 
273         std::cout << "dist31:" << std::endl;
274         printvec(dist31);
275 
276         std::cout << "dist32:" << std::endl;
277         printvec(dist32);
278 
279         std::cout << "dist33:" << std::endl;
280         printvec(dist33);
281 
282     }
283 
284     void* dist1 = nullptr;
285     void* dist2 = nullptr;
286     void* dist3 = nullptr;
287 
288     // (13|2)x(54|21)=(3|45)
289     std::vector<int> map11, map12, map21, map22, map31, map32;
290 
291     map11 = {0, 2};
292     map12 = {1};
293     map21 = {3, 2};
294     map22 = {1, 0};
295     map31 = {0};
296     map32 = {1, 2};
297 
298     if (mpi_rank == 0) std::cout << "Creating dist objects..." << '\n' << std::endl;
299 
300     // create distribution objects
301     c_dbcsr_t_distribution_new(&dist1, pgrid_3d, dist11.data(), dist11.size(),
302         dist12.data(), dist12.size(), dist13.data(), dist13.size(), nullptr, 0);
303 
304     c_dbcsr_t_distribution_new(&dist2, pgrid_4d, dist21.data(), dist21.size(),
305         dist22.data(), dist22.size(), dist23.data(), dist23.size(),
306         dist24.data(), dist24.size());
307 
308     c_dbcsr_t_distribution_new(&dist3, pgrid_3d, dist31.data(), dist31.size(),
309         dist32.data(), dist32.size(), dist33.data(), dist33.size(), nullptr, 0);
310 
311     // create tensors
312     // (13|2)x(54|21)=(3|45)
313 
314     void* tensor1 = nullptr;
315     void* tensor2 = nullptr;
316     void* tensor3 = nullptr;
317 
318     if (mpi_rank == 0) std::cout << "Creating tensors..." << std::endl;
319 
320     c_dbcsr_t_create_new(&tensor1, "(13|2)", dist1, map11.data(), map11.size(), map12.data(), map12.size(), nullptr, blk1.data(),
321         blk1.size(), blk2.data(), blk2.size(), blk3.data(), blk3.size(), nullptr, 0);
322 
323     c_dbcsr_t_create_new(&tensor2, "(54|21)", dist2, map21.data(), map21.size(), map22.data(), map22.size(), nullptr, blk1.data(),
324         blk1.size(), blk2.data(), blk2.size(), blk4.data(), blk4.size(), blk5.data(), blk5.size());
325 
326     c_dbcsr_t_create_new(&tensor3, "(3|45)", dist3, map31.data(), map31.size(), map32.data(), map32.size(), nullptr, blk3.data(),
327         blk3.size(), blk4.data(), blk4.size(), blk5.data(), blk5.size(), nullptr, 0);
328 
329     // fill the tensors
330 
331     std::function<double()> drand = get_rand_real<double>;
332 
333     if (mpi_rank == 0) std::cout << "Tensor 1" << '\n' << std::endl;
334     fill_random<double>(tensor1, {nz11, nz12, nz13}, drand);
335     if (mpi_rank == 0) std::cout << "Tensor 2" << '\n' << std::endl;
336     fill_random<double>(tensor2, {nz21, nz22, nz24, nz25}, drand);
337     if (mpi_rank == 0) std::cout << "Tensor 3" << '\n' << std::endl;
338     fill_random<double>(tensor3, {nz33, nz34, nz35}, drand);
339 
340     // contracting
341 
342     // (13|2)x(54|21)=(3|45)
343 
344     if (mpi_rank == 0) std::cout << "Contracting..." << std::endl;
345 
346     // cn : indices to be contracted
347     // noncn : indices not to be contracted
348     // mapn : how nonc indices map to tensor 3
349     std::vector<int> c1, nonc1, c2, nonc2, map1, map2;
350     c1         = {0,1};
351     nonc1     = {2};
352     c2         = {0,1};
353     nonc2    = {2,3};
354     map1    = {0};
355     map2    = {1,2};
356 
357 
358     int unit_nr = -1;
359     if (mpi_rank == 0) unit_nr = 6;
360     bool log_verbose = true;
361 
362     // tensor_3(map_1, map_2) := 0.2 * tensor_1(notcontract_1, contract_1)
363     //                                 * tensor_2(contract_2, notcontract_2)
364     //                                 + 0.8 * tensor_3(map_1, map_2)
365 
366     c_dbcsr_t_contract_r_dp (0.2, tensor1, tensor2, 0.8, tensor3, c1.data(), c1.size(), nonc1.data(), nonc1.size(),
367                                     c2.data(), c2.size(), nonc2.data(), nonc2.size(), map1.data(), map1.size(), map2.data(),
368                                     map2.size(), nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr,
369                                     nullptr, nullptr, nullptr, &unit_nr, &log_verbose);
370 
371     // ====================================================
372     // ====== TESTING OTHER FUNCTIONS =============
373     // ====================================================
374 
375     // ======== GET_INFO ===========
376 
377     std::vector<int> cnblkstot(3), nfulltot(3), cnblksloc(3), nfullloc(3), pdims(3), ploc(3);
378 
379     char* name = nullptr;
380     int data_type = 0;
381 
382     std::vector<int> nblksloc(3);
383     std::vector<int> nblkstot(3);
384 
385     for (int i = 0; i != 3; ++i) {
386         nblksloc[i] = c_dbcsr_t_nblks_local(tensor1,i);
387         nblkstot[i] = c_dbcsr_t_nblks_total(tensor1,i);
388     }
389 
390     std::vector<std::vector<int>> c_blks_local(3);
391     std::vector<std::vector<int>> c_proc_dist(3);
392     std::vector<std::vector<int>> c_blk_size(3);
393     std::vector<std::vector<int>> c_blk_offset(3);
394     for (int i = 0; i != 3; ++i) {
395         c_blks_local[i].resize(nblksloc[i]);
396         c_proc_dist[i].resize(nblkstot[i]);
397         c_blk_size[i].resize(nblkstot[i]);
398         c_blk_offset[i].resize(nblkstot[i]);
399     }
400 
401     c_dbcsr_t_get_info(tensor1, 3, cnblkstot.data(), nfulltot.data(),cnblksloc.data(),
402                                nfullloc.data(), pdims.data(), ploc.data(),
403                                nblksloc[0],nblksloc[1],nblksloc[2],0,
404                                nblkstot[0],nblkstot[1],nblkstot[2],0,
405                                c_blks_local[0].data(), c_blks_local[1].data(), c_blks_local[2].data(), nullptr,
406                                c_proc_dist[0].data(), c_proc_dist[1].data(), c_proc_dist[2].data(), nullptr,
407                                c_blk_size[0].data(), c_blk_size[1].data(), c_blk_size[2].data(), nullptr,
408                                c_blk_offset[0].data(), c_blk_offset[1].data(), c_blk_offset[2].data(), nullptr,
409                                nullptr, &name, &data_type);
410 
411     std::string tname(name);
412 
413     if (mpi_rank == 0) {
414         std::cout << "Testing get_info for Tensor 1..." << std::endl;
415         std::cout << "Name: " << tname << std::endl;
416         std::cout << "Data_type: " << data_type << std::endl;
417     }
418 
419     for (int rank = 0; rank != mpi_size; ++rank) {
420         if (rank == mpi_rank) {
421             std::cout << "======= Process: " << rank << " ========" << std::endl;
422 
423             std::cout << "Total number of blocks:" << std::endl;
424             printvec(cnblkstot);
425 
426             std::cout << "Total number of elements:" << std::endl;
427             printvec(nfulltot);
428 
429             std::cout << "Total number of local blocks:" << std::endl;
430             printvec(cnblksloc);
431 
432             std::cout << "Total number of local elements:" << std::endl;
433             printvec(nfullloc);
434 
435             std::cout << "Pgrid dimensions:" << std::endl;
436             printvec(pdims);
437 
438             std::cout << "Process coordinates:" << std::endl;
439             printvec(ploc);
440 
441             std::cout << "blks_local:" << std::endl;
442             for (int i = 0; i != 3; ++i) {
443                 printvec(c_blks_local[i]);
444             }
445 
446             std::cout << "proc_dist:" << std::endl;
447             for (int i = 0; i != 3; ++i) {
448                 printvec(c_proc_dist[i]);
449             }
450 
451             std::cout << "blk_size:" << std::endl;
452             for (int i = 0; i != 3; ++i) {
453                 printvec(c_blk_size[i]);
454             }
455 
456             std::cout << "blk_offset:" << std::endl;
457             for (int i = 0; i != 3; ++i) {
458                 printvec(c_blk_offset[i]);
459             }
460 
461         }
462 
463         MPI_Barrier(MPI_COMM_WORLD);
464 
465     }
466 
467     // ================ GET_MAPPING_INFO ======================
468 
469     int ndim_nd = 0, ndim1_2d = 0, ndim2_2d = 0;
470     std::vector<long long int> dims_2d_i8(2);
471     std::vector<int> dims_2d(2);
472 
473     int nd_size = 3;
474     int nd_row_size = c_dbcsr_t_ndims_matrix_row(tensor1);
475     int nd_col_size = c_dbcsr_t_ndims_matrix_column(tensor1);
476 
477     std::vector<int> dims_nd(nd_size), dims1_2d(nd_row_size), dims2_2d(nd_col_size),
478         map1_2d(nd_row_size), map2_2d(nd_col_size), map_nd(nd_size);
479 
480     int base;
481     bool col_major;
482 
483     c_dbcsr_t_get_mapping_info(tensor1, 3, nd_row_size, nd_col_size, &ndim_nd, &ndim1_2d, &ndim2_2d,
484                         dims_2d_i8.data(), dims_2d.data(), dims_nd.data(),
485                         dims1_2d.data(), dims2_2d.data(),
486                         map1_2d.data(), map2_2d.data(),
487                         map_nd.data(), &base, &col_major);
488 
489      if (mpi_rank == 0) {
490         std::cout << "Testing get_mapping_info for Tensor 1..." << std::endl;
491 
492         std::cout << "ndim_nd = " << ndim_nd << std::endl;
493         std::cout << "ndim1_2d = " << ndim1_2d << std::endl;
494         std::cout << "ndim2_2d = " << ndim2_2d << std::endl;
495 
496         std::cout << "dims_2d_i8: ";
497         printvec(dims_2d_i8);
498 
499         std::cout << "dims_2d: ";
500         printvec(dims_2d);
501 
502         std::cout << "dims_nd: " << std::endl;
503         printvec(dims_nd);
504 
505         std::cout << "dims1_2d: " << std::endl;
506         printvec(dims1_2d);
507 
508         std::cout << "dims2_2d: " << std::endl;
509         printvec(dims2_2d);
510 
511         std::cout << "map1_2d: " << std::endl;
512         printvec(map1_2d);
513 
514         std::cout << "map2_2d: " << std::endl;
515         printvec(map2_2d);
516 
517         std::cout << "map_nd: " << std::endl;
518         printvec(map_nd);
519 
520         std::cout << "Base: " << base << std::endl;
521         std::cout << "col_major " << col_major << std::endl;
522 
523     }
524 
525     // ======== TESTING contract_index ================
526 
527     long long int rsize = c_dbcsr_t_max_nblks_local(tensor3);
528     std::vector<int> result_index(rsize*3);
529     int nblks_loc = 0;
530 
531     if (mpi_rank == 0) std::cout << "\n" << "Testing c_dbcsr_t_contract_index...\n" << std::endl;
532     c_dbcsr_t_contract_index_r_dp (0.2, tensor1, tensor2, 0.8, tensor3, c1.data(), c1.size(), nonc1.data(), nonc1.size(),
533                                     c2.data(), c2.size(), nonc2.data(), nonc2.size(), map1.data(), map1.size(), map2.data(),
534                                     map2.size(), nullptr, nullptr, nullptr, nullptr, &nblks_loc, result_index.data(), rsize, 3);
535 
536     for (int ip = 0; ip != mpi_size; ++ip) {
537         if (ip == mpi_rank) {
538 
539             std::cout << "Result Indices on Rank " << ip << std::endl;
540 
541             for (int i = 0; i != nblks_loc; ++i) {
542                 for (int n = 0; n != 3; ++n) {
543                     std::cout << result_index[i + n*rsize] << " ";
544                 } std::cout << std::endl;
545             }
546         }
547 
548         MPI_Barrier(MPI_COMM_WORLD);
549 
550     }
551 
552     c_dbcsr_t_destroy(&tensor1);
553     c_dbcsr_t_destroy(&tensor2);
554     c_dbcsr_t_destroy(&tensor3);
555 
556     c_dbcsr_t_distribution_destroy(&dist1);
557     c_dbcsr_t_distribution_destroy(&dist2);
558     c_dbcsr_t_distribution_destroy(&dist3);
559 
560     c_dbcsr_t_pgrid_destroy(&pgrid_3d, nullptr);
561     c_dbcsr_t_pgrid_destroy(&pgrid_4d, nullptr);
562 
563     c_free_string(&name);
564 
565     c_dbcsr_finalize_lib();
566 
567     MPI_Finalize();
568 
569     return 0;
570 }
571