1 #ifndef _MULTIPLIER_H_
2 #define _MULTIPLIER_H_
3 
4 #include "CombBLAS/CombBLAS.h"
5 #include "CCGrid.h"
6 #include "SUMMALayer.h"
7 
8 namespace combblas {
9 
10 template <typename IT, typename NT>
multiply(SpDCCols<IT,NT> & splitA,SpDCCols<IT,NT> & splitB,CCGrid & CMG,bool isBT,bool threaded)11 SpDCCols<IT, NT>* multiply(SpDCCols<IT, NT> & splitA, SpDCCols<IT, NT> & splitB, CCGrid & CMG, bool isBT, bool threaded)
12 {
13 
14     comm_bcast = 0, comm_reduce = 0, comp_summa = 0, comp_reduce = 0, comp_result =0, comp_reduce_layer=0;
15     int myrank;
16     MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
17     std::vector< SpTuples<IT,NT>* > unreducedC;
18 
19     MPI_Barrier(MPI_COMM_WORLD);
20     double time_beg = MPI_Wtime();
21 
22     SUMMALayer(splitA, splitB, unreducedC, CMG, isBT, threaded);
23 
24     MPI_Barrier(MPI_COMM_WORLD);
25     double time_mid = MPI_Wtime();
26 
27     SpDCCols<IT,NT> * mergedC;
28     mergedC = ReduceAll_threaded(unreducedC, CMG);
29     MPI_Barrier(MPI_COMM_WORLD);
30     double time_end = MPI_Wtime();
31     double time_total = time_end-time_beg;
32 
33     /*
34     int64_t local_nnz = mergedC->getnnz();
35     int64_t global_nnz = 0;
36 
37     MPI_Reduce(&local_nnz, &global_nnz, 1, MPIType<int64_t>(), MPI_SUM, 0, MPI_COMM_WORLD);
38     if(myrank == 0)
39     {
40         cout << "Global nonzeros in C is " << global_nnz << endl;
41     }
42      */
43 
44     int nthreads;
45 #pragma omp parallel
46     {
47         nthreads = omp_get_num_threads();
48     }
49     if(CMG.myrank == 0)
50     {
51         double time_other = time_total - (comm_bcast + comm_reduce + comp_summa + comp_reduce + comp_reduce_layer + comp_result);
52         //printf(" ----------------------------------------------------------------------------------------------\n");
53         //printf(" comm_bcast   comm_scatter comp_summa comp_merge  comp_scatter  comp_result     other      total\n");
54         //printf(" ----------------------------------------------------------------------------------------------\n");
55 
56         //printf("%10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n\n", comm_bcast, comm_reduce, comp_summa, comp_reduce, comp_reduce_layer, comp_result, time_other, time_total);
57         printf("%4d %4d %5d %6d %10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads, comm_bcast, comm_reduce, comp_summa, comp_reduce, comp_reduce_layer, comp_result, time_other, time_total);
58     }
59 
60     return mergedC;
61 }
62 
63 }
64 
65 #endif
66