1 #ifndef _MULTIPLIER_H_
2 #define _MULTIPLIER_H_
3
4 #include "CombBLAS/CombBLAS.h"
5 #include "CCGrid.h"
6 #include "SUMMALayer.h"
7
8 namespace combblas {
9
10 template <typename IT, typename NT>
multiply(SpDCCols<IT,NT> & splitA,SpDCCols<IT,NT> & splitB,CCGrid & CMG,bool isBT,bool threaded)11 SpDCCols<IT, NT>* multiply(SpDCCols<IT, NT> & splitA, SpDCCols<IT, NT> & splitB, CCGrid & CMG, bool isBT, bool threaded)
12 {
13
14 comm_bcast = 0, comm_reduce = 0, comp_summa = 0, comp_reduce = 0, comp_result =0, comp_reduce_layer=0;
15 int myrank;
16 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
17 std::vector< SpTuples<IT,NT>* > unreducedC;
18
19 MPI_Barrier(MPI_COMM_WORLD);
20 double time_beg = MPI_Wtime();
21
22 SUMMALayer(splitA, splitB, unreducedC, CMG, isBT, threaded);
23
24 MPI_Barrier(MPI_COMM_WORLD);
25 double time_mid = MPI_Wtime();
26
27 SpDCCols<IT,NT> * mergedC;
28 mergedC = ReduceAll_threaded(unreducedC, CMG);
29 MPI_Barrier(MPI_COMM_WORLD);
30 double time_end = MPI_Wtime();
31 double time_total = time_end-time_beg;
32
33 /*
34 int64_t local_nnz = mergedC->getnnz();
35 int64_t global_nnz = 0;
36
37 MPI_Reduce(&local_nnz, &global_nnz, 1, MPIType<int64_t>(), MPI_SUM, 0, MPI_COMM_WORLD);
38 if(myrank == 0)
39 {
40 cout << "Global nonzeros in C is " << global_nnz << endl;
41 }
42 */
43
44 int nthreads;
45 #pragma omp parallel
46 {
47 nthreads = omp_get_num_threads();
48 }
49 if(CMG.myrank == 0)
50 {
51 double time_other = time_total - (comm_bcast + comm_reduce + comp_summa + comp_reduce + comp_reduce_layer + comp_result);
52 //printf(" ----------------------------------------------------------------------------------------------\n");
53 //printf(" comm_bcast comm_scatter comp_summa comp_merge comp_scatter comp_result other total\n");
54 //printf(" ----------------------------------------------------------------------------------------------\n");
55
56 //printf("%10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n\n", comm_bcast, comm_reduce, comp_summa, comp_reduce, comp_reduce_layer, comp_result, time_other, time_total);
57 printf("%4d %4d %5d %6d %10lf %12lf %12lf %10lf %12lf %12lf %12lf %10lf\n", CMG.GridRows, CMG.GridCols, CMG.GridLayers, nthreads, comm_bcast, comm_reduce, comp_summa, comp_reduce, comp_reduce_layer, comp_result, time_other, time_total);
58 }
59
60 return mergedC;
61 }
62
63 }
64
65 #endif
66