1 #include <mpi.h>
2 #include <sys/time.h>
3 #include <iostream>
4 #include <functional>
5 #include <algorithm>
6 #include <vector>
7 #include <string>
8 #include <sstream>
9 #include <stdint.h>
10 #include <cmath>
11 #include "CombBLAS/CombBLAS.h"
12 #include "Glue.h"
13 #include "CCGrid.h"
14 #include "Reductions.h"
15 #include "Multiplier.h"
16 #include "SplitMatDist.h"
17
18 using namespace std;
19 using namespace combblas;
20
21 double comm_bcast;
22 double comm_reduce;
23 double comp_summa;
24 double comp_reduce;
25 double comp_result;
26 double comp_reduce_layer;
27 double comp_split;
28 double comp_trans;
29 double comm_split;
30
31 #define ITERS 1
32
main(int argc,char * argv[])33 int main(int argc, char *argv[])
34 {
35 int provided;
36 //MPI_Init_thread(&argc, &argv, MPI_THREAD_SINGLE, &provided);
37
38
39 MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
40 if (provided < MPI_THREAD_SERIALIZED)
41 {
42 printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
43 MPI_Abort(MPI_COMM_WORLD, 1);
44 }
45
46
47 int nprocs, myrank;
48 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
49 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
50
51 if(argc != 8)
52 {
53 cout << argc << endl;
54 if(myrank == 0)
55 {
56 printf("Usage (input): ./mpipspgemm <GridRows> <GridCols> <Layers> <matA> <matB> <matC> <algo>\n");
57 printf("Example: ./mpipspgemm 4 4 2 matA.mtx matB.mtx matB.mtx threaded\n");
58 printf("algo: outer | column | threaded \n");
59 }
60 return -1;
61 }
62
63 unsigned GRROWS = (unsigned) atoi(argv[1]);
64 unsigned GRCOLS = (unsigned) atoi(argv[2]);
65 unsigned C_FACTOR = (unsigned) atoi(argv[3]);
66 CCGrid CMG(C_FACTOR, GRCOLS);
67
68 if(GRROWS != GRCOLS)
69 {
70 SpParHelper::Print("This version of the Combinatorial BLAS only works on a square logical processor grid\n");
71 MPI_Barrier(MPI_COMM_WORLD);
72 MPI_Abort(MPI_COMM_WORLD, 1);
73 }
74
75 int layer_length = GRROWS*GRCOLS;
76 if(layer_length * C_FACTOR != nprocs)
77 {
78 SpParHelper::Print("The product of <GridRows> <GridCols> <Replicas> does not match the number of processes\n");
79 MPI_Barrier(MPI_COMM_WORLD);
80 MPI_Abort(MPI_COMM_WORLD, 1);
81 }
82
83
84
85 SpDCCols<int32_t, double> splitA, splitB, controlC;
86 SpDCCols<int32_t, double> *splitC;
87 string type;
88
89
90 string fileA(argv[4]);
91 string fileB(argv[5]);
92 string fileC(argv[6]);
93
94 {
95 shared_ptr<CommGrid> layerGrid;
96 layerGrid.reset( new CommGrid(CMG.layerWorld, 0, 0) );
97 FullyDistVec<int32_t, int32_t> p(layerGrid); // permutation vector defined on layers
98
99 double t01 = MPI_Wtime();
100
101 SpDCCols<int32_t, double> *A = ReadMat<double>(fileA, CMG, true, p);
102 SpDCCols<int32_t, double> *B = ReadMat<double>(fileB, CMG, true, p);
103 SpDCCols<int32_t, double> *C = ReadMat<double>(fileC, CMG, true, p);
104
105 SplitMat(CMG, A, splitA, false);
106 SplitMat(CMG, B, splitB, true);
107 SplitMat(CMG, C, controlC, false);
108
109 if(myrank == 0) cout << "Matrices read and replicated along layers : time " << MPI_Wtime() - t01 << endl;
110
111 type = string(argv[7]);
112
113 if(type == string("outer"))
114 {
115 for(int k=0; k<ITERS; k++)
116 {
117 splitB.Transpose(); // locally "transpose" [ABAB: check correctness]
118 splitC = multiply(splitA, splitB, CMG, true, false); // outer product
119 if (controlC == *splitC)
120 SpParHelper::Print("Outer product multiplication working correctly\n");
121 else
122 SpParHelper::Print("ERROR in Outer product multiplication, go fix it!\n");
123 delete splitC;
124 }
125
126 }
127 else if(type == string("column"))
128 {
129
130 for(int k=0; k<ITERS; k++)
131 {
132 splitC = multiply(splitA, splitB, CMG, false, false);
133 if (controlC == *splitC)
134 SpParHelper::Print("Col-heap multiplication working correctly\n");
135 else
136 SpParHelper::Print("ERROR in Col-heap multiplication, go fix it!\n");
137
138 delete splitC;
139 }
140
141 }
142 else // default threaded
143 {
144 for(int k=0; k<ITERS; k++)
145 {
146 splitC = multiply(splitA, splitB, CMG, false, true);
147 if (controlC == *splitC)
148 SpParHelper::Print("Col-heap-threaded multiplication working correctly\n");
149 else
150 SpParHelper::Print("ERROR in Col-heap-threaded multiplication, go fix it!\n");
151 delete splitC;
152 }
153 }
154 }
155
156 MPI_Finalize();
157 return 0;
158 }
159
160
161