1 #ifndef BP_MAXIMAL_MATCHING_H
2 #define BP_MAXIMAL_MATCHING_H
3
4 #include "../CombBLAS.h"
5 #include <iostream>
6 #include <functional>
7 #include <algorithm>
8 #include <vector>
9 #include <limits>
10 #include "Utility.h"
11 #include "MatchingDefs.h"
12
13 #define NO_INIT 0
14 #define GREEDY 1
15 #define KARP_SIPSER 2
16 #define DMD 3
17 MTRand GlobalMT(123); // for reproducible result
18 double tTotalMaximal;
19
20 namespace combblas {
21
22 // This is not tested with CSC yet
23 // TODO: test with CSC and Setting SPA (similar to Weighted Greedy)
24 template <typename Par_DCSC_Bool, typename IT>
25 void MaximalMatching(Par_DCSC_Bool & A, Par_DCSC_Bool & AT, FullyDistVec<IT, IT>& mateRow2Col,
26 FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& degColRecv, int type, bool rand=true)
27 {
28
29 typedef VertexTypeML < IT, IT> VertexType;
30 int nprocs, myrank;
31 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
32 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
33 int nthreads = 1;
34 #ifdef _OPENMP
35 #pragma omp parallel
36 {
37 nthreads = omp_get_num_threads();
38 }
39 #endif
40
41 FullyDistVec<IT, IT> degCol = degColRecv;
42
43 //unmatched row and column vertices
44 FullyDistSpVec<IT, IT> unmatchedRow(mateRow2Col, [](IT mate){return mate==-1;});
45 FullyDistSpVec<IT, IT> degColSG(A.getcommgrid(), A.getncol());
46 //FullyDistVec<IT, IT> degCol(A.getcommgrid());
47 //A.Reduce(degCol, Column, plus<IT>(), static_cast<IT>(0)); // Reduce is not multithreaded
48
49
50 FullyDistSpVec<IT, VertexType> unmatchedCol(A.getcommgrid(), A.getncol());
51 // every veretx is unmatched. keep non-isolated vertices
52 unmatchedCol = EWiseApply<VertexType>(unmatchedCol, degCol,
53 [](VertexType vtx, IT deg){return VertexType();},
54 [](VertexType vtx, IT deg){return deg>0;},
55 true, VertexType());
56
57
58 FullyDistSpVec<IT, VertexType> fringeRow(A.getcommgrid(), A.getnrow());
59 FullyDistSpVec<IT, IT> fringeRow2(A.getcommgrid(), A.getnrow());
60 FullyDistSpVec<IT, VertexType> deg1Col(A.getcommgrid(), A.getncol());
61
62
63 IT curUnmatchedCol = unmatchedCol.getnnz();
64 IT curUnmatchedRow = unmatchedRow.getnnz();
65 IT newlyMatched = 1; // ensure the first pass of the while loop
66 int iteration = 0;
67 double tStart = MPI_Wtime();
68 std::vector<std::vector<double> > timing;
69
70 #ifdef DETAIL_STATS
71 if(myrank == 0)
72 {
73 cout << "=======================================================\n";
74 cout << "@@@@@@ Number of processes: " << nprocs << endl;
75 cout << "=======================================================\n";
76 cout << "It | UMRow | UMCol | newlyMatched | Time "<< endl;
77 cout << "=======================================================\n";
78 }
79 #endif
80 MPI_Barrier(MPI_COMM_WORLD);
81
82
83 while(curUnmatchedCol !=0 && curUnmatchedRow!=0 && newlyMatched != 0 )
84 {
85 unmatchedCol.ApplyInd([](VertexType vtx, IT idx){return VertexType(idx,idx);});
86 if(type==DMD)
87 {
88 unmatchedCol = EWiseApply<VertexType>(unmatchedCol, degCol,
89 [](VertexType vtx, IT deg){return VertexType(vtx.parent,deg);},
90 [](VertexType vtx, IT deg){return true;},
91 false, VertexType());
92 }
93 else if(rand)
94 {
95 unmatchedCol.Apply([](VertexType vtx){return VertexType(vtx.parent, static_cast<IT>((GlobalMT.rand() * 9999999)+1));});
96 }
97
98 // ======================== step1: One step of BFS =========================
99 std::vector<double> times;
100 double t1 = MPI_Wtime();
101 if(type==GREEDY)
102 {
103 SpMV<Select2ndMinSR<bool, VertexType>>(A, unmatchedCol, fringeRow, false);
104 }
105 else if(type==DMD)
106 {
107 SpMV<Select2ndMinSR<bool, VertexType>>(A, unmatchedCol, fringeRow, false);
108 }
109 else //(type==KARP_SIPSER)
110 {
111 deg1Col = EWiseApply<VertexType>(unmatchedCol, degCol,
112 [](VertexType vtx, IT deg){return vtx;},
113 [](VertexType vtx, IT deg){return deg==1;},
114 false, VertexType());
115
116 if(deg1Col.getnnz()>9)
117 SpMV<Select2ndMinSR<bool, VertexType>>(A, deg1Col, fringeRow, false);
118 else
119 SpMV<Select2ndMinSR<bool, VertexType>>(A, unmatchedCol, fringeRow, false);
120 }
121 // Remove matched row vertices
122 fringeRow = EWiseApply<VertexType>(fringeRow, mateRow2Col,
123 [](VertexType vtx, IT mate){return vtx;},
124 [](VertexType vtx, IT mate){return mate==-1;},
125 false, VertexType());
126
127 if(myrank == 0){times.push_back(MPI_Wtime()-t1); t1 = MPI_Wtime();}
128 // ===========================================================================
129
130
131 // ======================== step2: Update matching =========================
132
133 fringeRow2 = EWiseApply<IT>(fringeRow, mateRow2Col,
134 [](VertexType vtx, IT mate){return vtx.parent;},
135 [](VertexType vtx, IT mate){return true;},
136 false, VertexType());
137
138 FullyDistSpVec<IT, IT> newMatchedCols = fringeRow2.Invert(A.getncol());
139 FullyDistSpVec<IT, IT> newMatchedRows = newMatchedCols.Invert(A.getnrow());
140 mateCol2Row.Set(newMatchedCols);
141 mateRow2Col.Set(newMatchedRows);
142 if(myrank == 0){times.push_back(MPI_Wtime()-t1); t1 = MPI_Wtime();}
143 // ===========================================================================
144
145
146 // =============== step3: Update degree of unmatched columns =================
147 unmatchedRow.Select(mateRow2Col, [](IT mate){return mate==-1;});
148 unmatchedCol.Select(mateCol2Row, [](IT mate){return mate==-1;});
149
150 if(type!=GREEDY)
151 {
152 // update degree
153 newMatchedRows.Apply([](IT val){return 1;}); // needed if the matrix is Boolean since the SR::multiply isn't called
154 SpMV< SelectPlusSR<bool, IT>>(AT, newMatchedRows, degColSG, false); // degree of column vertices to matched rows
155 // subtract degree of column vertices
156 degCol.EWiseApply(degColSG,
157 [](IT old_deg, IT new_deg){return old_deg-new_deg;},
158 [](IT old_deg, IT new_deg){return true;},
159 false, static_cast<IT>(0));
160 // remove isolated vertices
161 unmatchedCol = EWiseApply<VertexType>(unmatchedCol, degCol,
162 [](VertexType vtx, IT deg){return vtx;},
163 [](VertexType vtx, IT deg){return deg>0;},
164 false, VertexType());
165 }
166 if(myrank == 0){times.push_back(MPI_Wtime()-t1); t1 = MPI_Wtime();}
167 // ===========================================================================
168
169
170 ++iteration;
171 newlyMatched = newMatchedCols.getnnz();
172 times.push_back(std::accumulate(times.begin(), times.end(), 0.0));
173 timing.push_back(times);
174 #ifdef DETAIL_STATS
175 if(myrank == 0)
176 {
177 printf("%3d %10lld %10lld %10lld %18lf\n", iteration , curUnmatchedRow, curUnmatchedCol, newlyMatched, times.back());
178 }
179 #endif
180 curUnmatchedCol = unmatchedCol.getnnz();
181 curUnmatchedRow = unmatchedRow.getnnz();
182 MPI_Barrier(MPI_COMM_WORLD);
183
184 }
185
186 IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;});
187 std::vector<double> totalTimes(timing[0].size(),0);
188 for(int i=0; i<timing.size(); i++)
189 {
190 for(int j=0; j<timing[i].size(); j++)
191 {
192 totalTimes[j] += timing[i][j];
193 }
194 }
195
196
197 if(myrank == 0)
198 {
199 #ifdef DETAIL_STATS
200 cout << "==========================================================\n";
201 cout << "\n================individual timings =======================\n";
202 cout << " SpMV Update-Match Update-UMC Total "<< endl;
203 cout << "==========================================================\n";
204 for(int i=0; i<timing.size(); i++)
205 {
206 for(int j=0; j<timing[i].size(); j++)
207 {
208 printf("%12.5lf ", timing[i][j]);
209 }
210 cout << endl;
211 }
212
213 cout << "-------------------------------------------------------\n";
214 for(int i=0; i<totalTimes.size(); i++)
215 printf("%12.5lf ", totalTimes[i]);
216 cout << endl;
217 #endif
218 std::cout << "****** maximal matching runtime ********\n";
219 std::cout << "nprocesses nthreads ncores algorithm Unmatched-Rows Cardinality Total Time***\n";
220 std::cout << nprocs << " " << nthreads << " " << nprocs * nthreads << " ";
221 if(type == DMD) std::cout << "DMD";
222 else if(type == GREEDY) std::cout << "Greedy";
223 else if(type == KARP_SIPSER) std::cout << "Karp-Sipser";
224 if(rand && (type == KARP_SIPSER || type == GREEDY) ) std::cout << "-rand";
225 std::cout << " ";
226 printf("%lld %lld %lf\n", curUnmatchedRow, cardinality, totalTimes.back());
227 std::cout << "-------------------------------------------------------\n\n";
228 }
229 //isMatching(mateCol2Row, mateRow2Col);
230 }
231
232
233
234 // Special version of the greedy algorithm (works for both CSC (multithreaded) and DCSC)
235 // That uses WeightMaxSR semiring
236 // TODO: check if this is 1/2 approx of the weighted matching (probably no)
237 // TODO: should we remove degCol?
238 // TODO: can be merged with the generalized MaximalMatching
239 template <typename Par_MAT_Double, typename IT>
WeightedGreedy(Par_MAT_Double & A,FullyDistVec<IT,IT> & mateRow2Col,FullyDistVec<IT,IT> & mateCol2Row,FullyDistVec<IT,IT> & degCol)240 void WeightedGreedy(Par_MAT_Double & A, FullyDistVec<IT, IT>& mateRow2Col,
241 FullyDistVec<IT, IT>& mateCol2Row, FullyDistVec<IT, IT>& degCol)
242 {
243
244 typedef VertexTypeML < IT, double> VertexType;
245 int nthreads=1;
246 #ifdef THREADED
247 #pragma omp parallel
248 {
249 nthreads = omp_get_num_threads();
250 }
251 #endif
252 PreAllocatedSPA<VertexType> SPA(A.seq(), nthreads*4);
253 int nprocs, myrank;
254 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
255 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
256
257 double tStart = MPI_Wtime();
258
259 //unmatched row and column vertices
260 FullyDistSpVec<IT, IT> unmatchedRow(mateRow2Col, [](IT mate){return mate==-1;});
261
262
263
264 FullyDistSpVec<IT, VertexType> unmatchedCol(A.getcommgrid(), A.getncol());
265 // every veretx is unmatched. keep non-isolated vertices
266 unmatchedCol = EWiseApply<VertexType>(unmatchedCol, degCol,
267 [](VertexType vtx, IT deg){return VertexType();},
268 [](VertexType vtx, IT deg){return deg>0;},
269 true, VertexType());
270
271
272 FullyDistSpVec<IT, VertexType> fringeRow(A.getcommgrid(), A.getnrow());
273 FullyDistSpVec<IT, IT> fringeRow2(A.getcommgrid(), A.getnrow());
274 FullyDistSpVec<IT, VertexType> fringeRow3(A.getcommgrid(), A.getnrow());
275
276 IT curUnmatchedCol = unmatchedCol.getnnz();
277 IT curUnmatchedRow = unmatchedRow.getnnz();
278 IT newlyMatched = 1; // ensure the first pass of the while loop
279 int iteration = 0;
280
281 std::vector<std::vector<double> > timing;
282
283 #ifdef DETAIL_STATS
284 if(myrank == 0)
285 {
286 cout << "=======================================================\n";
287 cout << "@@@@@@ Number of processes: " << nprocs << endl;
288 cout << "=======================================================\n";
289 cout << "It | UMRow | UMCol | newlyMatched | Time "<< endl;
290 cout << "=======================================================\n";
291 }
292 #endif
293 MPI_Barrier(MPI_COMM_WORLD);
294
295
296 while(curUnmatchedCol !=0 && curUnmatchedRow!=0 && newlyMatched != 0 )
297 {
298 // anything is fine in the second argument
299 unmatchedCol.ApplyInd([](VertexType vtx, IT idx){return VertexType(idx,idx);});
300
301
302 // ======================== step1: One step of BFS =========================
303 std::vector<double> times;
304 double t1 = MPI_Wtime();
305
306 SpMV<WeightMaxMLSR<double, VertexType>>(A, unmatchedCol, fringeRow, false, SPA);
307
308 // Remove matched row vertices
309 fringeRow = EWiseApply<VertexType>(fringeRow, mateRow2Col,
310 [](VertexType vtx, IT mate){return vtx;},
311 [](VertexType vtx, IT mate){return mate==-1;},
312 false, VertexType());
313
314 if(myrank == 0){times.push_back(MPI_Wtime()-t1); t1 = MPI_Wtime();}
315 // ===========================================================================
316
317
318 // ======================== step2: Update matching =========================
319
320 fringeRow2 = EWiseApply<IT>(fringeRow, mateRow2Col,
321 [](VertexType vtx, IT mate){return vtx.parent;},
322 [](VertexType vtx, IT mate){return true;},
323 false, VertexType());
324
325 FullyDistSpVec<IT, IT> newMatchedCols = fringeRow2.Invert(A.getncol());
326 FullyDistSpVec<IT, IT> newMatchedRows = newMatchedCols.Invert(A.getnrow());
327 mateCol2Row.Set(newMatchedCols);
328 mateRow2Col.Set(newMatchedRows);
329 if(myrank == 0){times.push_back(MPI_Wtime()-t1); t1 = MPI_Wtime();}
330 // ===========================================================================
331
332
333 // =============== step3: Update unmatched columns and rows =================
334
335 unmatchedRow.Select(mateRow2Col, [](IT mate){return mate==-1;});
336 unmatchedCol.Select(mateCol2Row, [](IT mate){return mate==-1;});
337 if(myrank == 0){times.push_back(MPI_Wtime()-t1); t1 = MPI_Wtime();}
338 // ===========================================================================
339
340
341 ++iteration;
342 newlyMatched = newMatchedCols.getnnz();
343 times.push_back(std::accumulate(times.begin(), times.end(), 0.0));
344 timing.push_back(times);
345 #ifdef DETAIL_STATS
346 if(myrank == 0)
347 {
348 printf("%3d %10lld %10lld %10lld %18lf\n", iteration , curUnmatchedRow, curUnmatchedCol, newlyMatched, times.back());
349 }
350 #endif
351 curUnmatchedCol = unmatchedCol.getnnz();
352 curUnmatchedRow = unmatchedRow.getnnz();
353 MPI_Barrier(MPI_COMM_WORLD);
354
355 }
356
357 tTotalMaximal = MPI_Wtime() - tStart;
358
359 IT cardinality = mateRow2Col.Count([](IT mate){return mate!=-1;});
360 std::vector<double> totalTimes(timing[0].size(),0);
361 for(int i=0; i<timing.size(); i++)
362 {
363 for(int j=0; j<timing[i].size(); j++)
364 {
365 totalTimes[j] += timing[i][j];
366 }
367 }
368
369
370 if(myrank == 0)
371 {
372 #ifdef DETAIL_STATS
373 cout << "==========================================================\n";
374 cout << "\n================individual timings =======================\n";
375 cout << " SpMV Update-Match Update-UMC Total "<< endl;
376 cout << "==========================================================\n";
377 for(int i=0; i<timing.size(); i++)
378 {
379 for(int j=0; j<timing[i].size(); j++)
380 {
381 printf("%12.5lf ", timing[i][j]);
382 }
383 cout << endl;
384 }
385
386 cout << "-------------------------------------------------------\n";
387 for(int i=0; i<totalTimes.size(); i++)
388 printf("%12.5lf ", totalTimes[i]);
389 cout << endl;
390 #endif
391 std::cout << "****** maximal matching runtime ********\n";
392 std::cout << "Unmatched-Rows Cardinality Total Time***\n";
393 printf("%lld %lld %lf\n", curUnmatchedRow, cardinality, totalTimes.back());
394 std::cout << "-------------------------------------------------------\n\n";
395 }
396 //isMatching(mateCol2Row, mateRow2Col);
397 }
398
399
400
401
402 template <class Par_DCSC_Bool, class IT, class NT>
isMaximalmatching(Par_DCSC_Bool & A,FullyDistVec<IT,NT> & mateRow2Col,FullyDistVec<IT,NT> & mateCol2Row)403 bool isMaximalmatching(Par_DCSC_Bool & A, FullyDistVec<IT,NT> & mateRow2Col, FullyDistVec<IT,NT> & mateCol2Row)
404 {
405 typedef VertexTypeML < IT, IT> VertexType;
406 int myrank;
407 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
408 FullyDistSpVec<IT, IT> fringeRow(A.getcommgrid(), A.getnrow());
409 FullyDistSpVec<IT, IT> fringeCol(A.getcommgrid(), A.getncol());
410 FullyDistSpVec<IT, IT> unmatchedRow(mateRow2Col, [](IT mate){return mate==-1;});
411 FullyDistSpVec<IT, IT> unmatchedCol(mateCol2Row, [](IT mate){return mate==-1;});
412 unmatchedRow.setNumToInd();
413 unmatchedCol.setNumToInd();
414
415
416 SpMV<Select2ndMinSR<bool, VertexType>>(A, unmatchedCol, fringeRow, false);
417 fringeRow = EWiseMult(fringeRow, mateRow2Col, true, (IT) -1);
418 if(fringeRow.getnnz() != 0)
419 {
420 if(myrank == 0)
421 std::cout << "Not maximal matching!!\n";
422 return false;
423 }
424
425 Par_DCSC_Bool tA = A;
426 tA.Transpose();
427 SpMV<Select2ndMinSR<bool, VertexType>>(tA, unmatchedRow, fringeCol, false);
428 fringeCol = EWiseMult(fringeCol, mateCol2Row, true, (IT) -1);
429 if(fringeCol.getnnz() != 0)
430 {
431 if(myrank == 0)
432 std::cout << "Not maximal matching**!!\n";
433 return false;
434 }
435 return true;
436 }
437
438 }
439
440 #endif
441
442