1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30 #include <mpi.h>
31
32 // These macros should be defined before stdint.h is included
33 #ifndef __STDC_CONSTANT_MACROS
34 #define __STDC_CONSTANT_MACROS
35 #endif
36 #ifndef __STDC_LIMIT_MACROS
37 #define __STDC_LIMIT_MACROS
38 #endif
39 #include <stdint.h>
40
41 #include <sys/time.h>
42 #include <iostream>
43 #include <fstream>
44 #include <string>
45 #include <sstream> // Required for stringstreams
46 #include <ctime>
47 #include <cmath>
48 #include "CombBLAS/CombBLAS.h"
49 #include "CC.h"
50 #include "WriteMCLClusters.h"
51
52 using namespace std;
53 using namespace combblas;
54
55 #define EPS 0.0001
56
57 double mcl_Abcasttime;
58 double mcl_Bbcasttime;
59 double mcl_localspgemmtime;
60 double mcl_multiwaymergetime;
61 double mcl_kselecttime;
62 double mcl_prunecolumntime;
63 double cblas_allgathertime; // for compilation (TODO: fix this dependency)
64 int64_t mcl_memory;
65 double tIO;
66
67
68
69 class Dist
70 {
71 public:
72 typedef SpDCCols < int64_t, double > DCCols;
73 typedef SpParMat < int64_t, double, DCCols > MPI_DCCols;
74 typedef FullyDistVec < int64_t, double> MPI_DenseVec;
75 };
76
77
78
79 typedef struct
80 {
81 //Input/Output file
82 string ifilename;
83 bool isInputMM;
84 int base; // only usefule for matrix market files
85
86 string ofilename;
87
88 //Preprocessing
89 int randpermute;
90 bool remove_isolated;
91
92 //inflation
93 double inflation;
94
95 //pruning
96 double prunelimit;
97 int64_t select;
98 int64_t recover_num;
99 double recover_pct;
100 int kselectVersion; // 0: adapt based on k, 1: kselect1, 2: kselect2
101
102 //HipMCL optimization
103 int phases;
104 int perProcessMem;
105 bool isDoublePrecision; // true: double, false: float
106 bool is64bInt; // true: int64_t, false: int32_t (currently for both local and global indexing)
107
108 //debugging
109 bool show;
110
111
112 }HipMCLParam;
113
114
InitParam(HipMCLParam & param)115 void InitParam(HipMCLParam & param)
116 {
117 //Input/Output file
118 param.ifilename = "";
119 param.isInputMM = false;
120 param.ofilename = "";
121 param.base = 1;
122
123 //Preprocessing
124 // mcl removes isolated vertices by default,
125 // we don't do this because it will create different ordering of vertices!
126 param.remove_isolated = false;
127 param.randpermute = 0;
128
129 //inflation
130 param.inflation = 0.0;
131
132 //pruning
133 param.prunelimit = 1.0/10000.0;
134 param.select = 1100;
135 param.recover_num = 1400;
136 param.recover_pct = .9; // we allow both 90 or .9 as input. Internally, we keep it 0.9
137 param.kselectVersion = 1;
138
139 //HipMCL optimization
140 param.phases = 1;
141 param.perProcessMem = 0;
142 param.isDoublePrecision = true;
143 param.is64bInt = true;
144
145 //debugging
146 param.show = false;
147 }
148
ShowParam(HipMCLParam & param)149 void ShowParam(HipMCLParam & param)
150 {
151 ostringstream runinfo;
152 runinfo << "\n======================================" << endl;
153 runinfo << "Running HipMCL with the parameters: " << endl;
154 runinfo << "======================================" << endl;
155 runinfo << "Input/Output file" << endl;
156 runinfo << " input filename: " << param.ifilename << endl;
157 runinfo << " input file type: " ;
158 if(param.isInputMM)
159 {
160 runinfo << " Matrix Market" << endl;
161 runinfo << " Base of the input matrix: " << param.base << endl;
162 }
163 else runinfo << " Labeled Triples format" << endl;
164 runinfo << " Output filename: " << param.ofilename << endl;
165
166
167 runinfo << "Preprocessing" << endl;
168 runinfo << " Remove isolated vertices? : ";
169 if (param.remove_isolated) runinfo << "yes";
170 else runinfo << "no" << endl;
171
172 runinfo << " Randomly permute vertices? : ";
173 if (param.randpermute) runinfo << "yes";
174 else runinfo << "no" << endl;
175
176 runinfo << "Inflation: " << param.inflation << endl;
177
178 runinfo << "Pruning" << endl;
179 runinfo << " Prunelimit: " << param.prunelimit << endl;
180 runinfo << " Recover number: " << param.recover_num << endl;
181 runinfo << " Recover percent: " << ceil(param.recover_pct*100) << endl;
182 runinfo << " Selection number: " << param.select << endl;
183 // do not expose selection option at this moment
184 //runinfo << "Selection algorithm: ";
185 //if(kselectVersion==1) runinfo << "tournament select" << endl;
186 //else if(kselectVersion==2) runinfo << "quickselect" << endl;
187 //else runinfo << "adaptive based on k" << endl;
188
189
190
191 runinfo << "HipMCL optimization" << endl;
192 runinfo << " Number of phases: " << param.phases << endl;
193 runinfo << " Memory avilable per process: ";
194 if(param.perProcessMem>0) runinfo << param.perProcessMem << "GB" << endl;
195 else runinfo << "not provided" << endl;
196 if(param.isDoublePrecision) runinfo << "Using double precision floating point" << endl;
197 else runinfo << "Using single precision floating point" << endl;
198 if(param.is64bInt ) runinfo << "Using 64 bit indexing" << endl;
199 else runinfo << "Using 32 bit indexing" << endl;
200
201 runinfo << "Debugging" << endl;
202 runinfo << " Show matrices after major steps? : ";
203 if (param.show) runinfo << "yes";
204 else runinfo << "no" << endl;
205 runinfo << "======================================" << endl;
206 SpParHelper::Print(runinfo.str());
207 }
208
ProcessParam(int argc,char * argv[],HipMCLParam & param)209 void ProcessParam(int argc, char* argv[], HipMCLParam & param)
210 {
211 for (int i = 1; i < argc; i++)
212 {
213 if (strcmp(argv[i],"-M")==0){
214 param.ifilename = string(argv[i+1]);
215 }
216 else if (strcmp(argv[i],"--matrix-market")==0){
217 param.isInputMM = true;
218 }
219 else if (strcmp(argv[i],"-o")==0){
220 param.ofilename = string(argv[i+1]);
221 }
222 else if (strcmp(argv[i],"--show")==0){
223 param.show = true;
224 }
225 else if (strcmp(argv[i],"--remove-isolated")==0){
226 param.remove_isolated = true;
227 }
228 else if (strcmp(argv[i],"--tournament-select")==0){
229 param.kselectVersion = 1;
230 }
231 else if (strcmp(argv[i],"--quick-select")==0){
232 param.kselectVersion = 2;
233
234 }
235 else if (strcmp(argv[i],"-I")==0){
236 param.inflation = atof(argv[i + 1]);
237
238 } else if (strcmp(argv[i],"-p")==0) {
239 param.prunelimit = atof(argv[i + 1]);
240
241 } else if (strcmp(argv[i],"-S")==0) {
242 param.select = atoi(argv[i + 1]);
243
244 } else if (strcmp(argv[i],"-R")==0) {
245 param.recover_num = atoi(argv[i + 1]);
246
247 } else if (strcmp(argv[i],"-pct")==0)
248 {
249 param.recover_pct = atof(argv[i + 1]);
250 if(param.recover_pct>1) param.recover_pct/=100.00;
251 } else if (strcmp(argv[i],"-base")==0) {
252 param.base = atoi(argv[i + 1]);
253 }
254 else if (strcmp(argv[i],"-rand")==0) {
255 param.randpermute = atoi(argv[i + 1]);
256 }
257 else if (strcmp(argv[i],"-phases")==0) {
258 param.phases = atoi(argv[i + 1]);
259 }
260 else if (strcmp(argv[i],"-per-process-mem")==0) {
261 param.perProcessMem = atoi(argv[i + 1]);
262 }
263 else if (strcmp(argv[i],"--single-precision")==0) {
264 param.isDoublePrecision = false;
265 }
266 else if (strcmp(argv[i],"--32bit-index")==0) {
267 param.is64bInt = false;
268 }
269 }
270
271 if(param.ofilename=="") // construct output file name if it is not provided
272 {
273 param.ofilename = param.ifilename + ".hipmcl";
274 }
275
276 }
277
278
ShowOptions()279 void ShowOptions()
280 {
281 ostringstream runinfo;
282
283 runinfo << "Usage: ./hipmcl -M <input filename> -I <inlfation> (required)" << endl;
284
285 runinfo << "======================================" << endl;
286 runinfo << " Detail parameter options " << endl;
287 runinfo << "======================================" << endl;
288
289
290
291 runinfo << "Input/Output file" << endl;
292 runinfo << " -M <input file name (labeled triples format)> (mandatory)" << endl;
293 runinfo << " --matrix-market : if provided, the input file is in the matrix market format (default: the file is in labeled triples format)" << endl;
294 runinfo << " -base <index of the first vertex in the matrix market file, 0|1> (default: 1) " << endl;
295 runinfo << " -o <output filename> (default: input_file_name.hipmcl )" << endl;
296
297 runinfo << "Inflation" << endl;
298 runinfo << "-I <inflation> (mandatory)\n";
299
300 runinfo << "Preprocessing" << endl;
301 runinfo << " -rand <randomly permute vertices> (default:0)\n";
302 runinfo << " --remove-isolated : if provided, remove isolated vertices (default: don't remove isolated vertices)\n";
303
304
305 runinfo << "Pruning" << endl;
306 runinfo << " -p <cutoff> (default: 1/10000)\n";
307 runinfo << " -R <recovery number> (default: 1400)\n";
308 runinfo << " -pct <recovery pct> (default: 90)\n";
309 runinfo << " -S <selection number> (default: 1100)\n";
310
311
312 runinfo << "HipMCL optimization" << endl;
313 runinfo << " -phases <number of phases> (default:1)\n";
314 runinfo << " -per-process-mem <memory (GB) available per process> (default:0, number of phases is not estimated)\n";
315 runinfo << " --single-precision (if not provided, use double precision floating point numbers)\n" << endl;
316 runinfo << " --32bit-index (if not provided, use 64 bit indexing for vertex ids)\n" << endl;
317
318 runinfo << "Debugging" << endl;
319 runinfo << " --show: show information about matrices after major steps (default: do not show matrices)" << endl;
320
321
322
323 runinfo << "======================================" << endl;
324 runinfo << " Few examples " << endl;
325 runinfo << "======================================" << endl;
326 runinfo << "Example with with a graph in labeled triples format on a laptop with 8GB memory and 8 cores:\nexport OMP_NUM_THREADS=8\nbin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 8" << endl;
327 runinfo << "Same as above with 4 processes and 2 theaded per process cores:\nexport OMP_NUM_THREADS=2\nmpirun -np 4 bin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 2" << endl;
328 runinfo << "Example with a graph in matrix market format:\nbin/hipmcl -M data/sevenvertex.mtx --matrix-market -base 1 -I 2 -per-process-mem 8" << endl;
329
330 runinfo << "Example on the NERSC/Edison system with 16 nodes and 24 threads per node: \nsrun -N 16 -n 16 -c 24 bin/hipmcl -M data/hep-th.mtx --matrix-market -base 1 -per-process-mem 64 -o hep-th.hipmcl" << endl;
331 SpParHelper::Print(runinfo.str());
332 }
333
334
335 // base: base of items
336 // clusters are always numbered 0-based
337 template <typename IT, typename NT, typename DER>
Interpret(SpParMat<IT,NT,DER> & A)338 FullyDistVec<IT, IT> Interpret(SpParMat<IT,NT,DER> & A)
339 {
340 IT nCC;
341 // A is a directed graph
342 // symmetricize A
343
344 SpParMat<IT,NT,DER> AT = A;
345 AT.Transpose();
346 A += AT;
347 FullyDistVec<IT, IT> cclabels = CC(A, nCC);
348 return cclabels;
349 }
350
351
352 template <typename IT, typename NT, typename DER>
MakeColStochastic(SpParMat<IT,NT,DER> & A)353 void MakeColStochastic(SpParMat<IT,NT,DER> & A)
354 {
355 FullyDistVec<IT, NT> colsums = A.Reduce(Column, plus<NT>(), 0.0);
356 colsums.Apply(safemultinv<NT>());
357 A.DimApply(Column, colsums, multiplies<NT>()); // scale each "Column" with the given vector
358 }
359
360 template <typename IT, typename NT, typename DER>
361 NT Chaos(SpParMat<IT,NT,DER> & A)
362 {
363 // sums of squares of columns
364 FullyDistVec<IT, NT> colssqs = A.Reduce(Column, plus<NT>(), 0.0, bind2nd(exponentiate(), 2));
365 // Matrix entries are non-negative, so max() can use zero as identity
366 FullyDistVec<IT, NT> colmaxs = A.Reduce(Column, maximum<NT>(), 0.0);
367 colmaxs -= colssqs;
368
369 // multiplu by number of nonzeros in each column
__anon443ce3dc0202(NT val)370 FullyDistVec<IT, NT> nnzPerColumn = A.Reduce(Column, plus<NT>(), 0.0, [](NT val){return 1.0;});
371 colmaxs.EWiseApply(nnzPerColumn, multiplies<NT>());
372
373 return colmaxs.Reduce(maximum<NT>(), 0.0);
374 }
375
376 template <typename IT, typename NT, typename DER>
Inflate(SpParMat<IT,NT,DER> & A,double power)377 void Inflate(SpParMat<IT,NT,DER> & A, double power)
378 {
379 A.Apply(bind2nd(exponentiate(), power));
380 }
381
382 // default adjustloop setting
383 // 1. Remove loops
384 // 2. set loops to max of all arc weights
385 template <typename IT, typename NT, typename DER>
AdjustLoops(SpParMat<IT,NT,DER> & A)386 void AdjustLoops(SpParMat<IT,NT,DER> & A)
387 {
388
389 A.RemoveLoops();
390 FullyDistVec<IT, NT> colmaxs = A.Reduce(Column, maximum<NT>(), numeric_limits<NT>::min());
391 A.Apply([](NT val){return val==numeric_limits<NT>::min() ? 1.0 : val;}); // for isolated vertices
392 A.AddLoops(colmaxs);
393 ostringstream outs;
394 outs << "Adjusting loops" << endl;
395 SpParHelper::Print(outs.str());
396 }
397
398 template <typename IT, typename NT, typename DER>
RemoveIsolated(SpParMat<IT,NT,DER> & A,HipMCLParam & param)399 void RemoveIsolated(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
400 {
401 ostringstream outs;
402 FullyDistVec<IT, NT> ColSums = A.Reduce(Column, plus<NT>(), 0.0);
403 FullyDistVec<IT, IT> nonisov = ColSums.FindInds(bind2nd(greater<NT>(), 0));
404 IT numIsolated = A.getnrow() - nonisov.TotalLength();
405 outs << "Number of isolated vertices: " << numIsolated << endl;
406 SpParHelper::Print(outs.str());
407
408 A(nonisov, nonisov, true);
409 SpParHelper::Print("Removed isolated vertices.\n");
410 if(param.show)
411 {
412 A.PrintInfo();
413 }
414
415 }
416
417 //TODO: handle reordered cluster ids
418 template <typename IT, typename NT, typename DER>
RandPermute(SpParMat<IT,NT,DER> & A,HipMCLParam & param)419 void RandPermute(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
420 {
421 // randomly permute for load balance
422 if(A.getnrow() == A.getncol())
423 {
424 FullyDistVec<IT, IT> p( A.getcommgrid());
425 p.iota(A.getnrow(), 0);
426 p.RandPerm();
427 (A)(p,p,true);// in-place permute to save memory
428 SpParHelper::Print("Applied symmetric permutation.\n");
429 }
430 else
431 {
432 SpParHelper::Print("Rectangular matrix: Can not apply symmetric permutation.\n");
433 }
434 }
435
436 template <typename IT, typename NT, typename DER>
HipMCL(SpParMat<IT,NT,DER> & A,HipMCLParam & param)437 FullyDistVec<IT, IT> HipMCL(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
438 {
439 if(param.remove_isolated)
440 RemoveIsolated(A, param);
441
442 if(param.randpermute)
443 RandPermute(A, param);
444
445 // Adjust self loops
446 AdjustLoops(A);
447
448 // Make stochastic
449 MakeColStochastic(A);
450 SpParHelper::Print("Made stochastic\n");
451
452 if(param.show)
453 {
454 A.PrintInfo();
455 }
456
457
458 // chaos doesn't make sense for non-stochastic matrices
459 // it is in the range {0,1} for stochastic matrices
460 NT chaos = 1;
461 int it=1;
462 double tInflate = 0;
463 double tExpand = 0;
464 typedef PlusTimesSRing<NT, NT> PTFF;
465 // while there is an epsilon improvement
466 while( chaos > EPS)
467 {
468 double t1 = MPI_Wtime();
469 //A.Square<PTFF>() ; // expand
470 A = MemEfficientSpGEMM<PTFF, NT, DER>(A, A, param.phases, param.prunelimit, (IT)param.select, (IT)param.recover_num, param.recover_pct, param.kselectVersion, param.perProcessMem);
471
472 MakeColStochastic(A);
473 tExpand += (MPI_Wtime() - t1);
474
475 if(param.show)
476 {
477 SpParHelper::Print("After expansion\n");
478 A.PrintInfo();
479 }
480 chaos = Chaos(A);
481
482 double tInflate1 = MPI_Wtime();
483 Inflate(A, param.inflation);
484 MakeColStochastic(A);
485 tInflate += (MPI_Wtime() - tInflate1);
486
487 if(param.show)
488 {
489 SpParHelper::Print("After inflation\n");
490 A.PrintInfo();
491 }
492
493
494
495 double newbalance = A.LoadImbalance();
496 double t3=MPI_Wtime();
497 stringstream s;
498 s << "Iteration# " << setw(3) << it << " : " << " chaos: " << setprecision(3) << chaos << " load-balance: "<< newbalance << " Time: " << (t3-t1) << endl;
499 SpParHelper::Print(s.str());
500 it++;
501
502
503
504 }
505
506
507 double tcc1 = MPI_Wtime();
508 // bool does not work because A.AddLoops(1) can not create a fullydist vector with Bool?
509 // write a promote train for float
510 SpParMat<IT,double, SpDCCols < IT, double >> ADouble = A;
511 FullyDistVec<IT, IT> cclabels = Interpret(ADouble);
512 double tcc = MPI_Wtime() - tcc1;
513
514
515 #ifdef TIMING
516 int myrank;
517 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
518 if(myrank==0)
519 {
520 cout << "================detailed timing==================" << endl;
521
522 cout << "Expansion: " << mcl_Abcasttime + mcl_Bbcasttime + mcl_localspgemmtime + mcl_multiwaymergetime << endl;
523 cout << " Abcast= " << mcl_Abcasttime << endl;
524 cout << " Bbcast= " << mcl_Bbcasttime << endl;
525 cout << " localspgemm= " << mcl_localspgemmtime << endl;
526 cout << " multiwaymergetime= "<< mcl_multiwaymergetime << endl;
527 cout << "Prune: " << mcl_kselecttime + mcl_prunecolumntime << endl;
528 cout << " kselect= " << mcl_kselecttime << endl;
529 cout << " prunecolumn= " << mcl_prunecolumntime << endl;
530 cout << "Inflation " << tInflate << endl;
531 cout << "Component: " << tcc << endl;
532 cout << "File I/O: " << tIO << endl;
533 cout << "=================================================" << endl;
534 }
535
536 #endif
537
538 return cclabels;
539
540
541 }
542
543 template <typename IT, typename NT, typename DER>
Symmetricize(SpParMat<IT,NT,DER> & A)544 void Symmetricize(SpParMat<IT,NT,DER> & A)
545 {
546 SpParMat<IT,NT,DER> AT = A;
547 AT.Transpose();
548 if(!(AT == A))
549 {
550 SpParHelper::Print("Symmatricizing an unsymmetric input matrix.\n");
551 A += AT;
552 }
553 }
554
555 template <typename GIT, typename LIT, typename NT>
MainBody(HipMCLParam & param)556 void MainBody(HipMCLParam & param)
557 {
558 SpParMat<GIT,NT, SpDCCols < LIT, NT >> A(MPI_COMM_WORLD); // construct object
559 FullyDistVec<GIT, array<char, MAXVERTNAME> > vtxLabels(A.getcommgrid());
560
561 // read file
562
563 SpParHelper::Print("Reading input file......\n");
564
565 double tIO1 = MPI_Wtime();
566 if(param.isInputMM)
567 A.ParallelReadMM(param.ifilename, param.base, maximum<NT>()); // if base=0, then it is implicitly converted to Boolean false
568 else // default labeled triples format
569 vtxLabels = A.ReadGeneralizedTuples(param.ifilename, maximum<NT>());
570
571 tIO = MPI_Wtime() - tIO1;
572 ostringstream outs;
573 outs << " : took " << tIO << " seconds" << endl;
574 SpParHelper::Print(outs.str());
575 // Symmetricize the matrix only if needed
576 Symmetricize(A);
577
578 double balance = A.LoadImbalance();
579
580 outs.str("");
581 outs.clear();
582 if(!param.is64bInt) // don't compute nnz because it may overflow
583 {
584 GIT nnz = A.getnnz();
585 GIT nv = A.getnrow();
586 outs << "Number of vertices: " << nv << " number of edges: "<< nnz << endl;
587 }
588 outs << "Load balance: " << balance << endl;
589 SpParHelper::Print(outs.str());
590
591 if(param.show)
592 {
593 A.PrintInfo();
594 }
595
596
597
598 #ifdef TIMING
599 mcl_Abcasttime = 0;
600 mcl_Bbcasttime = 0;
601 mcl_localspgemmtime = 0;
602 mcl_multiwaymergetime = 0;
603 mcl_kselecttime = 0;
604 mcl_prunecolumntime = 0;
605 #endif
606
607
608
609 double tstart = MPI_Wtime();
610
611 // Run HipMCL
612 FullyDistVec<GIT, GIT> culstLabels = HipMCL(A, param);
613 //culstLabels.ParallelWrite(param.ofilename, param.base); // clusters are always numbered 0-based
614
615 if(param.isInputMM)
616 WriteMCLClusters(param.ofilename, culstLabels, param.base);
617 else
618 WriteMCLClusters(param.ofilename, culstLabels, vtxLabels);
619
620
621
622 GIT nclusters = culstLabels.Reduce(maximum<GIT>(), (GIT) 0 ) ;
623 nclusters ++; // because of zero based indexing for clusters
624
625 double tend = MPI_Wtime();
626 stringstream s2;
627 s2 << "Number of clusters: " << nclusters << endl;
628 s2 << "Total time: " << (tend-tstart) << endl;
629 s2 << "=================================================\n" << endl ;
630 SpParHelper::Print(s2.str());
631
632 }
633
634
main(int argc,char * argv[])635 int main(int argc, char* argv[])
636 {
637 int provided;
638 MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
639 if (provided < MPI_THREAD_SERIALIZED)
640 {
641 printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
642 MPI_Abort(MPI_COMM_WORLD, 1);
643 }
644
645 int nthreads = 1;
646 #ifdef THREADED
647 #pragma omp parallel
648 {
649 nthreads = omp_get_num_threads();
650 }
651 #endif
652
653 int nprocs, myrank;
654 MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
655 MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
656
657 HipMCLParam param;
658
659 // initialize parameters to default values
660 InitParam(param);
661
662 // Populate parameters from command line options
663 ProcessParam(argc, argv, param);
664
665 // check if mandatory arguments are provided
666 if(param.ifilename=="" || param.inflation == 0.0)
667 {
668 SpParHelper::Print("Required options are missing.\n");
669 ShowOptions();
670 MPI_Finalize();
671 return -1;
672 }
673
674
675 if(myrank == 0)
676 {
677 cout << "\nProcess Grid used (pr x pc x threads): " << sqrt(nprocs) << " x " << sqrt(nprocs) << " x " << nthreads << endl;
678 }
679
680
681 // show parameters used to run HipMCL
682 ShowParam(param);
683
684 if(param.perProcessMem==0)
685 {
686 if(myrank == 0)
687 {
688 cout << "******** Number of phases will not be estimated as -per-process-mem option is supplied. It is highly recommended that you provide -per-process-mem option for large-scale runs. *********** " << endl;
689 }
690 }
691
692 {
693 if(param.isDoublePrecision)
694 if(param.is64bInt) // default case
695 MainBody<int64_t, int64_t, double>(param);
696 else
697 MainBody<int32_t, int32_t, double>(param);
698 else if(param.is64bInt)
699 MainBody<int64_t, int64_t, float>(param);
700 else
701 MainBody<int32_t, int32_t, float>(param); // memory effecient case
702 }
703
704
705
706 // make sure the destructors for all objects are called before MPI::Finalize()
707 MPI_Finalize();
708 return 0;
709 }
710