1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #include <mpi.h>
31 
32 // These macros should be defined before stdint.h is included
33 #ifndef __STDC_CONSTANT_MACROS
34 #define __STDC_CONSTANT_MACROS
35 #endif
36 #ifndef __STDC_LIMIT_MACROS
37 #define __STDC_LIMIT_MACROS
38 #endif
39 #include <stdint.h>
40 
41 #include <sys/time.h>
42 #include <iostream>
43 #include <fstream>
44 #include <string>
45 #include <sstream>  // Required for stringstreams
46 #include <ctime>
47 #include <cmath>
48 #include "CombBLAS/CombBLAS.h"
49 #include "CC.h"
50 #include "WriteMCLClusters.h"
51 
52 using namespace std;
53 using namespace combblas;
54 
55 #define EPS 0.0001
56 
57 double mcl_Abcasttime;
58 double mcl_Bbcasttime;
59 double mcl_localspgemmtime;
60 double mcl_multiwaymergetime;
61 double mcl_kselecttime;
62 double mcl_prunecolumntime;
63 double cblas_allgathertime;	// for compilation (TODO: fix this dependency)
64 int64_t mcl_memory;
65 double tIO;
66 
67 
68 
69 class Dist
70 {
71     public:
72     typedef SpDCCols < int64_t, double > DCCols;
73     typedef SpParMat < int64_t, double, DCCols > MPI_DCCols;
74     typedef FullyDistVec < int64_t, double> MPI_DenseVec;
75 };
76 
77 
78 
79 typedef struct
80 {
81     //Input/Output file
82     string ifilename;
83     bool isInputMM;
84     int base; // only usefule for matrix market files
85 
86     string ofilename;
87 
88     //Preprocessing
89     int randpermute;
90     bool remove_isolated;
91 
92     //inflation
93     double inflation;
94 
95     //pruning
96     double prunelimit;
97     int64_t select;
98     int64_t recover_num;
99     double recover_pct;
100     int kselectVersion; // 0: adapt based on k, 1: kselect1, 2: kselect2
101 
102     //HipMCL optimization
103     int phases;
104     int perProcessMem;
105     bool isDoublePrecision; // true: double, false: float
106     bool is64bInt; // true: int64_t, false: int32_t (currently for both local and global indexing)
107 
108     //debugging
109     bool show;
110 
111 
112 }HipMCLParam;
113 
114 
InitParam(HipMCLParam & param)115 void InitParam(HipMCLParam & param)
116 {
117     //Input/Output file
118     param.ifilename = "";
119     param.isInputMM = false;
120     param.ofilename = "";
121     param.base = 1;
122 
123     //Preprocessing
124     // mcl removes isolated vertices by default,
125     // we don't do this because it will create different ordering of vertices!
126     param.remove_isolated = false;
127     param.randpermute = 0;
128 
129     //inflation
130     param.inflation = 0.0;
131 
132     //pruning
133     param.prunelimit = 1.0/10000.0;
134     param.select = 1100;
135     param.recover_num = 1400;
136     param.recover_pct = .9; // we allow both 90 or .9 as input. Internally, we keep it 0.9
137     param.kselectVersion = 1;
138 
139     //HipMCL optimization
140     param.phases = 1;
141     param.perProcessMem = 0;
142     param.isDoublePrecision = true;
143     param.is64bInt = true;
144 
145     //debugging
146     param.show = false;
147 }
148 
ShowParam(HipMCLParam & param)149 void ShowParam(HipMCLParam & param)
150 {
151     ostringstream runinfo;
152     runinfo << "\n======================================" << endl;
153     runinfo << "Running HipMCL with the parameters: " << endl;
154     runinfo << "======================================" << endl;
155     runinfo << "Input/Output file" << endl;
156     runinfo << "    input filename: " << param.ifilename << endl;
157     runinfo << "    input file type: " ;
158     if(param.isInputMM)
159     {
160         runinfo << " Matrix Market" << endl;
161         runinfo << "    Base of the input matrix: " << param.base << endl;
162     }
163     else runinfo << " Labeled Triples format" << endl;
164     runinfo << "    Output filename: " << param.ofilename << endl;
165 
166 
167     runinfo << "Preprocessing" << endl;
168     runinfo << "    Remove isolated vertices? : ";
169     if (param.remove_isolated) runinfo << "yes";
170     else runinfo << "no" << endl;
171 
172     runinfo << "    Randomly permute vertices? : ";
173     if (param.randpermute) runinfo << "yes";
174     else runinfo << "no" << endl;
175 
176     runinfo << "Inflation: " << param.inflation << endl;
177 
178     runinfo << "Pruning" << endl;
179     runinfo << "    Prunelimit: " << param.prunelimit << endl;
180     runinfo << "    Recover number: " << param.recover_num << endl;
181     runinfo << "    Recover percent: " << ceil(param.recover_pct*100) << endl;
182     runinfo << "    Selection number: " << param.select << endl;
183     // do not expose selection option at this moment
184     //runinfo << "Selection algorithm: ";
185     //if(kselectVersion==1) runinfo << "tournament select" << endl;
186     //else if(kselectVersion==2) runinfo << "quickselect" << endl;
187     //else runinfo << "adaptive based on k" << endl;
188 
189 
190 
191     runinfo << "HipMCL optimization" << endl;
192     runinfo << "    Number of phases: " << param.phases << endl;
193     runinfo << "    Memory avilable per process: ";
194     if(param.perProcessMem>0) runinfo << param.perProcessMem << "GB" << endl;
195     else runinfo << "not provided" << endl;
196     if(param.isDoublePrecision) runinfo << "Using double precision floating point" << endl;
197     else runinfo << "Using single precision floating point" << endl;
198     if(param.is64bInt ) runinfo << "Using 64 bit indexing" << endl;
199     else runinfo << "Using 32 bit indexing" << endl;
200 
201     runinfo << "Debugging" << endl;
202     runinfo << "    Show matrices after major steps? : ";
203     if (param.show) runinfo << "yes";
204     else runinfo << "no" << endl;
205     runinfo << "======================================" << endl;
206     SpParHelper::Print(runinfo.str());
207 }
208 
ProcessParam(int argc,char * argv[],HipMCLParam & param)209 void ProcessParam(int argc, char* argv[], HipMCLParam & param)
210 {
211     for (int i = 1; i < argc; i++)
212     {
213         if (strcmp(argv[i],"-M")==0){
214             param.ifilename = string(argv[i+1]);
215         }
216         else if (strcmp(argv[i],"--matrix-market")==0){
217             param.isInputMM = true;
218         }
219         else if (strcmp(argv[i],"-o")==0){
220             param.ofilename = string(argv[i+1]);
221         }
222         else if (strcmp(argv[i],"--show")==0){
223             param.show = true;
224         }
225         else if (strcmp(argv[i],"--remove-isolated")==0){
226             param.remove_isolated = true;
227         }
228         else if (strcmp(argv[i],"--tournament-select")==0){
229             param.kselectVersion = 1;
230         }
231         else if (strcmp(argv[i],"--quick-select")==0){
232             param.kselectVersion = 2;
233 
234         }
235         else if (strcmp(argv[i],"-I")==0){
236             param.inflation = atof(argv[i + 1]);
237 
238         } else if (strcmp(argv[i],"-p")==0) {
239             param.prunelimit = atof(argv[i + 1]);
240 
241         } else if (strcmp(argv[i],"-S")==0) {
242             param.select = atoi(argv[i + 1]);
243 
244         } else if (strcmp(argv[i],"-R")==0) {
245             param.recover_num = atoi(argv[i + 1]);
246 
247         } else if (strcmp(argv[i],"-pct")==0)
248         {
249             param.recover_pct = atof(argv[i + 1]);
250             if(param.recover_pct>1) param.recover_pct/=100.00;
251         } else if (strcmp(argv[i],"-base")==0) {
252             param.base = atoi(argv[i + 1]);
253         }
254         else if (strcmp(argv[i],"-rand")==0) {
255             param.randpermute = atoi(argv[i + 1]);
256         }
257         else if (strcmp(argv[i],"-phases")==0) {
258             param.phases = atoi(argv[i + 1]);
259         }
260         else if (strcmp(argv[i],"-per-process-mem")==0) {
261             param.perProcessMem = atoi(argv[i + 1]);
262         }
263         else if (strcmp(argv[i],"--single-precision")==0) {
264             param.isDoublePrecision = false;
265         }
266         else if (strcmp(argv[i],"--32bit-index")==0) {
267             param.is64bInt = false;
268         }
269     }
270 
271     if(param.ofilename=="") // construct output file name if it is not provided
272     {
273         param.ofilename = param.ifilename + ".hipmcl";
274     }
275 
276 }
277 
278 
ShowOptions()279 void ShowOptions()
280 {
281     ostringstream runinfo;
282 
283     runinfo << "Usage: ./hipmcl -M <input filename> -I <inlfation> (required)" << endl;
284 
285     runinfo << "======================================" << endl;
286     runinfo << "     Detail parameter options    " << endl;
287     runinfo << "======================================" << endl;
288 
289 
290 
291     runinfo << "Input/Output file" << endl;
292     runinfo << "    -M <input file name (labeled triples format)> (mandatory)" << endl;
293     runinfo << "    --matrix-market : if provided, the input file is in the matrix market format (default: the file is in labeled triples format)" << endl;
294     runinfo << "    -base <index of the first vertex in the matrix market file, 0|1> (default: 1) " << endl;
295     runinfo << "    -o <output filename> (default: input_file_name.hipmcl )" << endl;
296 
297     runinfo << "Inflation" << endl;
298     runinfo << "-I <inflation> (mandatory)\n";
299 
300     runinfo << "Preprocessing" << endl;
301     runinfo << "    -rand <randomly permute vertices> (default:0)\n";
302     runinfo << "    --remove-isolated : if provided, remove isolated vertices (default: don't remove isolated vertices)\n";
303 
304 
305     runinfo << "Pruning" << endl;
306     runinfo << "    -p <cutoff> (default: 1/10000)\n";
307     runinfo << "    -R <recovery number> (default: 1400)\n";
308     runinfo << "    -pct <recovery pct> (default: 90)\n";
309     runinfo << "    -S <selection number> (default: 1100)\n";
310 
311 
312     runinfo << "HipMCL optimization" << endl;
313     runinfo << "    -phases <number of phases> (default:1)\n";
314     runinfo << "    -per-process-mem <memory (GB) available per process> (default:0, number of phases is not estimated)\n";
315     runinfo << "    --single-precision (if not provided, use double precision floating point numbers)\n" << endl;
316     runinfo << "    --32bit-index (if not provided, use 64 bit indexing for vertex ids)\n" << endl;
317 
318     runinfo << "Debugging" << endl;
319     runinfo << "    --show: show information about matrices after major steps (default: do not show matrices)" << endl;
320 
321 
322 
323     runinfo << "======================================" << endl;
324     runinfo << "     Few examples    " << endl;
325     runinfo << "======================================" << endl;
326     runinfo << "Example with with a graph in labeled triples format on a laptop with 8GB memory and 8 cores:\nexport OMP_NUM_THREADS=8\nbin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 8" << endl;
327     runinfo << "Same as above with 4 processes and 2 theaded per process cores:\nexport OMP_NUM_THREADS=2\nmpirun -np 4 bin/hipmcl -M data/sevenvertexgraph.txt -I 2 -per-process-mem 2" << endl;
328     runinfo << "Example with a graph in matrix market format:\nbin/hipmcl -M data/sevenvertex.mtx --matrix-market -base 1 -I 2 -per-process-mem 8" << endl;
329 
330     runinfo << "Example on the NERSC/Edison system with 16 nodes and 24 threads per node: \nsrun -N 16 -n 16 -c 24  bin/hipmcl -M data/hep-th.mtx --matrix-market -base 1 -per-process-mem 64 -o hep-th.hipmcl" << endl;
331     SpParHelper::Print(runinfo.str());
332 }
333 
334 
335 // base: base of items
336 // clusters are always numbered 0-based
337 template <typename IT, typename NT, typename DER>
Interpret(SpParMat<IT,NT,DER> & A)338 FullyDistVec<IT, IT> Interpret(SpParMat<IT,NT,DER> & A)
339 {
340     IT nCC;
341     // A is a directed graph
342     // symmetricize A
343 
344     SpParMat<IT,NT,DER> AT = A;
345     AT.Transpose();
346     A += AT;
347     FullyDistVec<IT, IT> cclabels = CC(A, nCC);
348     return cclabels;
349 }
350 
351 
352 template <typename IT, typename NT, typename DER>
MakeColStochastic(SpParMat<IT,NT,DER> & A)353 void MakeColStochastic(SpParMat<IT,NT,DER> & A)
354 {
355     FullyDistVec<IT, NT> colsums = A.Reduce(Column, plus<NT>(), 0.0);
356     colsums.Apply(safemultinv<NT>());
357     A.DimApply(Column, colsums, multiplies<NT>());	// scale each "Column" with the given vector
358 }
359 
360 template <typename IT, typename NT, typename DER>
361 NT Chaos(SpParMat<IT,NT,DER> & A)
362 {
363     // sums of squares of columns
364     FullyDistVec<IT, NT> colssqs = A.Reduce(Column, plus<NT>(), 0.0, bind2nd(exponentiate(), 2));
365     // Matrix entries are non-negative, so max() can use zero as identity
366     FullyDistVec<IT, NT> colmaxs = A.Reduce(Column, maximum<NT>(), 0.0);
367     colmaxs -= colssqs;
368 
369     // multiplu by number of nonzeros in each column
__anon443ce3dc0202(NT val)370     FullyDistVec<IT, NT> nnzPerColumn = A.Reduce(Column, plus<NT>(), 0.0, [](NT val){return 1.0;});
371     colmaxs.EWiseApply(nnzPerColumn, multiplies<NT>());
372 
373     return colmaxs.Reduce(maximum<NT>(), 0.0);
374 }
375 
376 template <typename IT, typename NT, typename DER>
Inflate(SpParMat<IT,NT,DER> & A,double power)377 void Inflate(SpParMat<IT,NT,DER> & A, double power)
378 {
379     A.Apply(bind2nd(exponentiate(), power));
380 }
381 
382 // default adjustloop setting
383 // 1. Remove loops
384 // 2. set loops to max of all arc weights
385 template <typename IT, typename NT, typename DER>
AdjustLoops(SpParMat<IT,NT,DER> & A)386 void AdjustLoops(SpParMat<IT,NT,DER> & A)
387 {
388 
389     A.RemoveLoops();
390     FullyDistVec<IT, NT> colmaxs = A.Reduce(Column, maximum<NT>(), numeric_limits<NT>::min());
391     A.Apply([](NT val){return val==numeric_limits<NT>::min() ? 1.0 : val;}); // for isolated vertices
392     A.AddLoops(colmaxs);
393     ostringstream outs;
394     outs << "Adjusting loops" << endl;
395     SpParHelper::Print(outs.str());
396 }
397 
398 template <typename IT, typename NT, typename DER>
RemoveIsolated(SpParMat<IT,NT,DER> & A,HipMCLParam & param)399 void RemoveIsolated(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
400 {
401     ostringstream outs;
402     FullyDistVec<IT, NT> ColSums = A.Reduce(Column, plus<NT>(), 0.0);
403     FullyDistVec<IT, IT> nonisov = ColSums.FindInds(bind2nd(greater<NT>(), 0));
404     IT numIsolated = A.getnrow() - nonisov.TotalLength();
405     outs << "Number of isolated vertices: " << numIsolated << endl;
406     SpParHelper::Print(outs.str());
407 
408     A(nonisov, nonisov, true);
409     SpParHelper::Print("Removed isolated vertices.\n");
410     if(param.show)
411     {
412         A.PrintInfo();
413     }
414 
415 }
416 
417 //TODO: handle reordered cluster ids
418 template <typename IT, typename NT, typename DER>
RandPermute(SpParMat<IT,NT,DER> & A,HipMCLParam & param)419 void RandPermute(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
420 {
421     // randomly permute for load balance
422     if(A.getnrow() == A.getncol())
423     {
424         FullyDistVec<IT, IT> p( A.getcommgrid());
425         p.iota(A.getnrow(), 0);
426         p.RandPerm();
427         (A)(p,p,true);// in-place permute to save memory
428         SpParHelper::Print("Applied symmetric permutation.\n");
429     }
430     else
431     {
432         SpParHelper::Print("Rectangular matrix: Can not apply symmetric permutation.\n");
433     }
434 }
435 
436 template <typename IT, typename NT, typename DER>
HipMCL(SpParMat<IT,NT,DER> & A,HipMCLParam & param)437 FullyDistVec<IT, IT> HipMCL(SpParMat<IT,NT,DER> & A, HipMCLParam & param)
438 {
439     if(param.remove_isolated)
440         RemoveIsolated(A, param);
441 
442     if(param.randpermute)
443         RandPermute(A, param);
444 
445     // Adjust self loops
446     AdjustLoops(A);
447 
448     // Make stochastic
449     MakeColStochastic(A);
450     SpParHelper::Print("Made stochastic\n");
451 
452     if(param.show)
453     {
454         A.PrintInfo();
455     }
456 
457 
458     // chaos doesn't make sense for non-stochastic matrices
459     // it is in the range {0,1} for stochastic matrices
460     NT chaos = 1;
461     int it=1;
462     double tInflate = 0;
463     double tExpand = 0;
464      typedef PlusTimesSRing<NT, NT> PTFF;
465     // while there is an epsilon improvement
466     while( chaos > EPS)
467     {
468         double t1 = MPI_Wtime();
469         //A.Square<PTFF>() ;		// expand
470         A = MemEfficientSpGEMM<PTFF, NT, DER>(A, A, param.phases, param.prunelimit, (IT)param.select, (IT)param.recover_num, param.recover_pct, param.kselectVersion, param.perProcessMem);
471 
472         MakeColStochastic(A);
473         tExpand += (MPI_Wtime() - t1);
474 
475         if(param.show)
476         {
477             SpParHelper::Print("After expansion\n");
478             A.PrintInfo();
479         }
480         chaos = Chaos(A);
481 
482         double tInflate1 = MPI_Wtime();
483         Inflate(A, param.inflation);
484         MakeColStochastic(A);
485         tInflate += (MPI_Wtime() - tInflate1);
486 
487         if(param.show)
488         {
489             SpParHelper::Print("After inflation\n");
490             A.PrintInfo();
491         }
492 
493 
494 
495         double newbalance = A.LoadImbalance();
496         double t3=MPI_Wtime();
497         stringstream s;
498         s << "Iteration# "  << setw(3) << it << " : "  << " chaos: " << setprecision(3) << chaos << "  load-balance: "<< newbalance << " Time: " << (t3-t1) << endl;
499         SpParHelper::Print(s.str());
500         it++;
501 
502 
503 
504     }
505 
506 
507     double tcc1 = MPI_Wtime();
508     // bool does not work because A.AddLoops(1) can not create a fullydist vector with Bool?
509     // write a promote train for float
510     SpParMat<IT,double, SpDCCols < IT, double >> ADouble = A;
511     FullyDistVec<IT, IT> cclabels = Interpret(ADouble);
512     double tcc = MPI_Wtime() - tcc1;
513 
514 
515 #ifdef TIMING
516     int myrank;
517     MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
518     if(myrank==0)
519     {
520         cout << "================detailed timing==================" << endl;
521 
522         cout << "Expansion: " << mcl_Abcasttime + mcl_Bbcasttime + mcl_localspgemmtime + mcl_multiwaymergetime << endl;
523         cout << "       Abcast= " << mcl_Abcasttime << endl;
524         cout << "       Bbcast= " << mcl_Bbcasttime << endl;
525         cout << "       localspgemm= " << mcl_localspgemmtime << endl;
526         cout << "       multiwaymergetime= "<< mcl_multiwaymergetime << endl;
527         cout << "Prune: " << mcl_kselecttime + mcl_prunecolumntime << endl;
528         cout << "       kselect= " << mcl_kselecttime << endl;
529         cout << "       prunecolumn= " << mcl_prunecolumntime << endl;
530         cout << "Inflation " << tInflate << endl;
531         cout << "Component: " << tcc << endl;
532         cout << "File I/O: " << tIO << endl;
533         cout << "=================================================" << endl;
534     }
535 
536 #endif
537 
538     return cclabels;
539 
540 
541 }
542 
543 template <typename IT, typename NT, typename DER>
Symmetricize(SpParMat<IT,NT,DER> & A)544 void Symmetricize(SpParMat<IT,NT,DER> & A)
545 {
546     SpParMat<IT,NT,DER> AT = A;
547     AT.Transpose();
548     if(!(AT == A))
549     {
550         SpParHelper::Print("Symmatricizing an unsymmetric input matrix.\n");
551         A += AT;
552     }
553 }
554 
555 template <typename GIT, typename LIT, typename NT>
MainBody(HipMCLParam & param)556 void MainBody(HipMCLParam & param)
557 {
558     SpParMat<GIT,NT, SpDCCols < LIT, NT >> A(MPI_COMM_WORLD);    // construct object
559     FullyDistVec<GIT, array<char, MAXVERTNAME> > vtxLabels(A.getcommgrid());
560 
561     // read file
562 
563     SpParHelper::Print("Reading input file......\n");
564 
565     double tIO1 = MPI_Wtime();
566     if(param.isInputMM)
567         A.ParallelReadMM(param.ifilename, param.base, maximum<NT>());    // if base=0, then it is implicitly converted to Boolean false
568     else // default labeled triples format
569         vtxLabels = A.ReadGeneralizedTuples(param.ifilename,  maximum<NT>());
570 
571     tIO = MPI_Wtime() - tIO1;
572     ostringstream outs;
573     outs << " : took " << tIO << " seconds" << endl;
574     SpParHelper::Print(outs.str());
575     // Symmetricize the matrix only if needed
576     Symmetricize(A);
577 
578     double balance = A.LoadImbalance();
579 
580     outs.str("");
581     outs.clear();
582     if(!param.is64bInt) // don't compute nnz because it may overflow
583     {
584         GIT nnz = A.getnnz();
585         GIT nv = A.getnrow();
586         outs << "Number of vertices: " << nv << " number of edges: "<< nnz << endl;
587     }
588     outs << "Load balance: " << balance << endl;
589     SpParHelper::Print(outs.str());
590 
591     if(param.show)
592     {
593         A.PrintInfo();
594     }
595 
596 
597 
598 #ifdef TIMING
599     mcl_Abcasttime = 0;
600     mcl_Bbcasttime = 0;
601     mcl_localspgemmtime = 0;
602     mcl_multiwaymergetime = 0;
603     mcl_kselecttime = 0;
604     mcl_prunecolumntime = 0;
605 #endif
606 
607 
608 
609     double tstart = MPI_Wtime();
610 
611     // Run HipMCL
612     FullyDistVec<GIT, GIT> culstLabels = HipMCL(A, param);
613     //culstLabels.ParallelWrite(param.ofilename, param.base); // clusters are always numbered 0-based
614 
615     if(param.isInputMM)
616         WriteMCLClusters(param.ofilename, culstLabels, param.base);
617     else
618         WriteMCLClusters(param.ofilename, culstLabels, vtxLabels);
619 
620 
621 
622     GIT nclusters = culstLabels.Reduce(maximum<GIT>(), (GIT) 0 ) ;
623     nclusters ++; // because of zero based indexing for clusters
624 
625     double tend = MPI_Wtime();
626     stringstream s2;
627     s2 << "Number of clusters: " << nclusters << endl;
628     s2 << "Total time: " << (tend-tstart) << endl;
629     s2 <<  "=================================================\n" << endl ;
630     SpParHelper::Print(s2.str());
631 
632 }
633 
634 
main(int argc,char * argv[])635 int main(int argc, char* argv[])
636 {
637     int provided;
638     MPI_Init_thread(&argc, &argv, MPI_THREAD_SERIALIZED, &provided);
639     if (provided < MPI_THREAD_SERIALIZED)
640     {
641         printf("ERROR: The MPI library does not have MPI_THREAD_SERIALIZED support\n");
642         MPI_Abort(MPI_COMM_WORLD, 1);
643     }
644 
645     int nthreads = 1;
646 #ifdef THREADED
647 #pragma omp parallel
648     {
649         nthreads = omp_get_num_threads();
650     }
651 #endif
652 
653     int nprocs, myrank;
654     MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
655     MPI_Comm_rank(MPI_COMM_WORLD,&myrank);
656 
657     HipMCLParam param;
658 
659     // initialize parameters to default values
660     InitParam(param);
661 
662     // Populate parameters from command line options
663     ProcessParam(argc, argv, param);
664 
665     // check if mandatory arguments are provided
666     if(param.ifilename=="" || param.inflation == 0.0)
667     {
668         SpParHelper::Print("Required options are missing.\n");
669         ShowOptions();
670         MPI_Finalize();
671         return -1;
672     }
673 
674 
675     if(myrank == 0)
676     {
677         cout << "\nProcess Grid used (pr x pc x threads): " << sqrt(nprocs) << " x " << sqrt(nprocs) << " x " << nthreads << endl;
678     }
679 
680 
681     // show parameters used to run HipMCL
682     ShowParam(param);
683 
684     if(param.perProcessMem==0)
685     {
686         if(myrank == 0)
687         {
688             cout << "******** Number of phases will not be estimated as -per-process-mem option is supplied. It is highly recommended that you provide -per-process-mem option for large-scale runs. *********** " << endl;
689         }
690     }
691 
692     {
693         if(param.isDoublePrecision)
694             if(param.is64bInt) // default case
695                 MainBody<int64_t, int64_t, double>(param);
696             else
697                 MainBody<int32_t, int32_t, double>(param);
698         else if(param.is64bInt)
699             MainBody<int64_t, int64_t, float>(param);
700         else
701             MainBody<int32_t, int32_t, float>(param); // memory effecient case
702     }
703 
704 
705 
706     // make sure the destructors for all objects are called before MPI::Finalize()
707     MPI_Finalize();
708     return 0;
709 }
710