/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.4 -------------------------------------------------*/
/* date: 1/17/2014 ---------------------------------------------*/
/* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
/****************************************************************/
/*
Copyright (c) 2010-2014, The Regents of the University of California

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include <mpi.h>

#include "SpParMat.h"
#include "ParFriends.h"
#include "Operations.h"

#ifndef GRAPH_GENERATOR_SEQ
#define GRAPH_GENERATOR_SEQ
#endif

#include "graph500/generator/graph_generator.h"
#include "graph500/generator/utils.h"
#include "RefGen21.h"

#include <fstream>
#include <algorithm>

namespace combblas {

template <typename IT>
DistEdgeList<IT>::DistEdgeList(): edges(NULL), pedges(NULL), nedges(0), globalV(0)
{
    commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
}

template <typename IT>
DistEdgeList<IT>::DistEdgeList(MPI_Comm & myWorld): edges(NULL), pedges(NULL), nedges(0), globalV(0)
{
    commGrid.reset(new CommGrid(myWorld, 0, 0));
}

template <typename IT>
DistEdgeList<IT>::DistEdgeList(const char * filename, IT globaln, IT globalm): edges(NULL), pedges(NULL), globalV(globaln)
{
    commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));

    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();
    nedges = (rank == nprocs-1) ? (globalm - rank * (globalm / nprocs)) : (globalm / nprocs);

    FILE * infp = fopen(filename, "rb");
    assert(infp != NULL);
    IT read_offset_start, read_offset_end;
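    // each edge occupies 8 bytes on disk (two consecutive uint32_t endpoints),
    // hence the factor of 8 in the byte offsets below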
    read_offset_start = rank * 8 * (globalm / nprocs);
    read_offset_end = (rank+1) * 8 * (globalm / nprocs);
    if (rank == nprocs - 1)
        read_offset_end = 8*globalm;

    std::ofstream oput;
    commGrid->OpenDebugFile("BinRead", oput);
    if(infp != NULL)
    {
        oput << "File exists" << std::endl;
        oput << "Trying to read " << nedges << " edges out of " << globalm << std::endl;
    }
    else
    {
        oput << "File does not exist" << std::endl;
    }

    /* gen_edges is an array of unsigned ints of size 2*nedges */
    uint32_t * gen_edges = new uint32_t[2*nedges];
    fseek(infp, read_offset_start, SEEK_SET);
    fread(gen_edges, sizeof(uint32_t), 2*nedges, infp);    // read 2*nedges items of sizeof(uint32_t) bytes each
    SetMemSize(nedges);
    oput << "Freads done " << std::endl;
    for(IT i=0; i< 2*nedges; ++i)
        edges[i] = (IT) gen_edges[i];
    oput << "Puts done " << std::endl;
    delete [] gen_edges;
    oput.close();
}
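
// A minimal usage sketch for the binary-file constructor (illustrative only:
// "edges.bin" and the vertex/edge counts are hypothetical, not part of the API).
// Each rank reads its contiguous slice of a file holding 2*globalm uint32_t values:
//
//   combblas::DistEdgeList<int64_t> DEL("edges.bin", 1<<20, 1<<24);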

#if 0
// Adam:
// commenting out because there is a compiler error with MPI_File_open.

template <typename IT>
void DistEdgeList<IT>::Dump64bit(std::string filename)
{
    int rank, nprocs;
    MPI_Comm World = commGrid->GetWorld();
    MPI_Comm_rank(World, &rank);
    MPI_Comm_size(World, &nprocs);
    MPI_File thefile;
    MPI_File_open(World, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);

    IT * prelens = new IT[nprocs];
    prelens[rank] = 2*nedges;
    MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), prelens, 1, MPIType<IT>(), commGrid->GetWorld());
    IT lengthuntil = std::accumulate(prelens, prelens+rank, static_cast<IT>(0));

    // The disp displacement argument specifies the position
    // (absolute offset in bytes from the beginning of the file)
    MPI_File_set_view(thefile, int64_t(lengthuntil * sizeof(IT)), MPIType<IT>(), MPIType<IT>(), "native", MPI_INFO_NULL);
    MPI_File_write(thefile, edges, prelens[rank], MPIType<IT>(), MPI_STATUS_IGNORE);
    MPI_File_close(&thefile);
    delete [] prelens;
}

template <typename IT>
void DistEdgeList<IT>::Dump32bit(std::string filename)
{
    int rank, nprocs;
    MPI_Comm World = commGrid->GetWorld();
    MPI_Comm_rank(World, &rank);
    MPI_Comm_size(World, &nprocs);
    MPI_File thefile;
    MPI_File_open(World, filename.c_str(), MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);

    IT * prelens = new IT[nprocs];
    prelens[rank] = 2*nedges;
    MPI_Allgather(MPI_IN_PLACE, 0, MPIType<IT>(), prelens, 1, MPIType<IT>(), commGrid->GetWorld());
    IT lengthuntil = std::accumulate(prelens, prelens+rank, static_cast<IT>(0));

    // The disp displacement argument specifies the position
    // (absolute offset in bytes from the beginning of the file)
    MPI_File_set_view(thefile, int64_t(lengthuntil * sizeof(uint32_t)), MPI_UNSIGNED, MPI_UNSIGNED, "native", MPI_INFO_NULL);
    uint32_t * gen_edges = new uint32_t[prelens[rank]];
    for(IT i=0; i< prelens[rank]; ++i)
        gen_edges[i] = (uint32_t) edges[i];

    MPI_File_write(thefile, gen_edges, prelens[rank], MPI_UNSIGNED, MPI_STATUS_IGNORE);
    MPI_File_close(&thefile);

    delete [] prelens;
    delete [] gen_edges;
}
#endif

template <typename IT>
DistEdgeList<IT>::~DistEdgeList()
{
    if(edges) delete [] edges;
    if(pedges) delete [] pedges;
}

//! Allocates enough space for ne edges
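//! Edges are stored interleaved as [src0, dst0, src1, dst1, ...], so 2*ne slots are reserved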
template <typename IT>
void DistEdgeList<IT>::SetMemSize(IT ne)
{
    if (edges)
    {
        delete [] edges;
        edges = NULL;
    }

    memedges = ne;
    edges = 0;

    if (memedges > 0)
        edges = new IT[2*memedges];
}

/** Removes all edges that begin with a -1.
 * Walks back from the end to tighten the nedges counter, then walks forward and replaces any edge
 * with a -1 source with the last edge.
 */
template <typename IT>
void DistEdgeList<IT>::CleanupEmpties()
{
    // find out how many edges there actually are
    while (nedges > 0 && edges[2*(nedges-1) + 0] == -1)
    {
        nedges--;
    }

    // remove marked multiplicities or self-loops
    for (IT i = 0; i < (nedges-1); i++)
    {
        if (edges[2*i + 0] == -1)
        {
            // the graph500 generator marked this edge as a self-loop or a multiple edge.
            // swap it with the last edge
            edges[2*i + 0] = edges[2*(nedges-1) + 0];
            edges[2*i + 1] = edges[2*(nedges-1) + 1];
            edges[2*(nedges-1) + 0] = -1;    // mark this spot as unused

            while (nedges > 0 && edges[2*(nedges-1) + 0] == -1)    // the swapped edge might be -1 too
                nedges--;
        }
    }
}
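
// Illustrative trace of CleanupEmpties (hypothetical values): with nedges = 4 and
// source endpoints [5, -1, 9, -1], the trailing marked edge is dropped first
// (nedges becomes 3); the marked edge at index 1 is then overwritten by the last
// valid edge (source 9), its old slot is re-marked, and nedges shrinks to 2,
// leaving the compacted sources [5, 9].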


/**
 * Generates an edge list consisting of an RMAT matrix suitable for the Graph500 benchmark.
 * Note that GenGraph500Data will return global vertex numbers (from 1... N). The ith edge can be
 * accessed with edges[2*i] and edges[2*i+1]. There will be duplicates and the data won't be sorted.
 */
template <typename IT>
void DistEdgeList<IT>::GenGraph500Data(double initiator[4], int log_numverts, int edgefactor, bool scramble, bool packed)
{
    if(packed && (!scramble))
    {
        SpParHelper::Print("WARNING: The packed version always generates scrambled vertex identifiers\n");
    }

    globalV = ((int64_t)1) << log_numverts;
    int64_t globaledges = globalV * static_cast<int64_t>(edgefactor);

    if(packed)
    {
        RefGen21::make_graph(log_numverts, globaledges, &nedges, (packed_edge**)(&pedges), commGrid->GetWorld());
    }
    else
    {
        // The generations use different seeds on different processors, generating independent
        // local RMAT matrices all having vertex ranges [0,...,globalmax-1]
        // Spread the two 64-bit numbers into five nonzero values in the correct range
        uint_fast32_t seed[5];

        uint64_t size = (uint64_t) commGrid->GetSize();
        uint64_t rank = (uint64_t) commGrid->GetRank();
#ifdef DETERMINISTIC
        uint64_t seed2 = 2;
#else
        uint64_t seed2 = time(NULL);
#endif
        make_mrg_seed(rank, seed2, seed);    // we give rank as the first seed, so it is different on each processor

        // a single pair of [val0,val1] for all the computation, global across all processors
        uint64_t val0, val1;    /* Values for scrambling */
        if(scramble)
        {
            if(rank == 0)
                RefGen21::MakeScrambleValues(val0, val1, seed);    // ignore the return value

            MPI_Bcast(&val0, 1, MPIType<uint64_t>(), 0, commGrid->GetWorld());
            MPI_Bcast(&val1, 1, MPIType<uint64_t>(), 0, commGrid->GetWorld());
        }

        nedges = globaledges/size;
        SetMemSize(nedges);
        // clear the source vertex by setting it to -1
        for (IT i = 0; i < nedges; i++)
            edges[2*i+0] = -1;

        generate_kronecker(0, 1, seed, log_numverts, nedges, initiator, edges);
        if(scramble)
        {
            for(IT i=0; i < nedges; ++i)
            {
                edges[2*i+0] = RefGen21::scramble(edges[2*i+0], log_numverts, val0, val1);
                edges[2*i+1] = RefGen21::scramble(edges[2*i+1], log_numverts, val0, val1);
            }
        }
    }
}
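
// A minimal usage sketch (illustrative, not prescribed by this API: the initiator
// values below are the standard Graph500 R-MAT probabilities, and scale 20 with
// edge factor 16 are example parameters). A typical pipeline generates, shuffles
// and renames:
//
//   double initiator[4] = {0.57, 0.19, 0.19, 0.05};
//   DistEdgeList<int64_t> DEL;
//   DEL.GenGraph500Data(initiator, 20, 16, true, false);    // scramble, unpacked
//   PermEdges(DEL);                                          // destroy locality
//   RenameVertices(DEL);                                     // apply a random global relabeling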


/**
 * Randomly permutes the distributed edge list.
 * Once we call Viral's psort on this vector, everything will go to the right place [tuples are
 * sorted lexicographically] and you can reconstruct the int64_t * edges in an embarrassingly parallel way.
 * As I understood it, the entire purpose of this function is to destroy any locality. It does not
 * rename any vertices, and edges are not named anyway.
 * For example, think about the edge (0,1). It will eventually (at the end of kernel 1) be owned by processor P(0,0).
 * However, assume that processor P(r1,c1) has a copy of it before the call to PermEdges. After
 * this call, some other irrelevant processor P(r2,c2) will own it. So we gained nothing; it is just a scrambled egg.
 **/
template <typename IT>
void PermEdges(DistEdgeList<IT> & DEL)
{
    IT maxedges = DEL.memedges;    // this can be optimized by calling the clean-up first

    // to lower memory consumption, permute in stages;
    // this is not "identical" to a full randomization,
    // but it is more than enough to destroy any possible locality
    IT stages = 8;
    IT perstage = maxedges / stages;

    int nproc = (DEL.commGrid)->GetSize();
    int rank = (DEL.commGrid)->GetRank();
    IT * dist = new IT[nproc];

#ifdef DETERMINISTIC
    MTRand M(1);
#else
    MTRand M;    // generate random numbers with Mersenne Twister
#endif
    for(IT s=0; s< stages; ++s)
    {
#ifdef DEBUG
        SpParHelper::Print("PermEdges stage starting\n");
        double st = MPI_Wtime();
#endif

        IT n_sofar = s*perstage;
        IT n_thisstage = ((s==(stages-1)) ? (maxedges - n_sofar) : perstage);

        std::pair<double, std::pair<IT,IT> > * vecpair = new std::pair<double, std::pair<IT,IT> >[n_thisstage];
        dist[rank] = n_thisstage;
        MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), DEL.commGrid->GetWorld());

        for (IT i = 0; i < n_thisstage; i++)
        {
            vecpair[i].first = M.rand();
            vecpair[i].second.first = DEL.edges[2*(i+n_sofar)];
            vecpair[i].second.second = DEL.edges[2*(i+n_sofar)+1];
        }

        // less< pair<T1,T2> > works correctly (sorts w.r.t. first element of type T1)
        SpParHelper::MemoryEfficientPSort(vecpair, n_thisstage, dist, DEL.commGrid->GetWorld());
        // SpParHelper::DebugPrintKeys(vecpair, n_thisstage, dist, DEL.commGrid->GetWorld());
        for (IT i = 0; i < n_thisstage; i++)
        {
            DEL.edges[2*(i+n_sofar)] = vecpair[i].second.first;
            DEL.edges[2*(i+n_sofar)+1] = vecpair[i].second.second;
        }
        delete [] vecpair;
#ifdef DEBUG
        double et = MPI_Wtime();
        std::ostringstream timeinfo;
        timeinfo << "Stage " << s << " in " << et-st << " seconds" << std::endl;
        SpParHelper::Print(timeinfo.str());
#endif
    }
    delete [] dist;
}
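
// Conceptually, each stage shuffles by attaching a uniform random key to every
// edge and sorting on that key; a purely sequential sketch of the same idea
// (illustrative only; localEdges and rng are hypothetical names):
//
//   std::vector<std::pair<double, std::pair<int64_t,int64_t> > > keyed;
//   for (const auto & e : localEdges)
//       keyed.push_back(std::make_pair(rng(), e));    // random sort key per edge
//   std::sort(keyed.begin(), keyed.end());            // sorting by random key == shuffling
//
// PermEdges does the same thing across processors via SpParHelper::MemoryEfficientPSort.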

/**
 * Rename vertices globally.
 * You first need to create a random permutation distributed on all processors.
 * Then the p-round round-robin algorithm does the renaming:
 * For all processors P(i,i)
 *     Broadcast local_p to all p processors
 *     For j = i*N/p to min((i+1)*N/p, N)
 *         Rename all j's to local_p(j) inside the edgelist (and mark them
 *         "renamed" so that each vertex id is renamed only once)
 **/
template <typename IU>
void RenameVertices(DistEdgeList<IU> & DEL)
{
    int nprocs = DEL.commGrid->GetSize();
    int rank = DEL.commGrid->GetRank();
    MPI_Comm World = DEL.commGrid->GetWorld();

    // create permutation
    FullyDistVec<IU, IU> globalPerm(DEL.commGrid);
    globalPerm.iota(DEL.getGlobalV(), 0);
    globalPerm.RandPerm();    // now, randperm can return a 0-based permutation
    IU locrows = globalPerm.MyLocLength();

    // way to mark whether each vertex was already renamed or not
    IU locedgelist = 2*DEL.getNumLocalEdges();
    bool * renamed = new bool[locedgelist];
    std::fill_n(renamed, locedgelist, false);

    // permutation for one round
    IU * localPerm = NULL;
    IU permsize;
    IU startInd = 0;

    //vector < pair<IU, IU> > vec;
    //for(IU i=0; i< DEL.getNumLocalEdges(); i++)
    //    vec.push_back(make_pair(DEL.edges[2*i], DEL.edges[2*i+1]));
    //sort(vec.begin(), vec.end());
    //vector < pair<IU, IU> > uniqued;
    //unique_copy(vec.begin(), vec.end(), back_inserter(uniqued));
    //cout << "before: " << vec.size() << " and after: " << uniqued.size() << endl;

    for (int round = 0; round < nprocs; round++)
    {
        // broadcast the permutation chunk owned by this round's processor
        if (rank == round)
        {
            permsize = locrows;
            localPerm = new IU[permsize];
            std::copy(globalPerm.arr.begin(), globalPerm.arr.end(), localPerm);
        }
        MPI_Bcast(&permsize, 1, MPIType<IU>(), round, World);
        if(rank != round)
        {
            localPerm = new IU[permsize];
        }
        MPI_Bcast(localPerm, permsize, MPIType<IU>(), round, World);

        // iterate over every endpoint in the local edge list
        for (IU j = 0; j < locedgelist; j++)
        {
            // We are renaming vertices, not edges
            if (startInd <= DEL.edges[j] && DEL.edges[j] < (startInd + permsize) && !renamed[j])
            {
                DEL.edges[j] = localPerm[DEL.edges[j]-startInd];
                renamed[j] = true;
            }
        }
        startInd += permsize;
        delete [] localPerm;
    }
    delete [] renamed;
}
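
// Illustrative effect (hypothetical values): with nprocs = 2 and a global
// permutation [3,0,2,1], round 0 broadcasts localPerm = [3,0] and rewrites any
// endpoint in {0,1} (0 -> 3, 1 -> 0); round 1 broadcasts [2,1] and rewrites
// endpoints in {2,3} (2 -> 2, 3 -> 1). The renamed[] flags ensure that an
// endpoint rewritten in an earlier round is not rewritten again when its new
// id happens to fall inside a later round's range.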

}  // namespace combblas