1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 
31 /**
32  * Deterministic vertex scrambling functions from V2.1 of the reference implementation
33  **/
34 
35 #ifndef _REF_GEN_2_1_H_
36 #define _REF_GEN_2_1_H_
37 
38 #ifdef _STDINT_H
39 	#undef _STDINT_H
40 #endif
41 #ifdef _GCC_STDINT_H 	// for cray
42 	#undef _GCC_STDINT_H // original stdint does #include_next<"/opt/gcc/4.5.2/snos/lib/gcc/x86_64-suse-linux/4.5.2/include/stdint-gcc.h">
43 #endif
44 
45 #ifndef __STDC_CONSTANT_MACROS
46 #define __STDC_CONSTANT_MACROS
47 #endif
48 #ifndef __STDC_LIMIT_MACROS
49 #define __STDC_LIMIT_MACROS
50 #endif
51 #include <stdint.h>
52 #include <inttypes.h>
53 #include <errno.h>
54 
55 #include <vector>
56 #include <limits>
57 #include "SpDefs.h"
58 #include "StackEntry.h"
59 #include "promote.h"
60 #include "Isect.h"
61 #include "HeapEntry.h"
62 #include "SpImpl.h"
63 #include "graph500/generator/graph_generator.h"
64 #include "graph500/generator/utils.h"
65 
66 namespace combblas {
67 
68 
69 /* Initiator settings: for faster random number generation, the initiator
70  * probabilities are defined as fractions (a = INITIATOR_A_NUMERATOR /
71  * INITIATOR_DENOMINATOR, b = c = INITIATOR_BC_NUMERATOR /
72  * INITIATOR_DENOMINATOR, d = 1 - a - b - c. */
73 #define INITIATOR_A_NUMERATOR 5700
74 #define INITIATOR_BC_NUMERATOR 1900
75 #define INITIATOR_DENOMINATOR 10000
76 
77 /* If this macro is defined to a non-zero value, use SPK_NOISE_LEVEL /
78  * INITIATOR_DENOMINATOR as the noise parameter to use in introducing noise
79  * into the graph parameters.  The approach used is from "A Hitchhiker's Guide
80  * to Choosing Parameters of Stochastic Kronecker Graphs" by C. Seshadhri, Ali
81  * Pinar, and Tamara G. Kolda (http://arxiv.org/abs/1102.5046v1), except that
82  * the adjustment here is chosen based on the current level being processed
83  * rather than being chosen randomly. */
84 #define SPK_NOISE_LEVEL 0
85 /* #define SPK_NOISE_LEVEL 1000 -- in INITIATOR_DENOMINATOR units */
86 
87 
88 class RefGen21
89 {
90 public:
91 
92 	/* Spread the two 64-bit numbers into five nonzero values in the correct range (2 parameter version) */
make_mrg_seed_short(uint64_t userseed,uint_fast32_t * seed)93 	static void make_mrg_seed_short(uint64_t userseed, uint_fast32_t* seed)
94 	{
95   		seed[0] = (userseed & 0x3FFFFFFF) + 1;
96   		seed[1] = ((userseed >> 30) & 0x3FFFFFFF) + 1;
97   		seed[2] = (userseed & 0x3FFFFFFF) + 1;
98   		seed[3] = ((userseed >> 30) & 0x3FFFFFFF) + 1;
99   		seed[4] = ((userseed >> 60) << 4) + (userseed >> 60) + 1;
100 	}
101 
generate_4way_bernoulli(mrg_state * st,int level,int nlevels)102 	static int generate_4way_bernoulli(mrg_state* st, int level, int nlevels)
103 	{
104   		/* Generator a pseudorandom number in the range [0, INITIATOR_DENOMINATOR) without modulo bias. */
105   		static const uint32_t limit = (UINT32_C(0xFFFFFFFF) % INITIATOR_DENOMINATOR);
106   		uint32_t val = mrg_get_uint_orig(st);
107   		if (/* Unlikely */ val < limit) {
108     			do
109 			{
110       				val = mrg_get_uint_orig(st);
111     			}
112 			while (val < limit);
113   		}
114 		#if SPK_NOISE_LEVEL == 0
115   		int spk_noise_factor = 0;
116 		#else
117   		int spk_noise_factor = 2 * SPK_NOISE_LEVEL * level / nlevels - SPK_NOISE_LEVEL;
118 		#endif
119   		int adjusted_bc_numerator = INITIATOR_BC_NUMERATOR + spk_noise_factor;
120   		val %= INITIATOR_DENOMINATOR;
121   		if ((signed)val < adjusted_bc_numerator) return 1;
122   		val -= adjusted_bc_numerator;
123   		if ((signed)val < adjusted_bc_numerator) return 2;
124   		val -= adjusted_bc_numerator;
125 		#if SPK_NOISE_LEVEL == 0
126   		if (val < INITIATOR_A_NUMERATOR) return 0;
127 		#else
128   		if (val < INITIATOR_A_NUMERATOR * (INITIATOR_DENOMINATOR - 2 * INITIATOR_BC_NUMERATOR) / (INITIATOR_DENOMINATOR - 2 * adjusted_bc_numerator)) return 0;
129 		#endif
130 		return 3;
131 	}
132 
133 	/* Reverse bits in a number; this should be optimized for performance
134  	* (including using bit- or byte-reverse intrinsics if your platform has them).
135  	* */
bitreverse(uint64_t x)136 	static inline uint64_t bitreverse(uint64_t x)
137 	{
138 		#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
139 		#define USE_GCC_BYTESWAP /* __builtin_bswap* are in 4.3 but not 4.2 */
140 		#endif
141 
142 		#ifdef FAST_64BIT_ARITHMETIC
143 
144  		 /* 64-bit code */
145 		#ifdef USE_GCC_BYTESWAP
146   		 x = __builtin_bswap64(x);
147 		#else
148   		 x = (x >> 32) | (x << 32);
149   		 x = ((x >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((x & UINT64_C(0x0000FFFF0000FFFF)) << 16);
150   		 x = ((x >>  8) & UINT64_C(0x00FF00FF00FF00FF)) | ((x & UINT64_C(0x00FF00FF00FF00FF)) <<  8);
151 		#endif
152   		x = ((x >>  4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((x & UINT64_C(0x0F0F0F0F0F0F0F0F)) <<  4);
153   		x = ((x >>  2) & UINT64_C(0x3333333333333333)) | ((x & UINT64_C(0x3333333333333333)) <<  2);
154  		x = ((x >>  1) & UINT64_C(0x5555555555555555)) | ((x & UINT64_C(0x5555555555555555)) <<  1);
155   		return x;
156 
157 		#else
158 
159   		/* 32-bit code */
160  		uint32_t h = (uint32_t)(x >> 32);
161   		uint32_t l = (uint32_t)(x & UINT32_MAX);
162 		#ifdef USE_GCC_BYTESWAP
163   		 h = __builtin_bswap32(h);
164   		 l = __builtin_bswap32(l);
165 		#else
166  		 h = (h >> 16) | (h << 16);
167  		 l = (l >> 16) | (l << 16);
168 		 h = ((h >> 8) & UINT32_C(0x00FF00FF)) | ((h & UINT32_C(0x00FF00FF)) << 8);
169  		 l = ((l >> 8) & UINT32_C(0x00FF00FF)) | ((l & UINT32_C(0x00FF00FF)) << 8);
170 		#endif
171   		h = ((h >> 4) & UINT32_C(0x0F0F0F0F)) | ((h & UINT32_C(0x0F0F0F0F)) << 4);
172   		l = ((l >> 4) & UINT32_C(0x0F0F0F0F)) | ((l & UINT32_C(0x0F0F0F0F)) << 4);
173   		h = ((h >> 2) & UINT32_C(0x33333333)) | ((h & UINT32_C(0x33333333)) << 2);
174   		l = ((l >> 2) & UINT32_C(0x33333333)) | ((l & UINT32_C(0x33333333)) << 2);
175   		h = ((h >> 1) & UINT32_C(0x55555555)) | ((h & UINT32_C(0x55555555)) << 1);
176   		l = ((l >> 1) & UINT32_C(0x55555555)) | ((l & UINT32_C(0x55555555)) << 1);
177   		return ((uint64_t)l << 32) | h; /* Swap halves */
178 
179 		#endif
180 	}
181 
182 
183 	/* Apply a permutation to scramble vertex numbers; a randomly generated
184  	* permutation is not used because applying it at scale is too expensive. */
scramble(int64_t v0,int lgN,uint64_t val0,uint64_t val1)185 	static inline int64_t scramble(int64_t v0, int lgN, uint64_t val0, uint64_t val1)
186 	{
187   		uint64_t v = (uint64_t)v0;
188   		v += val0 + val1;
189   		v *= (val0 | UINT64_C(0x4519840211493211));
190  	 	v = (RefGen21::bitreverse(v) >> (64 - lgN));
191   		assert ((v >> lgN) == 0);
192   		v *= (val1 | UINT64_C(0x3050852102C843A5));
193   		v = (RefGen21::bitreverse(v) >> (64 - lgN));
194   		assert ((v >> lgN) == 0);
195   		return (int64_t)v;
196 	}
197 
198 	/* Make a single graph edge using a pre-set MRG state. */
make_one_edge(int64_t nverts,int level,int lgN,mrg_state * st,packed_edge * result,uint64_t val0,uint64_t val1)199 	static void make_one_edge(int64_t nverts, int level, int lgN, mrg_state* st, packed_edge* result, uint64_t val0, uint64_t val1)
200 	{
201   		int64_t base_src = 0, base_tgt = 0;
202   		while (nverts > 1)
203 		{
204     			int square = generate_4way_bernoulli(st, level, lgN);
205     			int src_offset = square / 2;
206     			int tgt_offset = square % 2;
207     			assert (base_src <= base_tgt);
208     			if (base_src == base_tgt)
209 			{
210       				/* Clip-and-flip for undirected graph */
211       				if (src_offset > tgt_offset) {
212         				int temp = src_offset;
213         				src_offset = tgt_offset;
214         				tgt_offset = temp;
215       				}
216     			}
217     			nverts /= 2;
218     			++level;
219     			base_src += nverts * src_offset;
220     			base_tgt += nverts * tgt_offset;
221   		}
222   		write_edge(result,
223              		scramble(base_src, lgN, val0, val1),
224              		scramble(base_tgt, lgN, val0, val1));
225 	}
226 
MakeScrambleValues(uint64_t & val0,uint64_t & val1,const uint_fast32_t seed[])227 	static inline mrg_state MakeScrambleValues(uint64_t & val0, uint64_t & val1, const uint_fast32_t seed[])
228 	{
229 		mrg_state state;
230 		mrg_seed(&state, seed);
231     		mrg_state new_state = state;
232     		mrg_skip(&new_state, 50, 7, 0);
233     		val0 = mrg_get_uint_orig(&new_state);
234     		val0 *= UINT64_C(0xFFFFFFFF);
235     		val0 += mrg_get_uint_orig(&new_state);
236     		val1 = mrg_get_uint_orig(&new_state);
237     		val1 *= UINT64_C(0xFFFFFFFF);
238     		val1 += mrg_get_uint_orig(&new_state);
239 		return state;
240 	}
241 
242 	/* Generate a range of edges (from start_edge to end_edge of the total graph),
243  	 * writing into elements [0, end_edge - start_edge) of the edges array.  This
244  	 * code is parallel on OpenMP, it must be used with separately-implemented SPMD parallelism for MPI.
245 	 */
generate_kronecker_range(const uint_fast32_t seed[5],int logN,int64_t start_edge,int64_t end_edge,packed_edge * edges)246 	static void generate_kronecker_range(	const uint_fast32_t seed[5] /* All values in [0, 2^31 - 1), not all zero */,
247        					int logN /* In base 2 */, int64_t start_edge, int64_t end_edge, packed_edge* edges)
248 	{
249   		int64_t nverts = (int64_t)1 << logN;
250   		uint64_t val0, val1; /* Values for scrambling */
251 		mrg_state state = MakeScrambleValues(val0, val1, seed);
252 
253 		#ifdef _OPENMP
254 		#pragma omp parallel for
255 		#endif
256   		for (int64_t ei = start_edge; ei < end_edge; ++ei)
257 		{
258     			mrg_state new_state = state;
259  		   	mrg_skip(&new_state, 0, ei, 0);
260     			make_one_edge(nverts, 0, logN, &new_state, edges + (ei - start_edge), val0, val1);
261   		}
262 	}
compute_edge_range(int rank,int size,int64_t M,int64_t * start_idx,int64_t * end_idx)263 	static inline void compute_edge_range(int rank, int size, int64_t M, int64_t* start_idx, int64_t* end_idx)
264 	{
265   		int64_t rankc = (int64_t)(rank);
266   		int64_t sizec = (int64_t)(size);
267   		*start_idx = rankc * (M / sizec) + (rankc < (M % sizec) ? rankc : (M % sizec));
268   		*end_idx = (rankc + 1) * (M / sizec) + (rankc + 1 < (M % sizec) ? rankc + 1 : (M % sizec));
269 	}
270 
make_graph(int log_numverts,int64_t M,int64_t * nedges_ptr,packed_edge ** result_ptr,MPI_Comm & world)271 	static inline void make_graph(int log_numverts, int64_t M, int64_t* nedges_ptr, packed_edge** result_ptr, MPI_Comm & world)
272 	{
273   		int rank, size;
274 #ifdef DETERMINISTIC
275 		uint64_t userseed1 = 0;
276 #else
277 		uint64_t userseed1 = (uint64_t) init_random();
278 #endif
279 
280   		/* Spread the two 64-bit numbers into five nonzero values in the correct range. */
281   		uint_fast32_t seed[5];
282   		make_mrg_seed(userseed1, userseed1, seed);
283 
284   		MPI_Comm_rank(world, &rank);
285   		MPI_Comm_size(world, &size);
286 
287   		int64_t start_idx, end_idx;
288   		compute_edge_range(rank, size, M, &start_idx, &end_idx);
289   		int64_t nedges = end_idx - start_idx;
290 
291   		packed_edge* local_edges = new packed_edge[nedges];
292 
293   		double start = MPI_Wtime();
294   		generate_kronecker_range(seed, log_numverts, start_idx, end_idx, local_edges);
295  		double gen_time = MPI_Wtime() - start;
296 
297   		*result_ptr = local_edges;
298   		*nedges_ptr = nedges;
299 
300   		if (rank == 0)
301 		{
302     			fprintf(stdout, "graph_generation:               %f s\n", gen_time);
303   		}
304 	}
305 
init_random()306 	static inline long init_random ()
307 	{
308   		long seed = -1;
309   		if (getenv ("SEED"))
310 		{
311     			errno = 0;
312     			seed = strtol (getenv ("SEED"), NULL, 10);
313     			if (errno) seed = -1;
314   		}
315 
316   		if (seed < 0) seed = 0xDECAFBAD;
317 		return seed;
318 	}
319 };
320 
321 }
322 
323 #endif
324