1 /****************************************************************/ 2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */ 3 /* version 1.6 -------------------------------------------------*/ 4 /* date: 6/15/2017 ---------------------------------------------*/ 5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/ 6 /****************************************************************/ 7 /* 8 Copyright (c) 2010-2017, The Regents of the University of California 9 10 Permission is hereby granted, free of charge, to any person obtaining a copy 11 of this software and associated documentation files (the "Software"), to deal 12 in the Software without restriction, including without limitation the rights 13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 copies of the Software, and to permit persons to whom the Software is 15 furnished to do so, subject to the following conditions: 16 17 The above copyright notice and this permission notice shall be included in 18 all copies or substantial portions of the Software. 19 20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 THE SOFTWARE. 27 */ 28 29 30 31 /** 32 * Deterministic vertex scrambling functions from V2.1 of the reference implementation 33 **/ 34 35 #ifndef _REF_GEN_2_1_H_ 36 #define _REF_GEN_2_1_H_ 37 38 #ifdef _STDINT_H 39 #undef _STDINT_H 40 #endif 41 #ifdef _GCC_STDINT_H // for cray 42 #undef _GCC_STDINT_H // original stdint does #include_next<"/opt/gcc/4.5.2/snos/lib/gcc/x86_64-suse-linux/4.5.2/include/stdint-gcc.h"> 43 #endif 44 45 #ifndef __STDC_CONSTANT_MACROS 46 #define __STDC_CONSTANT_MACROS 47 #endif 48 #ifndef __STDC_LIMIT_MACROS 49 #define __STDC_LIMIT_MACROS 50 #endif 51 #include <stdint.h> 52 #include <inttypes.h> 53 #include <errno.h> 54 55 #include <vector> 56 #include <limits> 57 #include "SpDefs.h" 58 #include "StackEntry.h" 59 #include "promote.h" 60 #include "Isect.h" 61 #include "HeapEntry.h" 62 #include "SpImpl.h" 63 #include "graph500/generator/graph_generator.h" 64 #include "graph500/generator/utils.h" 65 66 namespace combblas { 67 68 69 /* Initiator settings: for faster random number generation, the initiator 70 * probabilities are defined as fractions (a = INITIATOR_A_NUMERATOR / 71 * INITIATOR_DENOMINATOR, b = c = INITIATOR_BC_NUMERATOR / 72 * INITIATOR_DENOMINATOR, d = 1 - a - b - c. */ 73 #define INITIATOR_A_NUMERATOR 5700 74 #define INITIATOR_BC_NUMERATOR 1900 75 #define INITIATOR_DENOMINATOR 10000 76 77 /* If this macro is defined to a non-zero value, use SPK_NOISE_LEVEL / 78 * INITIATOR_DENOMINATOR as the noise parameter to use in introducing noise 79 * into the graph parameters. The approach used is from "A Hitchhiker's Guide 80 * to Choosing Parameters of Stochastic Kronecker Graphs" by C. Seshadhri, Ali 81 * Pinar, and Tamara G. Kolda (http://arxiv.org/abs/1102.5046v1), except that 82 * the adjustment here is chosen based on the current level being processed 83 * rather than being chosen randomly. */ 84 #define SPK_NOISE_LEVEL 0 85 /* #define SPK_NOISE_LEVEL 1000 -- in INITIATOR_DENOMINATOR units */ 86 87 88 class RefGen21 89 { 90 public: 91 92 /* Spread the two 64-bit numbers into five nonzero values in the correct range (2 parameter version) */ make_mrg_seed_short(uint64_t userseed,uint_fast32_t * seed)93 static void make_mrg_seed_short(uint64_t userseed, uint_fast32_t* seed) 94 { 95 seed[0] = (userseed & 0x3FFFFFFF) + 1; 96 seed[1] = ((userseed >> 30) & 0x3FFFFFFF) + 1; 97 seed[2] = (userseed & 0x3FFFFFFF) + 1; 98 seed[3] = ((userseed >> 30) & 0x3FFFFFFF) + 1; 99 seed[4] = ((userseed >> 60) << 4) + (userseed >> 60) + 1; 100 } 101 generate_4way_bernoulli(mrg_state * st,int level,int nlevels)102 static int generate_4way_bernoulli(mrg_state* st, int level, int nlevels) 103 { 104 /* Generator a pseudorandom number in the range [0, INITIATOR_DENOMINATOR) without modulo bias. */ 105 static const uint32_t limit = (UINT32_C(0xFFFFFFFF) % INITIATOR_DENOMINATOR); 106 uint32_t val = mrg_get_uint_orig(st); 107 if (/* Unlikely */ val < limit) { 108 do 109 { 110 val = mrg_get_uint_orig(st); 111 } 112 while (val < limit); 113 } 114 #if SPK_NOISE_LEVEL == 0 115 int spk_noise_factor = 0; 116 #else 117 int spk_noise_factor = 2 * SPK_NOISE_LEVEL * level / nlevels - SPK_NOISE_LEVEL; 118 #endif 119 int adjusted_bc_numerator = INITIATOR_BC_NUMERATOR + spk_noise_factor; 120 val %= INITIATOR_DENOMINATOR; 121 if ((signed)val < adjusted_bc_numerator) return 1; 122 val -= adjusted_bc_numerator; 123 if ((signed)val < adjusted_bc_numerator) return 2; 124 val -= adjusted_bc_numerator; 125 #if SPK_NOISE_LEVEL == 0 126 if (val < INITIATOR_A_NUMERATOR) return 0; 127 #else 128 if (val < INITIATOR_A_NUMERATOR * (INITIATOR_DENOMINATOR - 2 * INITIATOR_BC_NUMERATOR) / (INITIATOR_DENOMINATOR - 2 * adjusted_bc_numerator)) return 0; 129 #endif 130 return 3; 131 } 132 133 /* Reverse bits in a number; this should be optimized for performance 134 * (including using bit- or byte-reverse intrinsics if your platform has them). 135 * */ bitreverse(uint64_t x)136 static inline uint64_t bitreverse(uint64_t x) 137 { 138 #if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) 139 #define USE_GCC_BYTESWAP /* __builtin_bswap* are in 4.3 but not 4.2 */ 140 #endif 141 142 #ifdef FAST_64BIT_ARITHMETIC 143 144 /* 64-bit code */ 145 #ifdef USE_GCC_BYTESWAP 146 x = __builtin_bswap64(x); 147 #else 148 x = (x >> 32) | (x << 32); 149 x = ((x >> 16) & UINT64_C(0x0000FFFF0000FFFF)) | ((x & UINT64_C(0x0000FFFF0000FFFF)) << 16); 150 x = ((x >> 8) & UINT64_C(0x00FF00FF00FF00FF)) | ((x & UINT64_C(0x00FF00FF00FF00FF)) << 8); 151 #endif 152 x = ((x >> 4) & UINT64_C(0x0F0F0F0F0F0F0F0F)) | ((x & UINT64_C(0x0F0F0F0F0F0F0F0F)) << 4); 153 x = ((x >> 2) & UINT64_C(0x3333333333333333)) | ((x & UINT64_C(0x3333333333333333)) << 2); 154 x = ((x >> 1) & UINT64_C(0x5555555555555555)) | ((x & UINT64_C(0x5555555555555555)) << 1); 155 return x; 156 157 #else 158 159 /* 32-bit code */ 160 uint32_t h = (uint32_t)(x >> 32); 161 uint32_t l = (uint32_t)(x & UINT32_MAX); 162 #ifdef USE_GCC_BYTESWAP 163 h = __builtin_bswap32(h); 164 l = __builtin_bswap32(l); 165 #else 166 h = (h >> 16) | (h << 16); 167 l = (l >> 16) | (l << 16); 168 h = ((h >> 8) & UINT32_C(0x00FF00FF)) | ((h & UINT32_C(0x00FF00FF)) << 8); 169 l = ((l >> 8) & UINT32_C(0x00FF00FF)) | ((l & UINT32_C(0x00FF00FF)) << 8); 170 #endif 171 h = ((h >> 4) & UINT32_C(0x0F0F0F0F)) | ((h & UINT32_C(0x0F0F0F0F)) << 4); 172 l = ((l >> 4) & UINT32_C(0x0F0F0F0F)) | ((l & UINT32_C(0x0F0F0F0F)) << 4); 173 h = ((h >> 2) & UINT32_C(0x33333333)) | ((h & UINT32_C(0x33333333)) << 2); 174 l = ((l >> 2) & UINT32_C(0x33333333)) | ((l & UINT32_C(0x33333333)) << 2); 175 h = ((h >> 1) & UINT32_C(0x55555555)) | ((h & UINT32_C(0x55555555)) << 1); 176 l = ((l >> 1) & UINT32_C(0x55555555)) | ((l & UINT32_C(0x55555555)) << 1); 177 return ((uint64_t)l << 32) | h; /* Swap halves */ 178 179 #endif 180 } 181 182 183 /* Apply a permutation to scramble vertex numbers; a randomly generated 184 * permutation is not used because applying it at scale is too expensive. */ scramble(int64_t v0,int lgN,uint64_t val0,uint64_t val1)185 static inline int64_t scramble(int64_t v0, int lgN, uint64_t val0, uint64_t val1) 186 { 187 uint64_t v = (uint64_t)v0; 188 v += val0 + val1; 189 v *= (val0 | UINT64_C(0x4519840211493211)); 190 v = (RefGen21::bitreverse(v) >> (64 - lgN)); 191 assert ((v >> lgN) == 0); 192 v *= (val1 | UINT64_C(0x3050852102C843A5)); 193 v = (RefGen21::bitreverse(v) >> (64 - lgN)); 194 assert ((v >> lgN) == 0); 195 return (int64_t)v; 196 } 197 198 /* Make a single graph edge using a pre-set MRG state. */ make_one_edge(int64_t nverts,int level,int lgN,mrg_state * st,packed_edge * result,uint64_t val0,uint64_t val1)199 static void make_one_edge(int64_t nverts, int level, int lgN, mrg_state* st, packed_edge* result, uint64_t val0, uint64_t val1) 200 { 201 int64_t base_src = 0, base_tgt = 0; 202 while (nverts > 1) 203 { 204 int square = generate_4way_bernoulli(st, level, lgN); 205 int src_offset = square / 2; 206 int tgt_offset = square % 2; 207 assert (base_src <= base_tgt); 208 if (base_src == base_tgt) 209 { 210 /* Clip-and-flip for undirected graph */ 211 if (src_offset > tgt_offset) { 212 int temp = src_offset; 213 src_offset = tgt_offset; 214 tgt_offset = temp; 215 } 216 } 217 nverts /= 2; 218 ++level; 219 base_src += nverts * src_offset; 220 base_tgt += nverts * tgt_offset; 221 } 222 write_edge(result, 223 scramble(base_src, lgN, val0, val1), 224 scramble(base_tgt, lgN, val0, val1)); 225 } 226 MakeScrambleValues(uint64_t & val0,uint64_t & val1,const uint_fast32_t seed[])227 static inline mrg_state MakeScrambleValues(uint64_t & val0, uint64_t & val1, const uint_fast32_t seed[]) 228 { 229 mrg_state state; 230 mrg_seed(&state, seed); 231 mrg_state new_state = state; 232 mrg_skip(&new_state, 50, 7, 0); 233 val0 = mrg_get_uint_orig(&new_state); 234 val0 *= UINT64_C(0xFFFFFFFF); 235 val0 += mrg_get_uint_orig(&new_state); 236 val1 = mrg_get_uint_orig(&new_state); 237 val1 *= UINT64_C(0xFFFFFFFF); 238 val1 += mrg_get_uint_orig(&new_state); 239 return state; 240 } 241 242 /* Generate a range of edges (from start_edge to end_edge of the total graph), 243 * writing into elements [0, end_edge - start_edge) of the edges array. This 244 * code is parallel on OpenMP, it must be used with separately-implemented SPMD parallelism for MPI. 245 */ generate_kronecker_range(const uint_fast32_t seed[5],int logN,int64_t start_edge,int64_t end_edge,packed_edge * edges)246 static void generate_kronecker_range( const uint_fast32_t seed[5] /* All values in [0, 2^31 - 1), not all zero */, 247 int logN /* In base 2 */, int64_t start_edge, int64_t end_edge, packed_edge* edges) 248 { 249 int64_t nverts = (int64_t)1 << logN; 250 uint64_t val0, val1; /* Values for scrambling */ 251 mrg_state state = MakeScrambleValues(val0, val1, seed); 252 253 #ifdef _OPENMP 254 #pragma omp parallel for 255 #endif 256 for (int64_t ei = start_edge; ei < end_edge; ++ei) 257 { 258 mrg_state new_state = state; 259 mrg_skip(&new_state, 0, ei, 0); 260 make_one_edge(nverts, 0, logN, &new_state, edges + (ei - start_edge), val0, val1); 261 } 262 } compute_edge_range(int rank,int size,int64_t M,int64_t * start_idx,int64_t * end_idx)263 static inline void compute_edge_range(int rank, int size, int64_t M, int64_t* start_idx, int64_t* end_idx) 264 { 265 int64_t rankc = (int64_t)(rank); 266 int64_t sizec = (int64_t)(size); 267 *start_idx = rankc * (M / sizec) + (rankc < (M % sizec) ? rankc : (M % sizec)); 268 *end_idx = (rankc + 1) * (M / sizec) + (rankc + 1 < (M % sizec) ? rankc + 1 : (M % sizec)); 269 } 270 make_graph(int log_numverts,int64_t M,int64_t * nedges_ptr,packed_edge ** result_ptr,MPI_Comm & world)271 static inline void make_graph(int log_numverts, int64_t M, int64_t* nedges_ptr, packed_edge** result_ptr, MPI_Comm & world) 272 { 273 int rank, size; 274 #ifdef DETERMINISTIC 275 uint64_t userseed1 = 0; 276 #else 277 uint64_t userseed1 = (uint64_t) init_random(); 278 #endif 279 280 /* Spread the two 64-bit numbers into five nonzero values in the correct range. */ 281 uint_fast32_t seed[5]; 282 make_mrg_seed(userseed1, userseed1, seed); 283 284 MPI_Comm_rank(world, &rank); 285 MPI_Comm_size(world, &size); 286 287 int64_t start_idx, end_idx; 288 compute_edge_range(rank, size, M, &start_idx, &end_idx); 289 int64_t nedges = end_idx - start_idx; 290 291 packed_edge* local_edges = new packed_edge[nedges]; 292 293 double start = MPI_Wtime(); 294 generate_kronecker_range(seed, log_numverts, start_idx, end_idx, local_edges); 295 double gen_time = MPI_Wtime() - start; 296 297 *result_ptr = local_edges; 298 *nedges_ptr = nedges; 299 300 if (rank == 0) 301 { 302 fprintf(stdout, "graph_generation: %f s\n", gen_time); 303 } 304 } 305 init_random()306 static inline long init_random () 307 { 308 long seed = -1; 309 if (getenv ("SEED")) 310 { 311 errno = 0; 312 seed = strtol (getenv ("SEED"), NULL, 10); 313 if (errno) seed = -1; 314 } 315 316 if (seed < 0) seed = 0xDECAFBAD; 317 return seed; 318 } 319 }; 320 321 } 322 323 #endif 324