/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.4 -------------------------------------------------*/
/* date: 1/17/2014 ---------------------------------------------*/
/* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
/* this file contributed by Scott Beamer of UC Berkeley --------*/
/****************************************************************/
/*
 Copyright (c) 2010-2014, The Regents of the University of California

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
28 */ 29 30 31 #ifndef BITMAPFRINGE_H 32 #define BITMAPFRINGE_H 33 34 // #include <algorithm> 35 #include "BitMap.h" 36 #include "CommGrid.h" 37 38 namespace combblas { 39 40 template <class IT, class VT> 41 class BitMapFringe { 42 public: BitMapFringe(std::shared_ptr<CommGrid> grid,FullyDistSpVec<IT,VT> & x)43 BitMapFringe(std::shared_ptr<CommGrid> grid, FullyDistSpVec<IT,VT> & x) { 44 cg.reset(new CommGrid(*grid)); 45 46 MPI_Comm World = x.getcommgrid()->GetWorld(); 47 MPI_Comm ColWorld = x.getcommgrid()->GetColWorld(); 48 MPI_Status status; 49 50 // Find out how big local chunk will be after transpose 51 long num_local_send = x.MyLocLength(), num_local_recv; 52 diagneigh = cg->GetComplementRank(); 53 MPI_Sendrecv(&num_local_send, 1, MPI_LONG, diagneigh, TROST, 54 &num_local_recv, 1, MPI_LONG, diagneigh, TROST, World, &status); 55 56 // Calculate new local displacements 57 MPI_Comm_size(ColWorld, &colneighs); 58 MPI_Comm_rank(ColWorld, &colrank); 59 60 int counts[colneighs]; 61 counts[colrank] = num_local_recv; 62 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, counts, 1, MPI_INT, ColWorld); 63 64 int dpls[colneighs]; 65 dpls[0] = 0; 66 std::partial_sum(counts, counts+colneighs-1, dpls+1); 67 long total_size = dpls[colneighs-1] + counts[colneighs-1]; 68 local_offset = x.LengthUntil(); 69 local_num_set = 0; 70 71 // Compute word/byte displacements and send counts for gather 72 word_dpls = new int[colneighs]; 73 byte_dpls = new int[colneighs]; 74 send_counts = new int[colneighs]; 75 for (int c=0; c<colneighs; c++) { 76 word_dpls[c] = (dpls[c] + (64-(dpls[c]%64)))>>6; 77 byte_dpls[c] = word_dpls[c]<<3; 78 send_counts[c] = (counts[c] - (64-(dpls[c] % 64)) + 63)>>6; 79 } 80 81 // Compute subword displacements and transpose exchange details 82 trans_subword_disp = dpls[colrank] % 64; 83 MPI_Sendrecv(&trans_subword_disp, 1, MPIType<int32_t>(), diagneigh, TROST, 84 &local_subword_disp, 1, MPIType<int32_t>(), diagneigh, TROST, World, &status); 85 86 trans_words_send = 
(num_local_send + local_subword_disp + 63)>>6; 87 trans_words_recv = (num_local_recv + trans_subword_disp + 63)>>6; 88 89 // Allocate bitmaps 90 local_bm = new BitMap(num_local_send + local_subword_disp); 91 next_bm = new BitMap(num_local_send + local_subword_disp); 92 trans_bm = new BitMap(num_local_recv + trans_subword_disp); 93 gather_bm = new BitMap(total_size); 94 } 95 ~BitMapFringe()96 ~BitMapFringe() { 97 delete local_bm; 98 delete next_bm; 99 delete trans_bm; 100 delete gather_bm; 101 delete[] send_counts; 102 delete[] word_dpls; 103 delete[] byte_dpls; 104 } 105 LoadFromSpVec(FullyDistSpVec<IT,VT> & x)106 void LoadFromSpVec(FullyDistSpVec<IT,VT> & x) { 107 local_bm->reset(); 108 for (SparseVectorLocalIterator<IT,VT> spit(x); spit.HasNext(); spit.Next()) 109 local_bm->set_bit(spit.GetLocIndex() + local_subword_disp); 110 local_num_set = x.getlocnnz(); 111 } 112 LoadFromUpdates(IT * updates,long total_updates)113 void LoadFromUpdates(IT* updates, long total_updates) { 114 local_bm->reset(); 115 for (long i=0; i<total_updates; i++) 116 local_bm->set_bit(updates[i] - local_offset + local_subword_disp); 117 local_num_set = total_updates; 118 } 119 LoadFromNext()120 void LoadFromNext() { 121 local_num_set = next_num_set; 122 } 123 SetNext(IT local_index)124 void SetNext(IT local_index) { 125 next_num_set++; 126 } 127 128 IncrementNumSet(int num_updates)129 void IncrementNumSet(int num_updates) { 130 next_num_set += num_updates; 131 } 132 133 TransposeGather()134 BitMap* TransposeGather() 135 { 136 MPI_Comm World = cg->GetWorld(); 137 MPI_Comm ColWorld = cg->GetColWorld(); 138 MPI_Status status; 139 140 // Transpose bitmaps 141 MPI_Sendrecv(local_bm->data(), trans_words_send, MPIType<uint64_t>(), diagneigh, TROST, 142 trans_bm->data(), trans_words_recv, MPIType<uint64_t>(), diagneigh, TROST, World, &status); 143 144 // Gather all but first words 145 #ifdef BOTTOMUPTIME 146 double t1 = MPI_Wtime(); 147 #endif 148 MPI_Allgatherv(trans_bm->data()+1, 
send_counts[colrank], MPIType<uint64_t>(), gather_bm->data(), send_counts, word_dpls, MPIType<uint64_t>(), ColWorld); 149 #ifdef BOTTOMUPTIME 150 double t2 = MPI_Wtime(); 151 bottomup_allgather += (t2-t1); 152 #endif 153 154 // Gather first words & add in 155 gather_bm->data()[0] = 0; 156 uint64_t firsts[colneighs]; 157 firsts[colrank] = trans_bm->data()[0]; 158 MPI_Allgather(MPI_IN_PLACE, 1, MPIType<uint64_t>(), firsts, 1, MPIType<uint64_t>(), ColWorld); 159 for (int c=0; c<colneighs; c++) 160 gather_bm->data()[word_dpls[c]-1] |= firsts[c]; 161 162 next_bm->reset(); 163 next_num_set = 0; 164 return gather_bm; 165 } 166 167 UpdateSpVec(FullyDistSpVec<IT,VT> & x)168 void UpdateSpVec(FullyDistSpVec<IT,VT> & x) { 169 IT *updates = new IT[local_num_set]; 170 IT bm_index=local_subword_disp, up_index=0; 171 172 if (local_bm->get_bit(bm_index)) // if the first valid bit is 1 173 updates[up_index++] = bm_index - local_subword_disp; // ABAB: local_subword_disp is NOT subtracted (as local_subword_disp is equal to bm_index) 174 175 bm_index = local_bm->get_next_bit(bm_index); 176 while(bm_index != -1) { 177 updates[up_index++] = bm_index - local_subword_disp; // ABAB: local_subword_disp is subtracted 178 bm_index = local_bm->get_next_bit(bm_index); 179 } 180 x.BulkSet(updates, local_num_set); 181 delete[] updates; 182 } 183 184 GetNumSet()185 IT GetNumSet() { 186 IT global_num_set = 0; 187 MPI_Allreduce(&local_num_set, &global_num_set, 1, MPIType<IT>(), MPI_SUM, cg->GetWorld()); 188 return global_num_set; 189 } 190 191 GetSubWordDisp()192 int GetSubWordDisp() { 193 return local_subword_disp; 194 } 195 196 AccessBM()197 BitMap* AccessBM() { 198 return local_bm; 199 } 200 private: 201 std::shared_ptr<CommGrid> cg; 202 BitMap* local_bm; 203 BitMap* next_bm; 204 BitMap* trans_bm; 205 BitMap* gather_bm; 206 int* send_counts; 207 int* byte_dpls; 208 int* word_dpls; 209 int colneighs; 210 int colrank; 211 int diagneigh; 212 IT local_offset; 213 IT local_num_set; 214 IT next_num_set; 
215 int local_subword_disp; 216 int trans_subword_disp; 217 long trans_words_send; 218 long trans_words_recv; 219 }; 220 221 } 222 223 #endif // BITMAPFRINGE_H 224