1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.4 -------------------------------------------------*/
4 /* date: 1/17/2014 ---------------------------------------------*/
5 /* authors: Aydin Buluc (abuluc@lbl.gov), Adam Lugowski --------*/
6 /* this file contributed by Scott Beamer of UC Berkeley --------*/
7 /****************************************************************/
8 /*
9  Copyright (c) 2010-2014, The Regents of the University of California
10 
11  Permission is hereby granted, free of charge, to any person obtaining a copy
12  of this software and associated documentation files (the "Software"), to deal
13  in the Software without restriction, including without limitation the rights
14  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15  copies of the Software, and to permit persons to whom the Software is
16  furnished to do so, subject to the following conditions:
17 
18  The above copyright notice and this permission notice shall be included in
19  all copies or substantial portions of the Software.
20 
21  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27  THE SOFTWARE.
28  */
29 
30 
31 #ifndef BITMAPFRINGE_H
32 #define BITMAPFRINGE_H
33 
34 // #include <algorithm>
35 #include "BitMap.h"
36 #include "CommGrid.h"
37 
38 namespace combblas {
39 
40 template <class IT, class VT>
41 class BitMapFringe {
42  public:
BitMapFringe(std::shared_ptr<CommGrid> grid,FullyDistSpVec<IT,VT> & x)43   BitMapFringe(std::shared_ptr<CommGrid> grid, FullyDistSpVec<IT,VT> & x) {
44     cg.reset(new CommGrid(*grid));
45 
46 	MPI_Comm World = x.getcommgrid()->GetWorld();
47 	MPI_Comm ColWorld = x.getcommgrid()->GetColWorld();
48 	MPI_Status status;
49 
50     // Find out how big local chunk will be after transpose
51     long num_local_send = x.MyLocLength(), num_local_recv;
52     diagneigh = cg->GetComplementRank();
53 	MPI_Sendrecv(&num_local_send, 1, MPI_LONG, diagneigh, TROST,
54 				 &num_local_recv, 1, MPI_LONG, diagneigh, TROST, World, &status);
55 
56     // Calculate new local displacements
57 	MPI_Comm_size(ColWorld, &colneighs);
58 	MPI_Comm_rank(ColWorld, &colrank);
59 
60   	int counts[colneighs];
61   	counts[colrank] = num_local_recv;
62 	MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, counts, 1, MPI_INT, ColWorld);
63 
64 	int dpls[colneighs];
65     dpls[0] = 0;
66   	std::partial_sum(counts, counts+colneighs-1, dpls+1);
67     long total_size = dpls[colneighs-1] + counts[colneighs-1];
68     local_offset = x.LengthUntil();
69     local_num_set = 0;
70 
71     // Compute word/byte displacements and send counts for gather
72     word_dpls = new int[colneighs];
73     byte_dpls = new int[colneighs];
74     send_counts = new int[colneighs];
75     for (int c=0; c<colneighs; c++) {
76       word_dpls[c] = (dpls[c] + (64-(dpls[c]%64)))>>6;
77       byte_dpls[c] = word_dpls[c]<<3;
78       send_counts[c] = (counts[c] - (64-(dpls[c] % 64)) + 63)>>6;
79     }
80 
81     // Compute subword displacements and transpose exchange details
82     trans_subword_disp = dpls[colrank] % 64;
83 	MPI_Sendrecv(&trans_subword_disp, 1, MPIType<int32_t>(), diagneigh, TROST,
84 				 &local_subword_disp, 1, MPIType<int32_t>(), diagneigh, TROST, World, &status);
85 
86     trans_words_send = (num_local_send + local_subword_disp + 63)>>6;
87     trans_words_recv = (num_local_recv + trans_subword_disp + 63)>>6;
88 
89     // Allocate bitmaps
90     local_bm = new BitMap(num_local_send + local_subword_disp);
91     next_bm = new BitMap(num_local_send + local_subword_disp);
92     trans_bm = new BitMap(num_local_recv + trans_subword_disp);
93     gather_bm = new BitMap(total_size);
94   }
95 
~BitMapFringe()96   ~BitMapFringe() {
97     delete local_bm;
98     delete next_bm;
99     delete trans_bm;
100     delete gather_bm;
101     delete[] send_counts;
102     delete[] word_dpls;
103     delete[] byte_dpls;
104   }
105 
LoadFromSpVec(FullyDistSpVec<IT,VT> & x)106   void LoadFromSpVec(FullyDistSpVec<IT,VT> & x) {
107     local_bm->reset();
108     for (SparseVectorLocalIterator<IT,VT> spit(x); spit.HasNext(); spit.Next())
109       local_bm->set_bit(spit.GetLocIndex() + local_subword_disp);
110     local_num_set = x.getlocnnz();
111   }
112 
LoadFromUpdates(IT * updates,long total_updates)113   void LoadFromUpdates(IT* updates, long total_updates) {
114     local_bm->reset();
115     for (long i=0; i<total_updates; i++)
116       local_bm->set_bit(updates[i] - local_offset + local_subword_disp);
117     local_num_set = total_updates;
118   }
119 
LoadFromNext()120   void LoadFromNext() {
121     local_num_set = next_num_set;
122   }
123 
SetNext(IT local_index)124   void SetNext(IT local_index) {
125     next_num_set++;
126   }
127 
128 
IncrementNumSet(int num_updates)129   void IncrementNumSet(int num_updates) {
130     next_num_set += num_updates;
131   }
132 
133 
TransposeGather()134   BitMap* TransposeGather()
135   {
136 	MPI_Comm World = cg->GetWorld();
137 	MPI_Comm ColWorld = cg->GetColWorld();
138 	MPI_Status status;
139 
140     // Transpose bitmaps
141 	MPI_Sendrecv(local_bm->data(), trans_words_send, MPIType<uint64_t>(), diagneigh, TROST,
142 				 trans_bm->data(), trans_words_recv, MPIType<uint64_t>(), diagneigh, TROST, World, &status);
143 
144     // Gather all but first words
145 #ifdef BOTTOMUPTIME
146     double t1 = MPI_Wtime();
147 #endif
148 	MPI_Allgatherv(trans_bm->data()+1, send_counts[colrank], MPIType<uint64_t>(), gather_bm->data(), send_counts, word_dpls, MPIType<uint64_t>(), ColWorld);
149 #ifdef BOTTOMUPTIME
150     double t2 = MPI_Wtime();
151     bottomup_allgather += (t2-t1);
152 #endif
153 
154     // Gather first words & add in
155     gather_bm->data()[0] = 0;
156     uint64_t firsts[colneighs];
157     firsts[colrank] = trans_bm->data()[0];
158     MPI_Allgather(MPI_IN_PLACE, 1, MPIType<uint64_t>(), firsts, 1, MPIType<uint64_t>(), ColWorld);
159     for (int c=0; c<colneighs; c++)
160       gather_bm->data()[word_dpls[c]-1] |= firsts[c];
161 
162     next_bm->reset();
163     next_num_set = 0;
164     return gather_bm;
165   }
166 
167 
UpdateSpVec(FullyDistSpVec<IT,VT> & x)168   void UpdateSpVec(FullyDistSpVec<IT,VT> & x) {
169     IT *updates = new IT[local_num_set];
170     IT bm_index=local_subword_disp, up_index=0;
171 
172     if (local_bm->get_bit(bm_index))	// if the first valid bit is 1
173       updates[up_index++] = bm_index - local_subword_disp;	// ABAB: local_subword_disp is NOT subtracted (as local_subword_disp is equal to bm_index)
174 
175     bm_index = local_bm->get_next_bit(bm_index);
176     while(bm_index != -1) {
177       updates[up_index++] = bm_index - local_subword_disp;	// ABAB: local_subword_disp is subtracted
178       bm_index = local_bm->get_next_bit(bm_index);
179     }
180     x.BulkSet(updates, local_num_set);
181     delete[] updates;
182   }
183 
184 
GetNumSet()185   IT GetNumSet() {
186 	  IT global_num_set = 0;
187 	  MPI_Allreduce(&local_num_set, &global_num_set, 1, MPIType<IT>(), MPI_SUM, cg->GetWorld());
188 	  return global_num_set;
189 	}
190 
191 
GetSubWordDisp()192 	int GetSubWordDisp() {
193     return local_subword_disp;
194 	}
195 
196 
AccessBM()197   BitMap* AccessBM() {
198     return local_bm;
199   }
200  private:
201  	std::shared_ptr<CommGrid> cg;
202   BitMap* local_bm;
203   BitMap* next_bm;
204   BitMap* trans_bm;
205   BitMap* gather_bm;
206   int* send_counts;
207   int* byte_dpls;
208   int* word_dpls;
209   int colneighs;
210   int colrank;
211   int diagneigh;
212   IT local_offset;
213   IT local_num_set;
214   IT next_num_set;
215   int local_subword_disp;
216   int trans_subword_disp;
217   long trans_words_send;
218   long trans_words_recv;
219 };
220 
221 }
222 
223 #endif // BITMAPFRINGE_H
224