1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30 #ifndef _FULLY_DIST_H
31 #define _FULLY_DIST_H
32
33 #include <iostream>
34 #include <algorithm>
35 #include "myenableif.h"
36
37 namespace combblas {
38
39 template <class IT, class NT, class Enable=void>
40 class FullyDist
41 {}; // dummy generic template
42
43
44 /**
45 * The full distribution is actually a two-level distribution that matches the matrix distribution
46 * In this scheme, each processor row (except the last) is responsible for t = floor(n/sqrt(p)) elements.
47 * The last processor row gets the remaining (n-floor(n/sqrt(p))*(sqrt(p)-1)) elements
48 * Within the processor row, each processor (except the last) is responsible for loc = floor(t/sqrt(p)) elements.
49 * Example: n=103 and p=16
50 * All processors P_ij for i=0,1,2 and j=0,1,2 get floor(floor(102/4)/4) = 6 elements
51 * All processors P_i3 for i=0,1,2 get 25-6*3 = 7 elements
52 * All processors P_3j for j=0,1,2 get (102-25*3)/4 = 6 elements
53 * Processor P_33 gets 27-6*3 = 9 elements
54 * Both derived classes, whether sparse or dense, are distributed
55 * to processors based on their "length", so that a conversion does not
56 * need any communication between sparse and dense formats
57 **/
58 template <class IT, class NT>
59 class FullyDist<IT, NT, typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
60 {
61 public:
FullyDist()62 explicit FullyDist():glen(0)
63 {
64 SpParHelper::Print("COMBBLAS Warning: It is dangerous to create (vector) objects without specifying the communicator, are you sure you want to create this object in MPI_COMM_WORLD?\n");
65 commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
66 }
FullyDist(IT globallen)67 explicit FullyDist(IT globallen): glen(globallen)
68 {
69 SpParHelper::Print("COMBBLAS Warning: It is dangerous to create (vector) objects without specifying the communicator, are you sure you want to create this object in MPI_COMM_WORLD?\n");
70 commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
71 }
72 /* ABAB: This clashes with FullyDist(IT globallen) signature on MPICH based systems that #define MPI_Comm to be an INT
73 FullyDist( MPI_Comm world):glen(0)
74 {
75 commGrid.reset(new CommGrid(world, 0, 0));
76 }*/
FullyDist(std::shared_ptr<CommGrid> grid)77 FullyDist( std::shared_ptr<CommGrid> grid):glen(0)
78 {
79 commGrid = grid;
80 }
FullyDist(std::shared_ptr<CommGrid> grid,IT globallen)81 FullyDist( std::shared_ptr<CommGrid> grid, IT globallen): glen(globallen)
82 {
83 commGrid = grid;
84 }
85 FullyDist<IT,NT> & operator=(const FullyDist<IT,NT> & rhs)
86 {
87 glen = rhs.glen;
88 commGrid = rhs.commGrid;
89 return *this;
90 }
91
92 IT LengthUntil() const;
93 IT RowLenUntil() const;
94 IT RowLenUntil(int k) const;
95 IT MyLocLength() const;
96 IT MyRowLength() const;
TotalLength()97 IT TotalLength() const { return glen; }
98 int Owner(IT gind, IT & lind) const;
99 int OwnerWithinRow(IT n_thisrow, IT ind_withinrow, IT & lind) const;
100
101 protected:
102 std::shared_ptr<CommGrid> commGrid;
103 IT glen; // global length (actual "length" including zeros)
104 };
105
106
107 //! Given global index gind,
108 //! Return the owner processor id, and
109 //! Assign the local index to lind
110 template <class IT, class NT>
111 int FullyDist<IT,NT, typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>
Owner(IT gind,IT & lind)112 ::Owner(IT gind, IT & lind) const
113 {
114 // C++ implicitly upcasts both operands to 64-bit if one is 64-bit and other is 32-bit
115 int procrows = commGrid->GetGridRows();
116 IT n_perprocrow = glen / procrows; // length on a typical processor row
117 IT n_thisrow; // length assigned to owner's processor row
118 int own_procrow; // owner's processor row
119 if(n_perprocrow != 0)
120 {
121 own_procrow = std::min(static_cast<int>(gind / n_perprocrow), procrows-1); // owner's processor row
122 }
123 else // all owned by the last processor row
124 {
125 own_procrow = procrows -1;
126 }
127
128 IT ind_withinrow = gind - (own_procrow * n_perprocrow);
129 if(own_procrow == procrows-1)
130 n_thisrow = glen - (n_perprocrow*(procrows-1));
131 else
132 n_thisrow = n_perprocrow;
133
134 int proccols = commGrid->GetGridCols();
135 IT n_perproc = n_thisrow / proccols; // length on a typical processor
136
137 int own_proccol;
138 if(n_perproc != 0)
139 {
140 own_proccol = std::min(static_cast<int>(ind_withinrow / n_perproc), proccols-1);
141 }
142 else
143 {
144 own_proccol = proccols-1;
145 }
146 lind = ind_withinrow - (own_proccol * n_perproc);
147
148 // GetRank(int rowrank, int colrank) { return rowrank * grcols + colrank;}
149 return commGrid->GetRank(own_procrow, own_proccol);
150 }
151
152 /**
153 * @param[in] ind_withinrow {index within processor row}
154 * @param[in] n_thisrow {length within this row}
155 * @param[out] lind {index local to owning processor}
156 * Return the owner processor id (within processor row)
157 **/
158 template <class IT, class NT>
159 int FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
OwnerWithinRow(IT n_thisrow,IT ind_withinrow,IT & lind)160 ::OwnerWithinRow(IT n_thisrow, IT ind_withinrow, IT & lind) const
161 {
162 int proccols = commGrid->GetGridCols();
163 IT n_perproc = n_thisrow / proccols; // length on a typical processor
164
165 int own_proccol;
166 if(n_perproc != 0)
167 {
168 own_proccol = std::min(static_cast<int>(ind_withinrow / n_perproc), proccols-1);
169 }
170 else
171 {
172 own_proccol = proccols-1;
173 }
174 lind = ind_withinrow - (own_proccol * n_perproc);
175
176 return own_proccol;
177 }
178
179 template <class IT, class NT>
180 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
LengthUntil()181 ::LengthUntil() const
182 {
183 int procrows = commGrid->GetGridRows();
184 int my_procrow = commGrid->GetRankInProcCol();
185 IT n_perprocrow = glen / procrows; // length on a typical processor row
186 IT n_thisrow; // length assigned to this processor row
187 if(my_procrow == procrows-1)
188 n_thisrow = glen - (n_perprocrow*(procrows-1));
189 else
190 n_thisrow = n_perprocrow;
191
192 int proccols = commGrid->GetGridCols();
193 int my_proccol = commGrid->GetRankInProcRow();
194 IT n_perproc = n_thisrow / proccols; // length on a typical processor
195 return ((n_perprocrow * my_procrow)+(n_perproc*my_proccol));
196 }
197
198 // Return the length until this processor, within this processor row only
199 template <class IT, class NT>
200 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
RowLenUntil()201 ::RowLenUntil() const
202 {
203 int procrows = commGrid->GetGridRows();
204 int my_procrow = commGrid->GetRankInProcCol();
205 IT n_perprocrow = glen / procrows; // length on a typical processor row
206 IT n_thisrow; // length assigned to this processor row
207 if(my_procrow == procrows-1)
208 n_thisrow = glen - (n_perprocrow*(procrows-1));
209 else
210 n_thisrow = n_perprocrow;
211
212 int proccols = commGrid->GetGridCols();
213 int my_proccol = commGrid->GetRankInProcRow();
214 IT n_perproc = n_thisrow / proccols; // length on a typical processor
215 return (n_perproc*my_proccol);
216 }
217
218 // Return the length until the kth processor, within this processor row only
219 template <class IT, class NT>
220 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
RowLenUntil(int k)221 ::RowLenUntil(int k) const
222 {
223 int procrows = commGrid->GetGridRows();
224 int my_procrow = commGrid->GetRankInProcCol();
225 IT n_perprocrow = glen / procrows; // length on a typical processor row
226 IT n_thisrow; // length assigned to this processor row
227 if(my_procrow == procrows-1)
228 n_thisrow = glen - (n_perprocrow*(procrows-1));
229 else
230 n_thisrow = n_perprocrow;
231
232 int proccols = commGrid->GetGridCols();
233 IT n_perproc = n_thisrow / proccols; // length on a typical processor
234 assert(k < proccols);
235 return (n_perproc*k);
236 }
237
238 template <class IT, class NT>
239 IT FullyDist<IT,NT, typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>
MyLocLength()240 ::MyLocLength() const
241 {
242 int procrows = commGrid->GetGridRows();
243 int my_procrow = commGrid->GetRankInProcCol();
244 IT n_perprocrow = glen / procrows; // length on a typical processor row
245 IT n_thisrow; // length assigned to this processor row
246 if(my_procrow == procrows-1)
247 n_thisrow = glen - (n_perprocrow*(procrows-1));
248 else
249 n_thisrow = n_perprocrow;
250
251 int proccols = commGrid->GetGridCols();
252 int my_proccol = commGrid->GetRankInProcRow();
253 IT n_perproc = n_thisrow / proccols; // length on a typical processor
254 if(my_proccol == proccols-1)
255 return (n_thisrow - (n_perproc*(proccols-1)));
256 else
257 return n_perproc;
258 }
259
260
261 template <class IT, class NT>
262 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>
MyRowLength()263 ::MyRowLength() const
264 {
265 int procrows = commGrid->GetGridRows();
266 int my_procrow = commGrid->GetRankInProcCol();
267 IT n_perprocrow = glen / procrows; // length on a typical processor row
268 IT n_thisrow; // length assigned to this processor row
269 if(my_procrow == procrows-1)
270 n_thisrow = glen - (n_perprocrow*(procrows-1));
271 else
272 n_thisrow = n_perprocrow;
273
274 return n_thisrow;
275 }
276
277 }
278
279 #endif
280