1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #ifndef _FULLY_DIST_H
31 #define _FULLY_DIST_H
32 
33 #include <iostream>
34 #include <algorithm>
35 #include "myenableif.h"
36 
37 namespace combblas {
38 
39 template <class IT, class NT, class Enable=void>
40 class FullyDist
41 {};     // dummy generic template
42 
43 
44 /**
45   * The full distribution is actually a two-level distribution that matches the matrix distribution
46   * In this scheme, each processor row (except the last) is responsible for t = floor(n/sqrt(p)) elements.
47   * The last processor row gets the remaining (n-floor(n/sqrt(p))*(sqrt(p)-1)) elements
48   * Within the processor row, each processor (except the last) is responsible for loc = floor(t/sqrt(p)) elements.
49   * Example: n=103 and p=16
50   * All processors P_ij for i=0,1,2 and j=0,1,2 get floor(floor(102/4)/4) = 6 elements
51   * All processors P_i3 for i=0,1,2 get 25-6*3 = 7 elements
52   * All processors P_3j for j=0,1,2 get (102-25*3)/4 = 6 elements
53   * Processor P_33 gets 27-6*3 = 9 elements
54   * Both derived classes, whether sparse or dense, are distributed
55   * to processors based on their "length", so that a conversion does not
56   * need any communication between sparse and dense formats
57  **/
58 template <class IT, class NT>
59 class FullyDist<IT, NT, typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
60 {
61 public:
FullyDist()62 	explicit FullyDist():glen(0)
63 	{
64         SpParHelper::Print("COMBBLAS Warning: It is dangerous to create (vector) objects without specifying the communicator, are you sure you want to create this object in MPI_COMM_WORLD?\n");
65 		commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
66 	}
FullyDist(IT globallen)67 	explicit FullyDist(IT globallen): glen(globallen)
68 	{
69         SpParHelper::Print("COMBBLAS Warning: It is dangerous to create (vector) objects without specifying the communicator, are you sure you want to create this object in MPI_COMM_WORLD?\n");
70 		commGrid.reset(new CommGrid(MPI_COMM_WORLD, 0, 0));
71 	}
72     	/* ABAB: This clashes with FullyDist(IT globallen) signature on MPICH based systems that #define MPI_Comm to be an INT
73 	FullyDist( MPI_Comm world):glen(0)
74     	{
75         	commGrid.reset(new CommGrid(world, 0, 0));
76     	}*/
FullyDist(std::shared_ptr<CommGrid> grid)77 	FullyDist( std::shared_ptr<CommGrid> grid):glen(0)
78 	{
79 		commGrid = grid;
80 	}
FullyDist(std::shared_ptr<CommGrid> grid,IT globallen)81 	FullyDist( std::shared_ptr<CommGrid> grid, IT globallen): glen(globallen)
82 	{
83 		commGrid = grid;
84 	}
85 	FullyDist<IT,NT> & operator=(const FullyDist<IT,NT> & rhs)
86 	{
87 		glen = rhs.glen;
88 		commGrid = rhs.commGrid;
89 		return *this;
90 	}
91 
92 	IT LengthUntil() const;
93 	IT RowLenUntil() const;
94 	IT RowLenUntil(int k) const;
95 	IT MyLocLength() const;
96 	IT MyRowLength() const;
TotalLength()97 	IT TotalLength() const { return glen; }
98 	int Owner(IT gind, IT & lind) const;
99 	int OwnerWithinRow(IT n_thisrow, IT ind_withinrow, IT & lind) const;
100 
101 protected:
102 	std::shared_ptr<CommGrid> commGrid;
103 	IT glen;		// global length (actual "length" including zeros)
104 };
105 
106 
107 //! Given global index gind,
108 //! Return the owner processor id, and
109 //! Assign the local index to lind
110 template <class IT, class NT>
111 int FullyDist<IT,NT, typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>
Owner(IT gind,IT & lind)112 ::Owner(IT gind, IT & lind) const
113 {
114 	// C++ implicitly upcasts both operands to 64-bit if one is 64-bit and other is 32-bit
115 	int procrows = commGrid->GetGridRows();
116 	IT n_perprocrow = glen / procrows;	// length on a typical processor row
117 	IT n_thisrow;		// length assigned to owner's processor row
118 	int own_procrow;	// owner's processor row
119 	if(n_perprocrow != 0)
120 	{
121 		own_procrow = std::min(static_cast<int>(gind / n_perprocrow), procrows-1);	// owner's processor row
122 	}
123 	else	// all owned by the last processor row
124 	{
125 		own_procrow = procrows -1;
126 	}
127 
128 	IT ind_withinrow = gind - (own_procrow * n_perprocrow);
129 	if(own_procrow == procrows-1)
130 		n_thisrow = glen - (n_perprocrow*(procrows-1));
131 	else
132 		n_thisrow = n_perprocrow;
133 
134 	int proccols = commGrid->GetGridCols();
135 	IT n_perproc = n_thisrow / proccols;	// length on a typical processor
136 
137 	int own_proccol;
138 	if(n_perproc != 0)
139 	{
140 		own_proccol = std::min(static_cast<int>(ind_withinrow / n_perproc), proccols-1);
141 	}
142 	else
143 	{
144 		own_proccol = proccols-1;
145 	}
146 	lind = ind_withinrow - (own_proccol * n_perproc);
147 
148 	// GetRank(int rowrank, int colrank) { return rowrank * grcols + colrank;}
149 	return commGrid->GetRank(own_procrow, own_proccol);
150 }
151 
152 /**
153  * @param[in] ind_withinrow {index within processor row}
154  * @param[in] n_thisrow {length within this row}
155  * @param[out] lind {index local to owning processor}
156  * Return the owner processor id (within processor row)
157  **/
158 template <class IT, class NT>
159 int FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
OwnerWithinRow(IT n_thisrow,IT ind_withinrow,IT & lind)160 ::OwnerWithinRow(IT n_thisrow, IT ind_withinrow, IT & lind) const
161 {
162 	int proccols = commGrid->GetGridCols();
163 	IT n_perproc = n_thisrow / proccols;	// length on a typical processor
164 
165 	int own_proccol;
166 	if(n_perproc != 0)
167 	{
168 		own_proccol = std::min(static_cast<int>(ind_withinrow / n_perproc), proccols-1);
169 	}
170 	else
171 	{
172 		own_proccol = proccols-1;
173 	}
174 	lind = ind_withinrow - (own_proccol * n_perproc);
175 
176 	return own_proccol;
177 }
178 
179 template <class IT, class NT>
180 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
LengthUntil()181 ::LengthUntil() const
182 {
183 	int procrows = commGrid->GetGridRows();
184 	int my_procrow = commGrid->GetRankInProcCol();
185 	IT n_perprocrow = glen / procrows;	// length on a typical processor row
186 	IT n_thisrow;	// length assigned to this processor row
187 	if(my_procrow == procrows-1)
188 		n_thisrow = glen - (n_perprocrow*(procrows-1));
189 	else
190 		n_thisrow = n_perprocrow;
191 
192 	int proccols = commGrid->GetGridCols();
193 	int my_proccol = commGrid->GetRankInProcRow();
194 	IT n_perproc = n_thisrow / proccols;	// length on a typical processor
195 	return ((n_perprocrow * my_procrow)+(n_perproc*my_proccol));
196 }
197 
198 // Return the length until this processor, within this processor row only
199 template <class IT, class NT>
200 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
RowLenUntil()201 ::RowLenUntil() const
202 {
203 	int procrows = commGrid->GetGridRows();
204 	int my_procrow = commGrid->GetRankInProcCol();
205 	IT n_perprocrow = glen / procrows;	// length on a typical processor row
206 	IT n_thisrow;	// length assigned to this processor row
207 	if(my_procrow == procrows-1)
208 		n_thisrow = glen - (n_perprocrow*(procrows-1));
209 	else
210 		n_thisrow = n_perprocrow;
211 
212 	int proccols = commGrid->GetGridCols();
213 	int my_proccol = commGrid->GetRankInProcRow();
214 	IT n_perproc = n_thisrow / proccols;	// length on a typical processor
215 	return (n_perproc*my_proccol);
216 }
217 
218 // Return the length until the kth processor, within this processor row only
219 template <class IT, class NT>
220 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type >
RowLenUntil(int k)221 ::RowLenUntil(int k) const
222 {
223 	int procrows = commGrid->GetGridRows();
224 	int my_procrow = commGrid->GetRankInProcCol();
225 	IT n_perprocrow = glen / procrows;	// length on a typical processor row
226 	IT n_thisrow;	// length assigned to this processor row
227 	if(my_procrow == procrows-1)
228 		n_thisrow = glen - (n_perprocrow*(procrows-1));
229 	else
230 		n_thisrow = n_perprocrow;
231 
232 	int proccols = commGrid->GetGridCols();
233 	IT n_perproc = n_thisrow / proccols;	// length on a typical processor
234 	assert(k < proccols);
235 	return (n_perproc*k);
236 }
237 
238 template <class IT, class NT>
239 IT FullyDist<IT,NT, typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>
MyLocLength()240 ::MyLocLength() const
241 {
242 	int procrows = commGrid->GetGridRows();
243 	int my_procrow = commGrid->GetRankInProcCol();
244 	IT n_perprocrow = glen / procrows;	// length on a typical processor row
245 	IT n_thisrow;	// length assigned to this processor row
246 	if(my_procrow == procrows-1)
247 		n_thisrow = glen - (n_perprocrow*(procrows-1));
248 	else
249 		n_thisrow = n_perprocrow;
250 
251 	int proccols = commGrid->GetGridCols();
252 	int my_proccol = commGrid->GetRankInProcRow();
253 	IT n_perproc = n_thisrow / proccols;	// length on a typical processor
254 	if(my_proccol == proccols-1)
255 		return (n_thisrow - (n_perproc*(proccols-1)));
256 	else
257 		return n_perproc;
258 }
259 
260 
261 template <class IT, class NT>
262 IT FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>
MyRowLength()263 ::MyRowLength() const
264 {
265 	int procrows = commGrid->GetGridRows();
266 	int my_procrow = commGrid->GetRankInProcCol();
267 	IT n_perprocrow = glen / procrows;	// length on a typical processor row
268 	IT n_thisrow;	// length assigned to this processor row
269 	if(my_procrow == procrows-1)
270 		n_thisrow = glen - (n_perprocrow*(procrows-1));
271 	else
272 		n_thisrow = n_perprocrow;
273 
274 	return n_thisrow;
275 }
276 
277 }
278 
279 #endif
280