/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.6 -------------------------------------------------*/
/* date: 6/15/2017 ---------------------------------------------*/
/* authors: Ariful Azad, Aydin Buluc --------------------------*/
/****************************************************************/
/*
Copyright (c) 2010-2017, The Regents of the University of California

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include "FullyDistVec.h"
#include "FullyDistSpVec.h"
#include "Operations.h"

namespace combblas {

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ()
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>()
{
}

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec (IT globallen, NT initval)
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(globallen)
{
    arr.resize(MyLocLength(), initval);
}


template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ( std::shared_ptr<CommGrid> grid)
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(grid)
{ }

template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ( std::shared_ptr<CommGrid> grid, IT globallen, NT initval)
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(grid,globallen)
{
    arr.resize(MyLocLength(), initval);
}

template <class IT, class NT>
FullyDistVec<IT,NT>::FullyDistVec (const FullyDistSpVec<IT,NT> & rhs) // conversion copy-constructor
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(rhs.commGrid,rhs.glen)
{
    *this = rhs;
}


template <class IT, class NT>
template <class ITRHS, class NTRHS>
FullyDistVec<IT, NT>::FullyDistVec ( const FullyDistVec<ITRHS, NTRHS>& rhs )
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(rhs.commGrid, static_cast<IT>(rhs.glen))
{
    arr.resize(static_cast<IT>(rhs.arr.size()), NT());

    for(IT i=0; (unsigned)i < arr.size(); ++i)
    {
        arr[i] = static_cast<NT>(rhs.arr[static_cast<ITRHS>(i)]);
    }
}

/**
 * Initialize a FullyDistVec with a separate vector from each processor.
 * Optimized for the common case where the fillarr's on all processors have the same size.
 */
template <class IT, class NT>
FullyDistVec<IT, NT>::FullyDistVec ( const std::vector<NT> & fillarr, std::shared_ptr<CommGrid> grid )
: FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>(grid)
{
    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();

    IT * sizes = new IT[nprocs];
    IT nsize = fillarr.size();
    sizes[rank] = nsize;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), sizes, 1, MPIType<IT>(), World);
    glen = std::accumulate(sizes, sizes+nprocs, static_cast<IT>(0));

    std::vector<IT> uniq_sizes;
    std::unique_copy(sizes, sizes+nprocs, std::back_inserter(uniq_sizes));
    if(uniq_sizes.size() == 1)
    {
        arr = fillarr;
    }
    else
    {
        IT lengthuntil = std::accumulate(sizes, sizes+rank, static_cast<IT>(0));

        // Although *this is not reshuffled yet, its glen and commGrid are set,
        // so we can call the Owner/MyLocLength/LengthUntil functions (to infer the future distribution)

        // rebalance/redistribute
        int * sendcnt = new int[nprocs];
        std::fill(sendcnt, sendcnt+nprocs, 0);
        for(IT i=0; i<nsize; ++i)
        {
            IT locind;
            int owner = Owner(i+lengthuntil, locind);
            ++sendcnt[owner];
        }
        int * recvcnt = new int[nprocs];
        MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the counts

        int * sdispls = new int[nprocs];
        int * rdispls = new int[nprocs];
        sdispls[0] = 0;
        rdispls[0] = 0;
        for(int i=0; i<nprocs-1; ++i)
        {
            sdispls[i+1] = sdispls[i] + sendcnt[i];
            rdispls[i+1] = rdispls[i] + recvcnt[i];
        }
        IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
        std::vector<NT> recvbuf(totrecv);

        arr = fillarr; // data is already in the right order; only the processor boundaries move
        MPI_Alltoallv(&(arr[0]), sendcnt, sdispls, MPIType<NT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<NT>(), World);
        arr.swap(recvbuf);
        DeleteAll(sendcnt, recvcnt, sdispls, rdispls);
    }
    delete [] sizes;
}
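
/*
 * Usage sketch (illustrative only, not part of the library): building a vector
 * from per-process pieces. `fullWorld` stands for an already constructed
 * CommGrid and is a hypothetical name.
 *
 *   std::vector<double> mypiece(10 + myrank);   // ranks may contribute different amounts
 *   FullyDistVec<int64_t, double> v(mypiece, fullWorld);
 *   // v now holds the concatenation of all pieces in rank order,
 *   // redistributed into the library's balanced block distribution
 */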

template <class IT, class NT>
std::pair<IT, NT> FullyDistVec<IT,NT>::MinElement() const
{
    auto it = std::min_element(arr.begin(), arr.end());
    NT localMin = *it;
    NT globalMin;
    MPI_Allreduce( &localMin, &globalMin, 1, MPIType<NT>(), MPI_MIN, commGrid->GetWorld());

    IT localMinIdx = TotalLength();
    if(globalMin == localMin)
    {
        localMinIdx = std::distance(arr.begin(), it) + LengthUntil();
    }
    IT globalMinIdx;
    // non-owners contribute TotalLength() as a sentinel, so MPI_MIN picks the
    // smallest global index attaining the minimum
    MPI_Allreduce( &localMinIdx, &globalMinIdx, 1, MPIType<IT>(), MPI_MIN, commGrid->GetWorld());

    return std::make_pair(globalMinIdx, globalMin);
}
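
/*
 * Usage sketch (illustrative; `v` is a hypothetical FullyDistVec<int64_t, double>):
 *
 *   std::pair<int64_t, double> p = v.MinElement();
 *   // p.second is the global minimum, identical on every rank;
 *   // p.first is the smallest global index attaining that minimum
 */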

template <class IT, class NT>
template <typename _BinaryOperation>
NT FullyDistVec<IT,NT>::Reduce(_BinaryOperation __binary_op, NT identity) const
{
    // std::accumulate returns the identity for empty sequences
    NT localsum = std::accumulate( arr.begin(), arr.end(), identity, __binary_op);

    NT totalsum = identity;
    MPI_Allreduce( &localsum, &totalsum, 1, MPIType<NT>(), MPIOp<_BinaryOperation, NT>::op(), commGrid->GetWorld());
    return totalsum;
}
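
/*
 * Usage sketch (illustrative; v is a hypothetical vector): the identity argument
 * must be the identity element of the supplied operation, since it also seeds
 * empty local arrays:
 *
 *   double total = v.Reduce(std::plus<double>(), 0.0);         // global sum
 *   double prod  = v.Reduce(std::multiplies<double>(), 1.0);   // global product
 */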

template <class IT, class NT>
template <typename OUT, typename _BinaryOperation, typename _UnaryOperation>
OUT FullyDistVec<IT,NT>::Reduce(_BinaryOperation __binary_op, OUT default_val, _UnaryOperation __unary_op) const
{
    // seed with default_val so that an empty local array contributes default_val
    OUT localsum = default_val;

    if (arr.size() > 0)
    {
        typename std::vector< NT >::const_iterator iter = arr.begin();
        //localsum = __unary_op(*iter);
        //iter++;
        while (iter < arr.end())
        {
            localsum = __binary_op(localsum, __unary_op(*iter));
            iter++;
        }
    }

    OUT totalsum = default_val;
    MPI_Allreduce( &localsum, &totalsum, 1, MPIType<OUT>(), MPIOp<_BinaryOperation, OUT>::op(), commGrid->GetWorld());
    return totalsum;
}


//! ABAB: Put in a concept check; NT should be an integer type for this to make sense
template<class IT, class NT>
void FullyDistVec<IT,NT>::SelectCandidates(double nver)
{
#ifdef DETERMINISTIC
    MTRand M(1);
#else
    MTRand M; // generate random numbers with Mersenne Twister
#endif

    IT length = TotalLength();
    std::vector<double> loccands(length);
    std::vector<NT> loccandints(length);
    MPI_Comm World = commGrid->GetWorld();
    int myrank = commGrid->GetRank();
    if(myrank == 0)
    {
        for(IT i=0; i<length; ++i)
            loccands[i] = M.rand();
        // scale the uniform [0,1) samples to vertex ids in [0, nver)
        // (std::bind2nd was removed in C++17, hence the lambda)
        std::transform(loccands.begin(), loccands.end(), loccands.begin(),
                       [nver](double val) { return val * nver; });

        for(IT i=0; i<length; ++i)
            loccandints[i] = static_cast<NT>(loccands[i]);
    }
    MPI_Bcast(&(loccandints[0]), length, MPIType<NT>(), 0, World);
    for(IT i=0; i<length; ++i)
        SetElement(i, loccandints[i]);
}

template <class IT, class NT>
template <class ITRHS, class NTRHS>
FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator=(const FullyDistVec< ITRHS,NTRHS > & rhs)
{
    if(static_cast<const void*>(this) != static_cast<const void*>(&rhs))
    {
        //FullyDist<IT,NT>::operator= (rhs); // to update glen and commGrid
        glen = static_cast<IT>(rhs.glen);
        commGrid = rhs.commGrid;

        arr.resize(rhs.arr.size(), NT());
        for(IT i=0; (unsigned)i < arr.size(); ++i)
        {
            arr[i] = static_cast<NT>(rhs.arr[static_cast<ITRHS>(i)]);
        }
    }
    return *this;
}

template <class IT, class NT>
FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator=(const FullyDistVec< IT,NT > & rhs)
{
    if(this != &rhs)
    {
        FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>::operator= (rhs); // to update glen and commGrid
        arr = rhs.arr;
    }
    return *this;
}

template <class IT, class NT>
FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator=(const FullyDistSpVec< IT,NT > & rhs) // FullyDistSpVec -> FullyDistVec conversion operator
{
    FullyDist<IT,NT,typename combblas::disable_if< combblas::is_boolean<NT>::value, NT >::type>::operator= (rhs); // to update glen and commGrid
    arr.resize(rhs.MyLocLength());
    std::fill(arr.begin(), arr.end(), NT());

    IT spvecsize = rhs.getlocnnz();
    for(IT i=0; i< spvecsize; ++i)
    {
        //if(rhs.ind[i] > arr.size())
        //    cout << "rhs.ind[i]: " << rhs.ind[i] << endl;
        arr[rhs.ind[i]] = rhs.num[i];
    }
    return *this;
}


/**
 ** Let the compiler create an assignment operator and call the base class'
 ** assignment operator automatically
 **/


template <class IT, class NT>
FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator+=(const FullyDistSpVec< IT,NT > & rhs)
{
    IT spvecsize = rhs.getlocnnz();
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for(IT i=0; i< spvecsize; ++i)
    {
        if(arr[rhs.ind[i]] == NT()) // not set before
            arr[rhs.ind[i]] = rhs.num[i];
        else
            arr[rhs.ind[i]] += rhs.num[i];
    }
    return *this;
}


template <class IT, class NT>
FullyDistVec< IT,NT > & FullyDistVec<IT,NT>::operator-=(const FullyDistSpVec< IT,NT > & rhs)
{
    IT spvecsize = rhs.getlocnnz();
    for(IT i=0; i< spvecsize; ++i)
    {
        arr[rhs.ind[i]] -= rhs.num[i];
    }
    return *this;
}


/**
 * Perform __binary_op(*this[i], rhs[i]) for every element of rhs and *this,
 * which must be of the same size, and write the result back to *this.
 */
template <class IT, class NT>
template <typename _BinaryOperation>
void FullyDistVec<IT,NT>::EWise(const FullyDistVec<IT,NT> & rhs, _BinaryOperation __binary_op)
{
    std::transform ( arr.begin(), arr.end(), rhs.arr.begin(), arr.begin(), __binary_op );
}

/**
 * Perform __binary_op(*this[i], rhs[i]) for every element of rhs and *this, which must be of the same size,
 * and write the result to the output vector -- hence the name EWiseOut.
 */
template <class IT, class NT>
template <typename _BinaryOperation, typename OUT>
void FullyDistVec<IT,NT>::EWiseOut(const FullyDistVec<IT,NT> & rhs, _BinaryOperation __binary_op, FullyDistVec<IT,OUT> & result)
{
    std::transform ( arr.begin(), arr.end(), rhs.arr.begin(), result.arr.begin(), __binary_op );
}
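
/*
 * Usage sketch (illustrative; v and w are hypothetical same-length vectors on
 * the same CommGrid, and fullWorld names their grid):
 *
 *   v.EWise(w, std::plus<double>());                   // v[i] = v[i] + w[i], in place
 *   FullyDistVec<int64_t, double> out(fullWorld, v.TotalLength(), 0.0);
 *   v.EWiseOut(w, std::multiplies<double>(), out);     // out[i] = v[i] * w[i]
 */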

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT, NT>::operator+=(const FullyDistVec<IT,NT> & rhs)
{
    if(this != &rhs)
    {
        if(!(*commGrid == *rhs.commGrid))
        {
            std::cout << "Grids are not comparable for elementwise addition" << std::endl;
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            EWise(rhs, std::plus<NT>());
        }
    }
    return *this;
}

template <class IT, class NT>
FullyDistVec<IT,NT> & FullyDistVec<IT, NT>::operator-=(const FullyDistVec<IT,NT> & rhs)
{
    if(this != &rhs)
    {
        if(!(*commGrid == *rhs.commGrid))
        {
            std::cout << "Grids are not comparable for elementwise subtraction" << std::endl;
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            EWise(rhs, std::minus<NT>());
        }
    }
    return *this;
}

template <class IT, class NT>
bool FullyDistVec<IT,NT>::operator==(const FullyDistVec<IT,NT> & rhs) const
{
    ErrorTolerantEqual<NT> epsilonequal;
    int local = 1;
    local = (int) std::equal(arr.begin(), arr.end(), rhs.arr.begin(), epsilonequal );
    int whole = 1;
    MPI_Allreduce( &local, &whole, 1, MPI_INT, MPI_BAND, commGrid->GetWorld());
    return static_cast<bool>(whole);
}

template <class IT, class NT>
template <typename _Predicate>
IT FullyDistVec<IT,NT>::Count(_Predicate pred) const
{
    IT local = std::count_if( arr.begin(), arr.end(), pred );
    IT whole = 0;
    MPI_Allreduce( &local, &whole, 1, MPIType<IT>(), MPI_SUM, commGrid->GetWorld());
    return whole;
}
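
/*
 * Usage sketch (illustrative; v is a hypothetical FullyDistVec<int64_t, double>):
 * predicates can be lambdas, and the result is identical on every rank thanks
 * to the MPI_Allreduce above:
 *
 *   int64_t nonneg = v.Count([](double x) { return x >= 0.0; });
 */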

//! Returns a dense vector of global indices
//! for which the predicate is satisfied
template <class IT, class NT>
template <typename _Predicate>
FullyDistVec<IT,IT> FullyDistVec<IT,NT>::FindInds(_Predicate pred) const
{
    FullyDistVec<IT,IT> found(commGrid);
    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();

    IT sizelocal = LocArrSize();
    IT sizesofar = LengthUntil();
    for(IT i=0; i<sizelocal; ++i)
    {
        if(pred(arr[i]))
        {
            found.arr.push_back(i+sizesofar);
        }
    }
    IT * dist = new IT[nprocs];
    IT nsize = found.arr.size();
    dist[rank] = nsize;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
    IT lengthuntil = std::accumulate(dist, dist+rank, static_cast<IT>(0));
    found.glen = std::accumulate(dist, dist+nprocs, static_cast<IT>(0));

    // Although the found vector is not reshuffled yet, its glen and commGrid are set,
    // so we can call the Owner/MyLocLength/LengthUntil functions (to infer the future distribution)

    // rebalance/redistribute
    int * sendcnt = new int[nprocs];
    std::fill(sendcnt, sendcnt+nprocs, 0);
    for(IT i=0; i<nsize; ++i)
    {
        IT locind;
        int owner = found.Owner(i+lengthuntil, locind);
        ++sendcnt[owner];
    }
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the counts

    int * sdispls = new int[nprocs];
    int * rdispls = new int[nprocs];
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    std::vector<IT> recvbuf(totrecv);

    // data is already in the right order in found.arr
    MPI_Alltoallv(&(found.arr[0]), sendcnt, sdispls, MPIType<IT>(), &(recvbuf[0]), recvcnt, rdispls, MPIType<IT>(), World);
    found.arr.swap(recvbuf);
    delete [] dist;
    DeleteAll(sendcnt, recvcnt, sdispls, rdispls);

    return found;
}
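
/*
 * Usage sketch (illustrative): the global indices of all zero entries of a
 * hypothetical FullyDistVec<int64_t, double> v, returned as a dense,
 * load-balanced FullyDistVec of indices:
 *
 *   FullyDistVec<int64_t, int64_t> zeroinds =
 *       v.FindInds([](double x) { return x == 0.0; });
 */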


//! Requires no communication because FullyDistSpVec (the return object)
//! is distributed based on length, not nonzero counts
template <class IT, class NT>
template <typename _Predicate>
FullyDistSpVec<IT,NT> FullyDistVec<IT,NT>::Find(_Predicate pred) const
{
    FullyDistSpVec<IT,NT> found(commGrid);
    size_t size = arr.size();
    for(size_t i=0; i<size; ++i)
    {
        if(pred(arr[i]))
        {
            found.ind.push_back( (IT) i);
            found.num.push_back(arr[i]);
        }
    }
    found.glen = glen;
    return found;
}


//! Retain a sparse vector with the indices where the supplied value is found
template <class IT, class NT>
FullyDistSpVec<IT,NT> FullyDistVec<IT,NT>::Find(NT val) const
{
    FullyDistSpVec<IT,NT> found(commGrid);
    size_t size = arr.size();
    for(size_t i=0; i<size; ++i)
    {
        if(arr[i]==val)
        {
            found.ind.push_back( (IT) i);
            found.num.push_back(val);
        }
    }
    found.glen = glen;
    return found;
}
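
/*
 * Usage sketch (illustrative; v is a hypothetical FullyDistVec<int64_t, double>).
 * Unlike FindInds, both Find overloads are communication-free:
 *
 *   FullyDistSpVec<int64_t, double> big  = v.Find([](double x) { return x > 10.0; });
 *   FullyDistSpVec<int64_t, double> ones = v.Find(1.0);    // entries equal to 1.0
 */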


template <class IT, class NT>
template <class HANDLER>
std::ifstream& FullyDistVec<IT,NT>::ReadDistribute (std::ifstream& infile, int master, HANDLER handler)
{
    FullyDistSpVec<IT,NT> tmpSpVec(commGrid);
    tmpSpVec.ReadDistribute(infile, master, handler);

    *this = tmpSpVec;
    return infile;
}

template <class IT, class NT>
template <class HANDLER>
void FullyDistVec<IT,NT>::SaveGathered(std::ofstream& outfile, int master, HANDLER handler, bool printProcSplits)
{
    FullyDistSpVec<IT,NT> tmpSpVec = *this;
    tmpSpVec.SaveGathered(outfile, master, handler, printProcSplits);
}

template <class IT, class NT>
void FullyDistVec<IT,NT>::SetElement (IT indx, NT numx)
{
    int rank = commGrid->GetRank();
    if (glen == 0)
    {
        if(rank == 0)
            std::cout << "FullyDistVec::SetElement can't be called on an empty vector." << std::endl;
        return;
    }
    IT locind;
    int owner = Owner(indx, locind);
    if(commGrid->GetRank() == owner)
    {
        if (locind > (LocArrSize() - 1))
        {
            std::cout << "FullyDistVec::SetElement cannot expand array" << std::endl;
        }
        else if (locind < 0)
        {
            std::cout << "FullyDistVec::SetElement local index < 0" << std::endl;
        }
        else
        {
            arr[locind] = numx;
        }
    }
}

template <class IT, class NT>
NT FullyDistVec<IT,NT>::GetElement (IT indx) const
{
    NT ret;
    MPI_Comm World = commGrid->GetWorld();
    int rank = commGrid->GetRank();
    if (glen == 0)
    {
        if(rank == 0)
            std::cout << "FullyDistVec::GetElement can't be called on an empty vector." << std::endl;

        return NT();
    }
    IT locind;
    int owner = Owner(indx, locind);
    if(commGrid->GetRank() == owner)
    {
        if (locind > (LocArrSize() - 1))
        {
            std::cout << "FullyDistVec::GetElement local index > size" << std::endl;
            ret = NT();
        }
        else if (locind < 0)
        {
            std::cout << "FullyDistVec::GetElement local index < 0" << std::endl;
            ret = NT();
        }
        else
        {
            ret = arr[locind];
        }
    }
    MPI_Bcast(&ret, 1, MPIType<NT>(), owner, World);
    return ret;
}
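
/*
 * Usage sketch (illustrative). SetElement only touches the owner's local array,
 * while GetElement ends in a broadcast and is therefore collective: every rank
 * of the grid must make the call:
 *
 *   v.SetElement(42, 3.14);       // warns and does nothing if 42 is out of range
 *   double x = v.GetElement(42);  // same value returned on all ranks
 */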

// Write to file using MPI-2 (one file, viewed through per-process displacements)
template <class IT, class NT>
void FullyDistVec<IT,NT>::DebugPrint()
{
    int nprocs, rank;
    MPI_Comm World = commGrid->GetWorld();
    MPI_Comm_rank(World, &rank);
    MPI_Comm_size(World, &nprocs);
    MPI_File thefile;
    char _fn[] = "temp_fullydistvec"; // AL: this is to avoid the problem that C++ string literals are const char* while C string literals are char*, leading to a const warning (technically an error, but compilers are tolerant)
    MPI_File_open(World, _fn, MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &thefile);
    IT lengthuntil = LengthUntil();

    // The disp displacement argument specifies the position
    // (absolute offset in bytes from the beginning of the file)
    char native[] = "native"; // AL: same const-correctness workaround as above
    MPI_File_set_view(thefile, int64_t(lengthuntil * sizeof(NT)), MPIType<NT>(), MPIType<NT>(), native, MPI_INFO_NULL);

    IT count = LocArrSize();
    MPI_File_write(thefile, &(arr[0]), count, MPIType<NT>(), MPI_STATUS_IGNORE);
    MPI_File_close(&thefile);

    // Now let processor 0 read the file and print
    IT * counts = new IT[nprocs];
    MPI_Gather(&count, 1, MPIType<IT>(), counts, 1, MPIType<IT>(), 0, World); // gather at root=0
    if(rank == 0)
    {
        FILE * f = fopen("temp_fullydistvec", "r");
        if(!f)
        {
            std::cerr << "Problem reading binary input file\n";
            delete [] counts;
            return;
        }
        IT maxd = *std::max_element(counts, counts+nprocs);
        NT * data = new NT[maxd];

        for(int i=0; i<nprocs; ++i)
        {
            // read counts[i] values and print them
            size_t result = fread(data, sizeof(NT), counts[i], f);
            if (result != (unsigned)counts[i]) { std::cout << "Error in fread, only " << result << " entries read" << std::endl; }

            std::cout << "Elements stored on proc " << i << ": {" ;
            for (IT j = 0; j < counts[i]; j++)
            {
                std::cout << data[j] << ",";
            }
            std::cout << "}" << std::endl;
        }
        fclose(f);
        delete [] data;
    }
    delete [] counts; // freed on every rank, not just the root
}

template <class IT, class NT>
template <typename _UnaryOperation, typename IRRELEVANT_NT>
void FullyDistVec<IT,NT>::Apply(_UnaryOperation __unary_op, const FullyDistSpVec<IT,IRRELEVANT_NT> & mask)
{
    typename std::vector< IT >::const_iterator miter = mask.ind.begin();
    while (miter < mask.ind.end())
    {
        IT index = *miter++;
        arr[index] = __unary_op(arr[index]);
    }
}

template <class IT, class NT>
template <typename _BinaryOperation, typename _BinaryPredicate, class NT2>
void FullyDistVec<IT,NT>::EWiseApply(const FullyDistVec<IT,NT2> & other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, const bool useExtendedBinOp)
{
    if(*(commGrid) == *(other.commGrid))
    {
        if(glen != other.glen)
        {
            std::ostringstream outs;
            outs << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::EWiseApply\n";
            SpParHelper::Print(outs.str());
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            typename std::vector< NT >::iterator thisIter = arr.begin();
            typename std::vector< NT2 >::const_iterator otherIter = other.arr.begin();
            while (thisIter < arr.end())
            {
                if (_do_op(*thisIter, *otherIter, false, false))
                    *thisIter = __binary_op(*thisIter, *otherIter, false, false);
                thisIter++;
                otherIter++;
            }
        }
    }
    else
    {
        std::ostringstream outs;
        outs << "Grids are not comparable for FullyDistVec<IT,NT>::EWiseApply" << std::endl;
        SpParHelper::Print(outs.str());
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }
}


// Note (Ariful): multithreaded implementation available only when applyNulls=false.
// TODO: employ multithreading when applyNulls=true
template <class IT, class NT>
template <typename _BinaryOperation, typename _BinaryPredicate, class NT2>
void FullyDistVec<IT,NT>::EWiseApply(const FullyDistSpVec<IT,NT2> & other, _BinaryOperation __binary_op, _BinaryPredicate _do_op, bool applyNulls, NT2 nullValue, const bool useExtendedBinOp)
{
    if(*(commGrid) == *(other.commGrid))
    {
        if(glen != other.glen)
        {
            std::cerr << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::EWiseApply\n";
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            typename std::vector< IT >::const_iterator otherInd = other.ind.begin();
            typename std::vector< NT2 >::const_iterator otherNum = other.num.begin();

            if (applyNulls) // scan the entire dense vector and apply sparse elements as they appear
            {
                for(IT i=0; (unsigned)i < arr.size(); ++i)
                {
                    if (otherInd == other.ind.end() || i < *otherInd)
                    {
                        if (_do_op(arr[i], nullValue, false, true))
                            arr[i] = __binary_op(arr[i], nullValue, false, true);
                    }
                    else
                    {
                        if (_do_op(arr[i], *otherNum, false, false))
                            arr[i] = __binary_op(arr[i], *otherNum, false, false);
                        otherInd++;
                        otherNum++;
                    }
                }
            }
            else // scan the sparse vector only
            {
                /*
                for(otherInd = other.ind.begin(); otherInd < other.ind.end(); otherInd++, otherNum++)
                {
                    if (_do_op(arr[*otherInd], *otherNum, false, false))
                        arr[*otherInd] = __binary_op(arr[*otherInd], *otherNum, false, false);
                }*/

                IT spsize = other.ind.size();
#ifdef _OPENMP
#pragma omp parallel for
#endif
                for(IT i=0; i< spsize; i++)
                {
                    if (_do_op(arr[other.ind[i]], other.num[i], false, false))
                        arr[other.ind[i]] = __binary_op(arr[other.ind[i]], other.num[i], false, false);
                }
            }
        }
    }
    else
    {
        std::cout << "Grids are not comparable for elementwise apply" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }
}


template <class IT, class NT>
FullyDistVec<IT, IT> FullyDistVec<IT, NT>::sort()
{
    MPI_Comm World = commGrid->GetWorld();
    FullyDistVec<IT,IT> temp(commGrid);
    IT nnz = LocArrSize();
    std::pair<NT,IT> * vecpair = new std::pair<NT,IT>[nnz];
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();

    IT * dist = new IT[nprocs];
    dist[rank] = nnz;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
    IT sizeuntil = LengthUntil(); // size = length, for dense vectors
    for(IT i=0; i< nnz; ++i)
    {
        vecpair[i].first = arr[i]; // we'll sort wrt numerical values
        vecpair[i].second = i + sizeuntil;
    }
    SpParHelper::MemoryEfficientPSort(vecpair, nnz, dist, World);

    std::vector< IT > narr(nnz);
    for(IT i=0; i< nnz; ++i)
    {
        arr[i] = vecpair[i].first; // sorted range (change the object itself)
        narr[i] = vecpair[i].second; // inverse permutation stored as numerical values
    }
    delete [] vecpair;
    delete [] dist;

    temp.glen = glen;
    temp.arr = narr;
    return temp;
}
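
/*
 * Usage sketch (illustrative): sort() orders *this in place and returns the
 * original global indices in sorted order, so companion vectors can be
 * reordered consistently via subsref (operator(), defined further below):
 *
 *   FullyDistVec<int64_t, int64_t> perm = keys.sort();    // keys is now sorted
 *   FullyDistVec<int64_t, double> reordered = vals(perm); // keys, vals: hypothetical vectors
 */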


// Randomly permutes an already existing vector
template <class IT, class NT>
void FullyDistVec<IT,NT>::RandPerm()
{
#ifdef DETERMINISTIC
    uint64_t seed = 1383098845;
#else
    uint64_t seed = time(NULL);
#endif

    MTRand M(seed); // generate random numbers with Mersenne Twister
    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int rank = commGrid->GetRank();
    IT size = LocArrSize();

#ifdef COMBBLAS_LEGACY
    std::pair<double,NT> * vecpair = new std::pair<double,NT>[size];
    IT * dist = new IT[nprocs];
    dist[rank] = size;
    MPI_Allgather(MPI_IN_PLACE, 1, MPIType<IT>(), dist, 1, MPIType<IT>(), World);
    for(IT i=0; i<size; ++i)
    {
        vecpair[i].first = M.rand();
        vecpair[i].second = arr[i];
    }
    // less< pair<T1,T2> > works correctly (sorts wrt first elements)
    SpParHelper::MemoryEfficientPSort(vecpair, size, dist, World);
    std::vector< NT > nnum(size);
    for(IT i=0; i<size; ++i) nnum[i] = vecpair[i].second;
    DeleteAll(vecpair, dist);
    arr.swap(nnum);
#else
    std::vector< std::vector< NT > > data_send(nprocs);
    for(IT i=0; i<size; ++i)
    {
        // send each entry to a random process
        uint32_t dest = M.randInt(nprocs-1);
        data_send[dest].push_back(arr[i]);
    }
    int * sendcnt = new int[nprocs];
    int * sdispls = new int[nprocs];
    for(int i=0; i<nprocs; ++i) sendcnt[i] = (int) data_send[i].size();

    int * rdispls = new int[nprocs];
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the request counts
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    if(totrecv > std::numeric_limits<int>::max())
    {
        std::cout << "COMBBLAS_WARNING: total data to receive exceeds max int: " << totrecv << std::endl;
    }
    std::vector<NT>().swap(arr); // make space for temporaries

    NT * sendbuf = new NT[size];
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(data_send[i].begin(), data_send[i].end(), sendbuf+sdispls[i]);
        std::vector<NT>().swap(data_send[i]); // free memory
    }
    NT * recvbuf = new NT[totrecv];
    MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<NT>(), recvbuf, recvcnt, rdispls, MPIType<NT>(), World);
    //std::random_shuffle(recvbuf, recvbuf + totrecv);
    std::default_random_engine gen(seed);
    std::shuffle(recvbuf, recvbuf + totrecv, gen); // locally shuffle the received data

    int64_t * localcounts = new int64_t[nprocs];
    localcounts[rank] = totrecv;
    MPI_Allgather(MPI_IN_PLACE, 1, MPI_LONG_LONG, localcounts, 1, MPI_LONG_LONG, World);
    int64_t glenuntil = std::accumulate(localcounts, localcounts+rank, static_cast<int64_t>(0));

    std::vector< std::vector< IT > > locs_send(nprocs);
    for(IT i=0; i< totrecv; ++i) // determine the new locations w/ prefix sums
    {
        IT remotelocind;
        int owner = Owner(glenuntil+i, remotelocind);
        locs_send[owner].push_back(remotelocind);
        data_send[owner].push_back(recvbuf[i]);
    }

    for(int i=0; i<nprocs; ++i) sendcnt[i] = (int) data_send[i].size(); // = locs_send[i].size()
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World);
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT newsize = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    if(newsize > std::numeric_limits<int>::max())
    {
        std::cout << "COMBBLAS_WARNING: total data to receive exceeds max int: " << newsize << std::endl;
    }
    // re-use the receive buffer as the send buffer of the second stage
    IT totalsend = std::accumulate(sendcnt, sendcnt+nprocs, static_cast<IT>(0));
    if(totalsend != totrecv || newsize != size)
    {
        std::cout << "COMBBLAS_WARNING: sending differently sized data than received: " << totalsend << " != " << totrecv << " , " << newsize << " != " << size << std::endl;
    }
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(data_send[i].begin(), data_send[i].end(), recvbuf+sdispls[i]);
        std::vector<NT>().swap(data_send[i]); // free memory
    }
    // re-use the send buffer as the receive buffer of the second stage
    MPI_Alltoallv(recvbuf, sendcnt, sdispls, MPIType<NT>(), sendbuf, recvcnt, rdispls, MPIType<NT>(), World);
    delete [] recvbuf;
    IT * newinds = new IT[totalsend];
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(locs_send[i].begin(), locs_send[i].end(), newinds+sdispls[i]);
        std::vector<IT>().swap(locs_send[i]); // free memory
    }
    IT * indsbuf = new IT[size];
    MPI_Alltoallv(newinds, sendcnt, sdispls, MPIType<IT>(), indsbuf, recvcnt, rdispls, MPIType<IT>(), World);
    DeleteAll(newinds, sendcnt, sdispls, rdispls, recvcnt);
    arr.resize(size);
    for(IT i=0; i<size; ++i)
    {
        arr[indsbuf[i]] = sendbuf[i];
    }
    DeleteAll(sendbuf, indsbuf, localcounts); // free the remaining temporaries
#endif
}

// ABAB: In its current form, unless LengthUntil returns NT,
// this won't work reliably for anything other than integers
template <class IT, class NT>
void FullyDistVec<IT,NT>::iota(IT globalsize, NT first)
{
    glen = globalsize;
    IT length = MyLocLength(); // only needs glen to determine the local length

    arr.resize(length);
    SpHelper::iota(arr.begin(), arr.end(), LengthUntil() + first); // global across processors
}
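
/*
 * Usage sketch (illustrative): iota is a common building block for permutations;
 * `fullWorld` and `n` again name a hypothetical CommGrid and global length:
 *
 *   FullyDistVec<int64_t, int64_t> p(fullWorld);
 *   p.iota(n, 0);      // p = {0, 1, ..., n-1}
 *   p.RandPerm();      // p is now a uniformly random permutation
 */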

template <class IT, class NT>
FullyDistVec<IT,NT> FullyDistVec<IT,NT>::operator() (const FullyDistVec<IT,IT> & ri) const
{
    if(!(*commGrid == *ri.commGrid))
    {
        std::cout << "Grids are not comparable for dense vector subsref" << std::endl;
        return FullyDistVec<IT,NT>();
    }

    MPI_Comm World = commGrid->GetWorld();
    FullyDistVec<IT,NT> Indexed(commGrid, ri.glen, NT()); // length(Indexed) = length(ri)
    int nprocs = commGrid->GetSize();
    std::vector< std::vector< IT > > data_req(nprocs);
    std::vector< std::vector< IT > > revr_map(nprocs); // to put the incoming data in the correct location

    IT riloclen = ri.LocArrSize();
    for(IT i=0; i < riloclen; ++i)
    {
        IT locind;
        int owner = Owner(ri.arr[i], locind); // numerical values in ri are 0-based
        data_req[owner].push_back(locind);
        revr_map[owner].push_back(i);
    }
    IT * sendbuf = new IT[riloclen];
    int * sendcnt = new int[nprocs];
    int * sdispls = new int[nprocs];
    for(int i=0; i<nprocs; ++i)
        sendcnt[i] = (int) data_req[i].size();

    int * rdispls = new int[nprocs];
    int * recvcnt = new int[nprocs];
    MPI_Alltoall(sendcnt, 1, MPI_INT, recvcnt, 1, MPI_INT, World); // share the request counts
    sdispls[0] = 0;
    rdispls[0] = 0;
    for(int i=0; i<nprocs-1; ++i)
    {
        sdispls[i+1] = sdispls[i] + sendcnt[i];
        rdispls[i+1] = rdispls[i] + recvcnt[i];
    }
    IT totrecv = std::accumulate(recvcnt, recvcnt+nprocs, static_cast<IT>(0));
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(data_req[i].begin(), data_req[i].end(), sendbuf+sdispls[i]);
        std::vector<IT>().swap(data_req[i]);
    }

    IT * reversemap = new IT[riloclen];
    for(int i=0; i<nprocs; ++i)
    {
        std::copy(revr_map[i].begin(), revr_map[i].end(), reversemap+sdispls[i]); // reversemap array is unique
        std::vector<IT>().swap(revr_map[i]);
    }

    IT * recvbuf = new IT[totrecv];
    MPI_Alltoallv(sendbuf, sendcnt, sdispls, MPIType<IT>(), recvbuf, recvcnt, rdispls, MPIType<IT>(), World); // request data
    delete [] sendbuf;

    // We will return the requested data;
    // our response is exactly as big as the request.
    // As we are indexing a dense vector, all elements exist,
    // so the displacement boundaries are the same as rdispls
    NT * databack = new NT[totrecv];
    for(int i=0; i<nprocs; ++i)
    {
        for(int j = rdispls[i]; j < rdispls[i] + recvcnt[i]; ++j) // fetch the numerical values
        {
            databack[j] = arr[recvbuf[j]];
        }
    }
    delete [] recvbuf;
    NT * databuf = new NT[riloclen];

    // the response counts are the same as the request counts
    MPI_Alltoallv(databack, recvcnt, rdispls, MPIType<NT>(), databuf, sendcnt, sdispls, MPIType<NT>(), World); // send data
    DeleteAll(rdispls, recvcnt, databack);

    // Now create the output from databuf
    // Indexed.arr is already allocated in the constructor
    for(int i=0; i<nprocs; ++i)
    {
        for(int j=sdispls[i]; j< sdispls[i]+sendcnt[i]; ++j)
        {
            Indexed.arr[reversemap[j]] = databuf[j];
        }
    }
    DeleteAll(sdispls, sendcnt, databuf, reversemap);
    return Indexed;
}
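
/*
 * Usage sketch (illustrative): Matlab-style subsref. Given a hypothetical index
 * vector p on the same grid (e.g., one produced by iota + RandPerm above):
 *
 *   FullyDistVec<int64_t, double> permuted = v(p);   // permuted[i] = v[p[i]]
 */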


template <class IT, class NT>
void FullyDistVec<IT,NT>::PrintInfo(std::string vectorname) const
{
    IT totl = TotalLength();
    if (commGrid->GetRank() == 0)
        std::cout << "As a whole, " << vectorname << " has length " << totl << std::endl;
}


template <class IT, class NT>
void FullyDistVec<IT,NT>::Set(const FullyDistSpVec< IT,NT > & other)
{
    if(*(commGrid) == *(other.commGrid))
    {
        if(glen != other.glen)
        {
            std::cerr << "Vector dimensions don't match (" << glen << " vs " << other.glen << ") for FullyDistVec::Set\n";
            MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
        }
        else
        {
            IT spvecsize = other.getlocnnz();
#ifdef _OPENMP
#pragma omp parallel for
#endif
            for(IT i=0; i< spvecsize; ++i)
            {
                arr[other.ind[i]] = other.num[i];
            }
        }
    }
    else
    {
        std::cout << "Grids are not comparable for Set" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }
}


// General purpose set operation on a dense vector by a sparse vector

template <class IT, class NT>
template <class NT1, typename _BinaryOperationIdx, typename _BinaryOperationVal>
void FullyDistVec<IT,NT>::GSet (const FullyDistSpVec<IT,NT1> & spVec, _BinaryOperationIdx __binopIdx, _BinaryOperationVal __binopVal, MPI_Win win)
{
    if(*(commGrid) != *(spVec.commGrid))
    {
        std::cout << "Grids are not comparable for GSet" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }

    IT spVecSize = spVec.getlocnnz();
    if(spVecSize==0) return;

    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int myrank = commGrid->GetRank(); // rank within the vector's own communicator, which also indexes datsent/indsent

    std::vector< std::vector< NT > > datsent(nprocs);
    std::vector< std::vector< IT > > indsent(nprocs);
    IT lengthUntil = spVec.LengthUntil();

    for(IT k=0; k < spVecSize; ++k)
    {
        IT locind;
        // get the global index of the dense vector from the value; most often a select operator.
        // If the first operand is selected, this is an invert; otherwise, an EWiseApply.
        IT globind = __binopIdx(spVec.num[k], spVec.ind[k] + lengthUntil);
        int owner = Owner(globind, locind); // get the local index
        NT val = __binopVal(spVec.num[k], spVec.ind[k] + lengthUntil);
        if(globind < glen) // prevent an index beyond the size of the composed vector
        {
            datsent[owner].push_back(val);
            indsent[owner].push_back(locind); // so that no correction is needed at the recipient
        }
    }

    for(size_t j = 0; j < datsent[myrank].size(); ++j) // directly set local entries
    {
        arr[indsent[myrank][j]] = datsent[myrank][j];
    }

    // the caller creates and owns the window, e.g.:
    //MPI_Win win;
    //MPI_Win_create(&arr[0], LocArrSize() * sizeof(NT), sizeof(NT), MPI_INFO_NULL, World, &win);
    //MPI_Win_fence(0, win);
    for(int i=0; i<nprocs; ++i)
    {
        if(i!=myrank)
        {
            MPI_Win_lock(MPI_LOCK_SHARED, i, MPI_MODE_NOCHECK, win);
            for(size_t j = 0; j < datsent[i].size(); ++j)
            {
                MPI_Put(&datsent[i][j], 1, MPIType<NT>(), i, indsent[i][j], 1, MPIType<NT>(), win);
            }
            MPI_Win_unlock(i, win);
        }
    }
    //MPI_Win_fence(0, win);
    //MPI_Win_free(&win);
}
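
/*
 * Usage sketch (illustrative, mirroring the commented-out window code above):
 * the caller owns the passed-in window, created over this vector's local array,
 * so several GSet calls can share a single window:
 *
 *   MPI_Win win;
 *   MPI_Win_create(&arr[0], LocArrSize() * sizeof(NT), sizeof(NT),
 *                  MPI_INFO_NULL, commGrid->GetWorld(), &win);
 *   // ... one or more GSet calls ...
 *   MPI_Win_free(&win);
 */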


// General purpose get operation on a dense vector by a sparse vector:
// get the elements of the dense vector indexed by the values of the sparse vector.
// Note: invert-and-get might not work in the presence of repeated values

template <class IT, class NT>
template <class NT1, typename _BinaryOperationIdx>
FullyDistSpVec<IT,NT> FullyDistVec<IT,NT>::GGet (const FullyDistSpVec<IT,NT1> & spVec, _BinaryOperationIdx __binopIdx, NT nullValue)
{
    if(*(commGrid) != *(spVec.commGrid))
    {
        std::cout << "Grids are not comparable for GGet" << std::endl;
        MPI_Abort(MPI_COMM_WORLD, GRIDMISMATCH);
    }

    MPI_Comm World = commGrid->GetWorld();
    int nprocs = commGrid->GetSize();
    int myrank = commGrid->GetRank(); // rank within the vector's own communicator, which also indexes spIdx/indsent

    std::vector< std::vector< IT > > spIdx(nprocs);
    std::vector< std::vector< IT > > indsent(nprocs);
    IT lengthUntil = spVec.LengthUntil();
    IT spVecSize = spVec.getlocnnz();

    FullyDistSpVec<IT, NT> res(spVec.commGrid, spVec.TotalLength());
    res.ind.resize(spVecSize);
    res.num.resize(spVecSize);

    for(IT k=0; k < spVecSize; ++k)
    {
        IT locind;
        // get the global index of the dense vector from the value; most often a select operator.
        // If the first operand is selected, this is an invert; otherwise, an EWiseApply.
        IT globind = __binopIdx(spVec.num[k], spVec.ind[k] + lengthUntil);
        int owner = Owner(globind, locind); // get the local index
        if(globind < glen) // prevent an index beyond the size of the composed vector
        {
            spIdx[owner].push_back(k); // position within spVec
            indsent[owner].push_back(locind); // so that no correction is needed at the recipient
        }
        else
        {
            res.num[k] = nullValue;
        }
        res.ind[k] = spVec.ind[k];
    }

    for(size_t j = 0; j < indsent[myrank].size(); ++j) // directly get local entries
    {
        res.num[spIdx[myrank][j]] = arr[indsent[myrank][j]];
    }

    MPI_Win win;
    MPI_Win_create(&arr[0], LocArrSize() * sizeof(NT), sizeof(NT), MPI_INFO_NULL, World, &win);
    for(int i=0; i<nprocs; ++i)
    {
        if(i!=myrank)
        {
            MPI_Win_lock(MPI_LOCK_SHARED, i, MPI_MODE_NOCHECK, win);
            for(size_t j = 0; j < indsent[i].size(); ++j)
            {
                MPI_Get(&res.num[spIdx[i][j]], 1, MPIType<NT>(), i, indsent[i][j], 1, MPIType<NT>(), win);
            }
            MPI_Win_unlock(i, win);
        }
    }
    MPI_Win_free(&win);

    return res;
}

}