1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #ifndef _SP_PAR_MAT_H_
31 #define _SP_PAR_MAT_H_
32 
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <mpi.h>
39 
40 #include "SpMat.h"
41 #include "SpTuples.h"
42 #include "SpDCCols.h"
43 #include "CommGrid.h"
44 #include "MPIType.h"
45 #include "LocArr.h"
46 #include "SpDefs.h"
47 #include "Deleter.h"
48 #include "SpHelper.h"
49 #include "SpParHelper.h"
50 #include "DenseParMat.h"
51 #include "FullyDistVec.h"
52 #include "Friends.h"
53 #include "Operations.h"
54 #include "DistEdgeList.h"
55 #include "CombBLAS.h"
56 
57 namespace combblas {
58 
59 /**
60   * Fundamental 2D distributed sparse matrix class
61   * The index type IT is encapsulated by the class in a way that it is only
62   * guarantee that the implementation will ensure the requested semantics.
63   * For instance, if IT=int64 then the implementation can still use 32 bit
64   * local indices but it should return correct 64-bit numbers in its functions.
65   * In other words, DER can be SpDCCols<int32_t, double> while IT=int64_t
66   */
67 template <class IT, class NT, class DER>
68 class SpParMat
69 {
70 public:
71 	typedef typename DER::LocalIT LocalIT;
72 	typedef typename DER::LocalNT LocalNT;
73 	typedef IT GlobalIT;
74 	typedef NT GlobalNT;
75 
76 	// Constructors
77 	SpParMat ();
78     SpParMat (MPI_Comm world); 	// ABAB: there is risk that any integer would call this constructor due to MPICH representation
79 	SpParMat (std::shared_ptr<CommGrid> grid);
80 	SpParMat (DER * myseq, std::shared_ptr<CommGrid> grid);
81 
82 	SpParMat (std::ifstream & input, MPI_Comm & world);
83 	SpParMat (DER * myseq, MPI_Comm & world);
84 
85 	template <class DELIT>
86 	SpParMat (const DistEdgeList< DELIT > & rhs, bool removeloops = true);	// conversion from distributed edge list
87 
88 	SpParMat (const SpParMat< IT,NT,DER > & rhs);				// copy constructor
89 
90 	SpParMat (IT total_m, IT total_n, const FullyDistVec<IT,IT> & , const FullyDistVec<IT,IT> & , const FullyDistVec<IT,NT> & , bool SumDuplicates = false);	// matlab sparse
91 	SpParMat (IT total_m, IT total_n, const FullyDistVec<IT,IT> & , const FullyDistVec<IT,IT> & , const NT & , bool SumDuplicates = false);	// matlab sparse
92 	SpParMat< IT,NT,DER > & operator=(const SpParMat< IT,NT,DER > & rhs);	// assignment operator
93 	SpParMat< IT,NT,DER > & operator+=(const SpParMat< IT,NT,DER > & rhs);
94 	~SpParMat ();
95 
96 	template <typename SR>
97 	void Square ();
98 
99 	float LoadImbalance() const;
100 	void Transpose();
101 	void FreeMemory();
102 	void EWiseMult (const SpParMat< IT,NT,DER >  & rhs, bool exclude);
103 	void EWiseScale (const DenseParMat<IT,NT> & rhs);
104 	void Find (FullyDistVec<IT,IT> & , FullyDistVec<IT,IT> & , FullyDistVec<IT,NT> & ) const;
105 	void Find (FullyDistVec<IT,IT> & , FullyDistVec<IT,IT> & ) const;
106 
107 	template <typename _BinaryOperation>
108 	void DimApply(Dim dim, const FullyDistVec<IT, NT>& v, _BinaryOperation __binary_op);
109 
110 	template <typename _BinaryOperation, typename _UnaryOperation >
111 	FullyDistVec<IT,NT> Reduce(Dim dim, _BinaryOperation __binary_op, NT id, _UnaryOperation __unary_op) const;
112 
113 	template <typename _BinaryOperation>
114 	FullyDistVec<IT,NT> Reduce(Dim dim, _BinaryOperation __binary_op, NT id) const;
115 
116 	template <typename VT, typename GIT, typename _BinaryOperation, typename _UnaryOperation >
117 	void Reduce(FullyDistVec<GIT,VT> & rvec, Dim dim, _BinaryOperation __binary_op, VT id, _UnaryOperation __unary_op) const;
118 
119 	template <typename VT, typename GIT, typename _BinaryOperation>
120 	void Reduce(FullyDistVec<GIT,VT> & rvec, Dim dim, _BinaryOperation __binary_op, VT id) const;
121 
122     template <typename VT, typename GIT>
123     bool Kselect(FullyDistVec<GIT,VT> & rvec, IT k_limit, int kselectVersion) const;
124     template <typename VT, typename GIT>
125     bool Kselect(FullyDistSpVec<GIT,VT> & kth, IT k_limit, int kselectVersion) const; //sparse case
126 
127     template <typename VT, typename GIT, typename _UnaryOperation>
128     bool Kselect1(FullyDistVec<GIT,VT> & rvec, IT k_limit, _UnaryOperation __unary_op) const; // TODO: make private
129     template <typename VT, typename GIT, typename _UnaryOperation>
130     bool Kselect1(FullyDistSpVec<GIT,VT> & rvec, IT k_limit, _UnaryOperation __unary_op) const; // TODO: make private
131     template <typename VT, typename GIT>
132     bool Kselect1(FullyDistVec<GIT,VT> & rvec, IT k_limit) const; // TODO: make private
133     template <typename VT, typename GIT>
134     bool Kselect2(FullyDistVec<GIT,VT> & rvec, IT k_limit) const; // TODO: make private
135 
136     IT Bandwidth() const;
137     IT Profile() const;
138 
139     template <typename VT, typename GIT, typename _BinaryOperation>
140     void MaskedReduce(FullyDistVec<GIT,VT> & rvec, FullyDistSpVec<GIT,VT> & mask, Dim dim, _BinaryOperation __binary_op, VT id, bool exclude=false) const;
141     template <typename VT, typename GIT, typename _BinaryOperation, typename _UnaryOperation >
142     void MaskedReduce(FullyDistVec<GIT,VT> & rvec, FullyDistSpVec<GIT,VT> & mask, Dim dim, _BinaryOperation __binary_op, VT id, _UnaryOperation __unary_op, bool exclude=false) const;
143 
144 	template <typename _UnaryOperation>
Apply(_UnaryOperation __unary_op)145 	void Apply(_UnaryOperation __unary_op)
146 	{
147 		spSeq->Apply(__unary_op);
148 	}
149 
150 	IT RemoveLoops();	// returns the number of loops removed
151 	void AddLoops(NT loopval, bool replaceExisting=false);
152     void AddLoops(FullyDistVec<IT,NT> loopvals, bool replaceExisting=false);
153 
154 	template <typename LIT, typename OT>
155 	void OptimizeForGraph500(OptBuf<LIT,OT> & optbuf);
156 	void ActivateThreading(int numsplits);	//<! As of version 1.2, only works with boolean matrices
157 
158 	template <typename _UnaryOperation>
159 	SpParMat<IT,NT,DER> PruneI(_UnaryOperation __unary_op, bool inPlace = true) //<! Prune any nonzero entries based on both row/column indices and value
160 	{
161 		IT grow=0, gcol=0;
162 		GetPlaceInGlobalGrid(grow, gcol);
163 		if (inPlace)
164 		{
165 			spSeq->PruneI(__unary_op, inPlace, grow, gcol);
166 			return SpParMat<IT,NT,DER>(getcommgrid()); // return blank to match signature
167 		}
168 		else
169 		{
170 			return SpParMat<IT,NT,DER>(spSeq->PruneI(__unary_op, inPlace, grow, gcol), commGrid);
171 		}
172 	}
173 
174 	template <typename _UnaryOperation>
175 	SpParMat<IT,NT,DER> Prune(_UnaryOperation __unary_op, bool inPlace = true) //<! Prune any nonzero entries for which the __unary_op evaluates to true (solely based on value)
176 	{
177 		if (inPlace)
178 		{
179 			spSeq->Prune(__unary_op, inPlace);
180 			return SpParMat<IT,NT,DER>(getcommgrid()); // return blank to match signature
181 		}
182 		else
183 		{
184 			return SpParMat<IT,NT,DER>(spSeq->Prune(__unary_op, inPlace), commGrid);
185 		}
186 	}
187 
188     template <typename _BinaryOperation>
189     SpParMat<IT,NT,DER> PruneColumn(const FullyDistVec<IT,NT> & pvals, _BinaryOperation __binary_op, bool inPlace=true);
190 
191     template <typename _BinaryOperation>
192     SpParMat<IT,NT,DER> PruneColumn(const FullyDistSpVec<IT,NT> & pvals, _BinaryOperation __binary_op, bool inPlace=true);
193 
194 	template <typename _BinaryOperation>
195 	void UpdateDense(DenseParMat<IT, NT> & rhs, _BinaryOperation __binary_op) const;
196 
197 	void Dump(std::string filename) const;
198 	void PrintInfo() const;
199 
200 	template <typename NNT, typename NDER> operator SpParMat< IT,NNT,NDER > () const;	//!< Type conversion operator
201 	template <typename NIT, typename NNT, typename NDER> operator SpParMat< NIT,NNT,NDER > () const;	//!< Type conversion operator (for indices as well)
202 
203 	IT getnrow() const;
204 	IT getncol() const;
205 	IT getnnz() const;
206 
207     template <typename LIT>
208     int Owner(IT total_m, IT total_n, IT grow, IT gcol, LIT & lrow, LIT & lcol) const;
209 
210 	SpParMat<IT,NT,DER> SubsRefCol (const std::vector<IT> & ci) const;				//!< Column indexing with special parallel semantics
211 
212 	//! General indexing with serial semantics
213 	template <typename SelectFirstSR, typename SelectSecondSR>
214 	SpParMat<IT,NT,DER> SubsRef_SR (const FullyDistVec<IT,IT> & ri, const FullyDistVec<IT,IT> & ci, bool inplace=false);
215 
operator()216 	SpParMat<IT,NT,DER> operator() (const FullyDistVec<IT,IT> & ri, const FullyDistVec<IT,IT> & ci, bool inplace=false)
217 	{
218 		return SubsRef_SR<BoolCopy1stSRing<NT>, BoolCopy2ndSRing<NT> >(ri, ci, inplace);
219 	}
220 	void Prune(const FullyDistVec<IT,IT> & ri, const FullyDistVec<IT,IT> & ci);	//!< prune all entries whose row indices are in ri and column indices are in ci
221 	void SpAsgn(const FullyDistVec<IT,IT> & ri, const FullyDistVec<IT,IT> & ci, SpParMat<IT,NT,DER> & B);
222 
223 	bool operator== (const SpParMat<IT,NT,DER> & rhs) const;
224 
225 	class ScalarReadSaveHandler
226 	{
227 	public:
getNoNum(IT row,IT col)228 		NT getNoNum(IT row, IT col) { return static_cast<NT>(1); }
binaryfill(FILE * rFile,IT & row,IT & col,NT & val)229 		void binaryfill(FILE * rFile, IT & row, IT & col, NT & val)
230 		{
231 			if (fread(&row, sizeof(IT), 1,rFile) != 1)
232 				std::cout << "binaryfill(): error reading row index" << std::endl;
233 			if (fread(&col, sizeof(IT), 1,rFile) != 1)
234 				std::cout << "binaryfill(): error reading col index" << std::endl;
235 			if (fread(&val, sizeof(NT), 1,rFile) != 1)
236 				std::cout << "binaryfill(): error reading value" << std::endl;
237 			return;
238 		}
entrylength()239 		size_t entrylength() { return 2*sizeof(IT)+sizeof(NT); }
240 
241 		template <typename c, typename t>
read(std::basic_istream<c,t> & is,IT row,IT col)242 		NT read(std::basic_istream<c,t>& is, IT row, IT col)
243 		{
244 			NT v;
245 			is >> v;
246 			return v;
247 		}
248 
249 		template <typename c, typename t>
save(std::basic_ostream<c,t> & os,const NT & v,IT row,IT col)250 		void save(std::basic_ostream<c,t>& os, const NT& v, IT row, IT col)
251 		{
252 			os << v;
253 		}
254 	};
255 
256    	template <typename _BinaryOperation>
257     	void ParallelReadMM (const std::string & filename, bool onebased, _BinaryOperation BinOp);
258 
259     	template <typename _BinaryOperation>
260     	FullyDistVec<IT,std::array<char, MAXVERTNAME>> ReadGeneralizedTuples(const std::string&, _BinaryOperation);
261 
262 	template <class HANDLER>
263 	void ReadDistribute (const std::string & filename, int master, bool nonum, HANDLER handler, bool transpose = false, bool pario = false);
264 	void ReadDistribute (const std::string & filename, int master, bool nonum=false, bool pario = false)
265 	{
266 		ReadDistribute(filename, master, nonum, ScalarReadSaveHandler(), false, pario);
267 	}
268 
269 	template <class HANDLER>
270 	void SaveGathered(std::string filename, HANDLER handler, bool transpose = false) const;
SaveGathered(std::string filename)271 	void SaveGathered(std::string filename) const { SaveGathered(filename, ScalarReadSaveHandler(), false); }
272 
273 	std::ofstream& put(std::ofstream& outfile) const;
274 
getcommgrid()275 	std::shared_ptr<CommGrid> getcommgrid() const { return commGrid; }
getlocalrows()276 	typename DER::LocalIT getlocalrows() const { return spSeq->getnrow(); }
getlocalcols()277 	typename DER::LocalIT getlocalcols() const { return spSeq->getncol();}
getlocalnnz()278 	typename DER::LocalIT getlocalnnz() const { return spSeq->getnnz(); }
seq()279 	DER & seq() { return (*spSeq); }
seqptr()280 	DER * seqptr() { return spSeq; }
281 
282     template <typename _BinaryOperation, typename LIT>
283     void SparseCommon(std::vector< std::vector < std::tuple<LIT,LIT,NT> > > & data, LIT locsize, IT total_m, IT total_n, _BinaryOperation BinOp);
284 
285 	//! Friend declarations
286 	template <typename SR, typename NUO, typename UDERO, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
287 	friend SpParMat<IU, NUO, UDERO>
288 	Mult_AnXBn_DoubleBuff (SpParMat<IU,NU1,UDER1> & A, SpParMat<IU,NU2,UDER2> & B, bool clearA, bool clearB);
289 
290 	template <typename SR, typename NUO, typename UDERO, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
291 	friend SpParMat<IU,NUO,UDERO>
292 	Mult_AnXBn_Synch (SpParMat<IU,NU1,UDER1> & A, SpParMat<IU,NU2,UDER2> & B, bool clearA, bool clearB);
293 
294     template <typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB>
295     friend int64_t EstPerProcessNnzSUMMA(SpParMat<IU,NU1,UDERA> & A, SpParMat<IU,NU2,UDERB> & B);
296 
297 	template <typename SR, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
298 	friend SpParMat<IU,typename promote_trait<NU1,NU2>::T_promote,typename promote_trait<UDER1,UDER2>::T_promote>
299 	Mult_AnXBn_ActiveTarget (const SpParMat<IU,NU1,UDER1> & A, const SpParMat<IU,NU2,UDER2> & B );
300 
301 	template <typename SR, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
302 	friend SpParMat<IU,typename promote_trait<NU1,NU2>::T_promote,typename promote_trait<UDER1,UDER2>::T_promote>
303 	Mult_AnXBn_PassiveTarget (const SpParMat<IU,NU1,UDER1> & A, const SpParMat<IU,NU2,UDER2> & B );
304 
305 	template <typename SR, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
306 	friend SpParMat<IU,typename promote_trait<NU1,NU2>::T_promote,typename promote_trait<UDER1,UDER2>::T_promote>
307 	Mult_AnXBn_Fence (const SpParMat<IU,NU1,UDER1> & A, const SpParMat<IU,NU2,UDER2> & B );
308 
309     template <typename SR, typename NUO, typename UDERO, typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB>
310     friend SpParMat<IU,NUO,UDERO> MemEfficientSpGEMM (SpParMat<IU,NU1,UDERA> & A, SpParMat<IU,NU2,UDERB> & B,
311                                                int phases, NUO hardThreshold, IU selectNum, IU recoverNum, NUO recoverPct, int kselectVersion, int64_t perProcessMem);
312 
313 	template <typename SR, typename IU, typename NUM, typename NUV, typename UDER>
314 	friend FullyDistSpVec<IU,typename promote_trait<NUM,NUV>::T_promote>
315 	SpMV (const SpParMat<IU,NUM,UDER> & A, const FullyDistSpVec<IU,NUV> & x );
316 
317 	template <typename SR, typename IU, typename NUM, typename NUV, typename UDER>
318 	friend FullyDistVec<IU,typename promote_trait<NUM,NUV>::T_promote>
319 	SpMV (const SpParMat<IU,NUM,UDER> & A, const FullyDistVec<IU,NUV> & x );
320 
321 	template <typename SR, typename IU, typename NUM, typename UDER>
322 	friend FullyDistSpVec<IU,typename promote_trait<NUM,IU>::T_promote>
323 	SpMV (const SpParMat<IU,NUM,UDER> & A, const FullyDistSpVec<IU,IU> & x, bool indexisvalue);
324 
325 	// output type is part of the signature
326 	template <typename SR, typename IVT, typename OVT, typename IU, typename NUM, typename UDER>
327 	friend void SpMV (const SpParMat<IU,NUM,UDER> & A, const FullyDistSpVec<IU,IVT> & x, FullyDistSpVec<IU,OVT> & y, bool indexisvalue);
328 
329 	template <typename SR, typename IVT, typename OVT, typename IU, typename NUM, typename UDER>
330 	friend void SpMV (const SpParMat<IU,NUM,UDER> & A, const FullyDistSpVec<IU,IVT> & x, FullyDistSpVec<IU,OVT> & y,bool indexisvalue, OptBuf<int32_t, OVT > & optbuf);
331 
332 	template <typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
333 	friend SpParMat<IU,typename promote_trait<NU1,NU2>::T_promote,typename promote_trait<UDER1,UDER2>::T_promote>
334 	EWiseMult (const SpParMat<IU,NU1,UDER1> & A, const SpParMat<IU,NU2,UDER2> & B , bool exclude);
335 
336 	template <typename RETT, typename RETDER, typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB, typename _BinaryOperation>
337 	friend SpParMat<IU,RETT,RETDER>
338 	EWiseApply (const SpParMat<IU,NU1,UDERA> & A, const SpParMat<IU,NU2,UDERB> & B, _BinaryOperation __binary_op, bool notB, const NU2& defaultBVal);
339 
340 	template <typename RETT, typename RETDER, typename IU, typename NU1, typename NU2, typename UDERA, typename UDERB, typename _BinaryOperation, typename _BinaryPredicate>
341 	friend SpParMat<IU,RETT,RETDER>
342 	EWiseApply (const SpParMat<IU,NU1,UDERA> & A, const SpParMat<IU,NU2,UDERB> & B, _BinaryOperation __binary_op, _BinaryPredicate do_op, bool allowANulls, bool allowBNulls, const NU1& ANullVal, const NU2& BNullVal, const bool allowIntersect, const bool useExtendedBinOp);
343 
344 	template<typename SR, typename IVT, typename OVT, typename IU, typename NUM, typename UDER>
345 	friend void LocalSpMV(const SpParMat<IU,NUM,UDER> & A, int rowneighs, OptBuf<int32_t, OVT > & optbuf, int32_t * & indacc, IVT * & numacc,
346                            int32_t * & sendindbuf, OVT * & sendnumbuf, int * & sdispls, int * sendcnt, int accnz, bool indexisvalue, PreAllocatedSPA<OVT> & SPA);
347 
348 	template<typename VT, typename IU, typename UDER>
349 	friend void LocalSpMV(const SpParMat<IU,bool,UDER> & A, int rowneighs, OptBuf<int32_t, VT > & optbuf, int32_t * & indacc, VT * & numacc, int * sendcnt, int accnz);
350 
351 private:
352 	typedef std::array<char, MAXVERTNAME> STRASARRAY;
353 	typedef std::pair< STRASARRAY, uint64_t> TYPE2SEND;
354 
355 	class CharArraySaveHandler
356 	{
357 		public:
358     		// no reader
359     		template <typename c, typename t>
save(std::basic_ostream<c,t> & os,STRASARRAY & chararray,int64_t index)360     		void save(std::basic_ostream<c,t>& os, STRASARRAY & chararray, int64_t index)
361     		{
362 			          auto locnull = std::find(chararray.begin(), chararray.end(), '\0'); // find the null character (or string::end)
363                 std::string strtmp(chararray.begin(), locnull); // range constructor
364 			os << strtmp;
365     		}
366 	};
367 
368 	MPI_File TupleRead1stPassNExchange (const std::string & filename, TYPE2SEND * & senddata, IT & totsend, FullyDistVec<IT,STRASARRAY> & distmapper, uint64_t & totallength);
369 
370 	template <typename VT, typename GIT, typename _BinaryOperation, typename _UnaryOperation >
371     	void Reduce(FullyDistVec<GIT,VT> & rvec, Dim dim, _BinaryOperation __binary_op, VT id, _UnaryOperation __unary_op, MPI_Op mympiop) const;
372 
373 
374     	template <typename VT, typename GIT>	// GIT: global index type of vector
375     	void TopKGather(std::vector<NT> & all_medians, std::vector<IT> & nnz_per_col, int & thischunk, int & chunksize,
376                     const std::vector<NT> & medians, const std::vector<IT> & nnzperc, int itersuntil, std::vector< std::vector<NT> > & localmat,
377                     const std::vector<IT> & actcolsmap, std::vector<IT> & klimits, std::vector<IT> & toretain, std::vector<std::vector<std::pair<IT,NT>>> & tmppair,
378                     IT coffset, const FullyDistVec<GIT,VT> & rvec) const;
379 
380     void GetPlaceInGlobalGrid(IT& rowOffset, IT& colOffset) const;
381 
382 	void HorizontalSend(IT * & rows, IT * & cols, NT * & vals, IT * & temprows, IT * & tempcols, NT * & tempvals, std::vector < std::tuple <IT,IT,NT> > & localtuples,
383 						int * rcurptrs, int * rdispls, IT buffperrowneigh, int rowneighs, int recvcount, IT m_perproc, IT n_perproc, int rankinrow);
384 
385         template <class HANDLER>
386 	void ReadAllMine(FILE * binfile, IT * & rows, IT * & cols, NT * & vals, std::vector< std::tuple<IT,IT,NT> > & localtuples, int * rcurptrs, int * ccurptrs, int * rdispls, int * cdispls,
387 			IT m_perproc, IT n_perproc, int rowneighs, int colneighs, IT buffperrowneigh, IT buffpercolneigh, IT entriestoread, HANDLER handler, int rankinrow, bool transpose);
388 
389 	void VerticalSend(IT * & rows, IT * & cols, NT * & vals, std::vector< std::tuple<IT,IT,NT> > & localtuples, int * rcurptrs, int * ccurptrs, int * rdispls, int * cdispls,
390 				IT m_perproc, IT n_perproc, int rowneighs, int colneighs, IT buffperrowneigh, IT buffpercolneigh, int rankinrow);
391 
392 	void AllocateSetBuffers(IT * & rows, IT * & cols, NT * & vals,  int * & rcurptrs, int * & ccurptrs, int rowneighs, int colneighs, IT buffpercolneigh);
393 	void BcastEssentials(MPI_Comm & world, IT & total_m, IT & total_n, IT & total_nnz, int master);
394 
395 	std::shared_ptr<CommGrid> commGrid;
396 	DER * spSeq;
397 
398 	template <class IU, class NU>
399 	friend class DenseParMat;
400 
401 	template <typename IU, typename NU, typename UDER>
402 	friend std::ofstream& operator<< (std::ofstream& outfile, const SpParMat<IU,NU,UDER> & s);
403 };
404 
405 template <typename SR, typename NUO, typename UDERO, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
406 void PSpGEMM(SpParMat<IU,NU1,UDER1> & A, SpParMat<IU,NU2,UDER2> & B, SpParMat<IU,NUO,UDERO> & out, bool clearA = false, bool clearB = false)
407 {
408 	out = Mult_AnXBn_Synch<SR, NUO, UDERO> (A, B, clearA, clearB );
409 }
410 
411 template <typename SR, typename IU, typename NU1, typename NU2, typename UDER1, typename UDER2>
412 SpParMat<IU,typename promote_trait<NU1,NU2>::T_promote,typename promote_trait<UDER2,UDER2>::T_promote>
413 	PSpGEMM	(SpParMat<IU,NU1,UDER1> & A, SpParMat<IU,NU2,UDER2> & B, bool clearA = false, bool clearB = false)
414 {
415 	typedef typename promote_trait<NU1,NU2>::T_promote N_promote;
416 	typedef typename promote_trait<UDER1,UDER2>::T_promote DER_promote;
417 	return Mult_AnXBn_Synch<SR, N_promote, DER_promote> (A, B, clearA, clearB );
418 }
419 
420 }
421 
422 
423 
424 #include "SpParMat.cpp"
425 
426 #endif
427