1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #include "SpDCCols.h"
31 #include "Deleter.h"
32 #include <algorithm>
33 #include <functional>
34 #include <vector>
35 #include <climits>
36 #include <iomanip>
37 #include <cassert>
38 
39 namespace combblas {
40 
41 /****************************************************************************/
42 /********************* PUBLIC CONSTRUCTORS/DESTRUCTORS **********************/
43 /****************************************************************************/
44 
45 template <class IT, class NT>
46 const IT SpDCCols<IT,NT>::esscount = static_cast<IT>(4);
47 
48 
49 template <class IT, class NT>
SpDCCols()50 SpDCCols<IT,NT>::SpDCCols():dcsc(NULL), m(0), n(0), nnz(0), splits(0){
51 }
52 
53 // Allocate all the space necessary
54 template <class IT, class NT>
SpDCCols(IT size,IT nRow,IT nCol,IT nzc)55 SpDCCols<IT,NT>::SpDCCols(IT size, IT nRow, IT nCol, IT nzc)
56 :m(nRow), n(nCol), nnz(size), splits(0)
57 {
58 	if(nnz > 0)
59 		dcsc = new Dcsc<IT,NT>(nnz, nzc);
60 	else
61 		dcsc = NULL;
62 }
63 
64 template <class IT, class NT>
~SpDCCols()65 SpDCCols<IT,NT>::~SpDCCols()
66 {
67 	if(nnz > 0)
68 	{
69 		if(dcsc != NULL)
70 		{
71 			if(splits > 0)
72 			{
73 				for(int i=0; i<splits; ++i)
74 					delete dcscarr[i];
75 				delete [] dcscarr;
76 			}
77 			else
78 			{
79 				delete dcsc;
80 			}
81 		}
82 	}
83 }
84 
85 // Copy constructor (constructs a new object. i.e. this is NEVER called on an existing object)
86 // Derived's copy constructor can safely call Base's default constructor as base has no data members
87 template <class IT, class NT>
SpDCCols(const SpDCCols<IT,NT> & rhs)88 SpDCCols<IT,NT>::SpDCCols(const SpDCCols<IT,NT> & rhs)
89 : m(rhs.m), n(rhs.n), nnz(rhs.nnz), splits(rhs.splits)
90 {
91 	if(splits > 0)
92 	{
93 		for(int i=0; i<splits; ++i)
94 			CopyDcsc(rhs.dcscarr[i]);
95 	}
96 	else
97 	{
98 		CopyDcsc(rhs.dcsc);
99 	}
100 }
101 
102 /**
103  * Constructor for converting SpTuples matrix -> SpDCCols (may use a private memory heap)
104  * @param[in] 	rhs if transpose=true,
105  *	\n		then rhs is assumed to be a row sorted SpTuples object
106  *	\n		else rhs is assumed to be a column sorted SpTuples object
107  **/
108 template <class IT, class NT>
SpDCCols(const SpTuples<IT,NT> & rhs,bool transpose)109 SpDCCols<IT,NT>::SpDCCols(const SpTuples<IT, NT> & rhs, bool transpose)
110 : m(rhs.m), n(rhs.n), nnz(rhs.nnz), splits(0)
111 {
112 
113 	if(nnz == 0)	// m by n matrix of complete zeros
114 	{
115 		if(transpose) std::swap(m,n);
116 		dcsc = NULL;
117 	}
118 	else
119 	{
120 		if(transpose)
121 		{
122       std::swap(m,n);
123 			IT localnzc = 1;
124 			for(IT i=1; i< rhs.nnz; ++i)
125 			{
126 				if(rhs.rowindex(i) != rhs.rowindex(i-1))
127 				{
128 					++localnzc;
129 	 			}
130 	 		}
131 			dcsc = new Dcsc<IT,NT>(rhs.nnz,localnzc);
132 			dcsc->jc[0]  = rhs.rowindex(0);
133 			dcsc->cp[0] = 0;
134 
135 			for(IT i=0; i<rhs.nnz; ++i)
136 	 		{
137 				dcsc->ir[i]  = rhs.colindex(i);		// copy rhs.jc to ir since this transpose=true
138 				dcsc->numx[i] = rhs.numvalue(i);
139 			}
140 
141 			IT jspos  = 1;
142 			for(IT i=1; i<rhs.nnz; ++i)
143 			{
144 				if(rhs.rowindex(i) != dcsc->jc[jspos-1])
145 				{
146 					dcsc->jc[jspos] = rhs.rowindex(i);	// copy rhs.ir to jc since this transpose=true
147 					dcsc->cp[jspos++] = i;
148 				}
149 			}
150 			dcsc->cp[jspos] = rhs.nnz;
151 	 	}
152 		else
153 		{
154 			IT localnzc = 1;
155 			for(IT i=1; i<rhs.nnz; ++i)
156 			{
157 				if(rhs.colindex(i) != rhs.colindex(i-1))
158 				{
159 					++localnzc;
160 				}
161 			}
162 			dcsc = new Dcsc<IT,NT>(rhs.nnz,localnzc);
163 			dcsc->jc[0]  = rhs.colindex(0);
164 			dcsc->cp[0] = 0;
165 
166 			for(IT i=0; i<rhs.nnz; ++i)
167 			{
168 				dcsc->ir[i]  = rhs.rowindex(i);		// copy rhs.ir to ir since this transpose=false
169 				dcsc->numx[i] = rhs.numvalue(i);
170 			}
171 
172 			IT jspos = 1;
173 			for(IT i=1; i<rhs.nnz; ++i)
174 			{
175 				if(rhs.colindex(i) != dcsc->jc[jspos-1])
176 				{
177 					dcsc->jc[jspos] = rhs.colindex(i);	// copy rhs.jc to jc since this transpose=true
178 					dcsc->cp[jspos++] = i;
179 				}
180 			}
181 			dcsc->cp[jspos] = rhs.nnz;
182 		}
183 	}
184 }
185 
186 
187 
188 
189 /**
190  * Multithreaded Constructor for converting tuples matrix -> SpDCCols
191  * @param[in] 	rhs if transpose=true,
192  *	\n		then tuples is assumed to be a row sorted list of tuple objects
193  *	\n		else tuples is assumed to be a column sorted list of tuple objects
194  **/
195 
196 
197 template <class IT, class NT>
SpDCCols(IT nRow,IT nCol,IT nTuples,const std::tuple<IT,IT,NT> * tuples,bool transpose)198 SpDCCols<IT,NT>::SpDCCols(IT nRow, IT nCol, IT nTuples, const std::tuple<IT, IT, NT>*  tuples, bool transpose)
199 : m(nRow), n(nCol), nnz(nTuples), splits(0)
200 {
201 
202     if(nnz == 0)	// m by n matrix of complete zeros
203     {
204         dcsc = NULL;
205     }
206     else
207     {
208         int totThreads=1;
209 #ifdef _OPENMP
210 #pragma omp parallel
211         {
212             totThreads = omp_get_num_threads();
213         }
214 #endif
215 
216         std::vector <IT> tstart(totThreads);
217         std::vector <IT> tend(totThreads);
218         std::vector <IT> tdisp(totThreads+1);
219 
220         // extra memory, but replaces an O(nnz) loop by an O(nzc) loop
221         IT* temp_jc = new IT[nTuples];
222         IT* temp_cp = new IT[nTuples];
223 
224 #ifdef _OPENMP
225 #pragma omp parallel
226 #endif
227         {
228             int threadID = 0;
229 #ifdef _OPENMP
230             threadID = omp_get_thread_num();
231 #endif
232             IT start = threadID * (nTuples / totThreads);
233             IT end = (threadID + 1) * (nTuples / totThreads);
234             if(threadID == (totThreads-1)) end = nTuples;
235             IT curpos=start;
236             if(end>start) // no work for the current thread
237             {
238                 temp_jc[start] = std::get<1>(tuples[start]);
239                 temp_cp[start] = start;
240                 for (IT i = start+1; i < end; ++i)
241                 {
242                     if(std::get<1>(tuples[i]) != temp_jc[curpos] )
243                     {
244                         temp_jc[++curpos] = std::get<1>(tuples[i]);
245                         temp_cp[curpos] = i;
246                     }
247                 }
248             }
249 
250             tstart[threadID] = start;
251             if(end>start) tend[threadID] = curpos+1;
252             else tend[threadID] = end; // start=end
253         }
254 
255 
256         // serial part
257         for(int t=totThreads-1; t>0; --t)
258         {
259             if(tend[t] > tstart[t] && tend[t-1] > tstart[t-1])
260             {
261                 if(temp_jc[tstart[t]] == temp_jc[tend[t-1]-1])
262                 {
263                     tstart[t] ++;
264                 }
265             }
266         }
267 
268         tdisp[0] = 0;
269         for(int t=0; t<totThreads; ++t)
270         {
271             tdisp[t+1] = tdisp[t] + tend[t] - tstart[t];
272         }
273 
274         IT localnzc = tdisp[totThreads];
275         dcsc = new Dcsc<IT,NT>(nTuples,localnzc);
276 
277 #ifdef _OPENMP
278 #pragma omp parallel
279 #endif
280         {
281             int threadID = 0;
282 #ifdef _OPENMP
283             threadID = omp_get_thread_num();
284 #endif
285             std::copy(temp_jc + tstart[threadID],  temp_jc + tend[threadID], dcsc->jc + tdisp[threadID]);
286             std::copy(temp_cp + tstart[threadID],  temp_cp + tend[threadID], dcsc->cp + tdisp[threadID]);
287         }
288         dcsc->cp[localnzc] = nTuples;
289 
290         delete [] temp_jc;
291         delete [] temp_cp;
292 
293 #ifdef _OPENMP
294 #pragma omp parallel for schedule (static)
295 #endif
296         for(IT i=0; i<nTuples; ++i)
297         {
298             dcsc->ir[i]  = std::get<0>(tuples[i]);
299             dcsc->numx[i] = std::get<2>(tuples[i]);
300         }
301      }
302 
303     if(transpose) Transpose(); // this is not efficient, think to improve later. We included this parameter anyway to make this constructor different from another constracttor when the fourth argument is passed as 0.
304 }
305 
306 
307 
308 /*
309 template <class IT, class NT>
310 SpDCCols<IT,NT>::SpDCCols(IT nRow, IT nCol, IT nTuples, const tuple<IT, IT, NT>*  tuples)
311 : m(nRow), n(nCol), nnz(nTuples), splits(0)
312 {
313 
314     if(nnz == 0)	// m by n matrix of complete zeros
315     {
316         dcsc = NULL;
317     }
318     else
319     {
320         IT localnzc = 1;
321 #pragma omp parallel for schedule (static) default(shared) reduction(+:localnzc)
322         for(IT i=1; i<nTuples; ++i) // not scaling well, try my own version
323         {
324             if(std::get<1>(tuples[i]) != std::get<1>(tuples[i-1]))
325             {
326                 ++localnzc;
327             }
328         }
329 
330         dcsc = new Dcsc<IT,NT>(nTuples,localnzc);
331         dcsc->jc[0]  = std::get<1>(tuples[0]);
332         dcsc->cp[0] = 0;
333 
334 #pragma omp parallel for schedule (static)
335         for(IT i=0; i<nTuples; ++i)
336         {
337             dcsc->ir[i]  = std::get<0>(tuples[i]);
338             dcsc->numx[i] = std::get<2>(tuples[i]);
339         }
340 
341         IT jspos = 1;
342         for(IT i=1; i<nTuples; ++i) // now this loop
343         {
344             if(std::get<1>(tuples[i]) != dcsc->jc[jspos-1])
345             {
346                 dcsc->jc[jspos] = std::get<1>(tuples[i]);
347                 dcsc->cp[jspos++] = i;
348             }
349         }
350         dcsc->cp[jspos] = nTuples;
351     }
352 }
353 */
354 
355 /****************************************************************************/
356 /************************** PUBLIC OPERATORS ********************************/
357 /****************************************************************************/
358 
359 /**
360  * The assignment operator operates on an existing object
361  * The assignment operator is the only operator that is not inherited.
362  * But there is no need to call base's assigment operator as it has no data members
363  */
364 template <class IT, class NT>
operator =(const SpDCCols<IT,NT> & rhs)365 SpDCCols<IT,NT> & SpDCCols<IT,NT>::operator=(const SpDCCols<IT,NT> & rhs)
366 {
367 	// this pointer stores the address of the class instance
368 	// check for self assignment using address comparison
369 	if(this != &rhs)
370 	{
371 		if(dcsc != NULL && nnz > 0)
372 		{
373 			delete dcsc;
374 		}
375 		if(rhs.dcsc != NULL)
376 		{
377 			dcsc = new Dcsc<IT,NT>(*(rhs.dcsc));
378 			nnz = rhs.nnz;
379 		}
380 		else
381 		{
382 			dcsc = NULL;
383 			nnz = 0;
384 		}
385 
386 		m = rhs.m;
387 		n = rhs.n;
388 		splits = rhs.splits;
389 	}
390 	return *this;
391 }
392 
393 template <class IT, class NT>
operator +=(const SpDCCols<IT,NT> & rhs)394 SpDCCols<IT,NT> & SpDCCols<IT,NT>::operator+= (const SpDCCols<IT,NT> & rhs)
395 {
396 	// this pointer stores the address of the class instance
397 	// check for self assignment using address comparison
398 	if(this != &rhs)
399 	{
400 		if(m == rhs.m && n == rhs.n)
401 		{
402 			if(rhs.nnz == 0)
403 			{
404 				return *this;
405 			}
406 			else if(nnz == 0)
407 			{
408 				dcsc = new Dcsc<IT,NT>(*(rhs.dcsc));
409 				nnz = dcsc->nz;
410 			}
411 			else
412 			{
413 				(*dcsc) += (*(rhs.dcsc));
414 				nnz = dcsc->nz;
415 			}
416 		}
417 		else
418 		{
419 			std::cout<< "Not addable: " << m  << "!=" << rhs.m << " or " << n << "!=" << rhs.n <<std::endl;
420 		}
421 	}
422 	else
423 	{
424 		std::cout<< "Missing feature (A+A): Use multiply with 2 instead !"<<std::endl;
425 	}
426 	return *this;
427 }
428 
429 template <class IT, class NT>
430 template <typename _UnaryOperation, typename GlobalIT>
PruneI(_UnaryOperation __unary_op,bool inPlace,GlobalIT rowOffset,GlobalIT colOffset)431 SpDCCols<IT,NT>* SpDCCols<IT,NT>::PruneI(_UnaryOperation __unary_op, bool inPlace, GlobalIT rowOffset, GlobalIT colOffset)
432 {
433 	if(nnz > 0)
434 	{
435 		Dcsc<IT,NT>* ret = dcsc->PruneI (__unary_op, inPlace, rowOffset, colOffset);
436 		if (inPlace)
437 		{
438 			nnz = dcsc->nz;
439 
440 			if(nnz == 0)
441 			{
442 				delete dcsc;
443 				dcsc = NULL;
444 			}
445 			return NULL;
446 		}
447 		else
448 		{
449 			// wrap the new pruned Dcsc into a new SpDCCols
450 			SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
451 			retcols->dcsc = ret;
452 			retcols->nnz = retcols->dcsc->nz;
453 			retcols->n = n;
454 			retcols->m = m;
455 			return retcols;
456 		}
457 	}
458 	else
459 	{
460 		if (inPlace)
461 		{
462 			return NULL;
463 		}
464 		else
465 		{
466 			SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
467 			retcols->dcsc = NULL;
468 			retcols->nnz = 0;
469 			retcols->n = n;
470 			retcols->m = m;
471 			return retcols;
472 		}
473 	}
474 }
475 
476 template <class IT, class NT>
477 template <typename _UnaryOperation>
Prune(_UnaryOperation __unary_op,bool inPlace)478 SpDCCols<IT,NT>* SpDCCols<IT,NT>::Prune(_UnaryOperation __unary_op, bool inPlace)
479 {
480 	if(nnz > 0)
481 	{
482 		Dcsc<IT,NT>* ret = dcsc->Prune (__unary_op, inPlace);
483 		if (inPlace)
484 		{
485 			nnz = dcsc->nz;
486 
487 			if(nnz == 0)
488 			{
489 				delete dcsc;
490 				dcsc = NULL;
491 			}
492 			return NULL;
493 		}
494 		else
495 		{
496 			// wrap the new pruned Dcsc into a new SpDCCols
497 			SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
498 			retcols->dcsc = ret;
499 			retcols->nnz = retcols->dcsc->nz;
500 			retcols->n = n;
501 			retcols->m = m;
502 			return retcols;
503 		}
504 	}
505 	else
506 	{
507 		if (inPlace)
508 		{
509 			return NULL;
510 		}
511 		else
512 		{
513 			SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
514 			retcols->dcsc = NULL;
515 			retcols->nnz = 0;
516 			retcols->n = n;
517 			retcols->m = m;
518 			return retcols;
519 		}
520 	}
521 }
522 
523 
524 
525 template <class IT, class NT>
526 template <typename _BinaryOperation>
PruneColumn(NT * pvals,_BinaryOperation __binary_op,bool inPlace)527 SpDCCols<IT,NT>* SpDCCols<IT,NT>::PruneColumn(NT* pvals, _BinaryOperation __binary_op, bool inPlace)
528 {
529     if(nnz > 0)
530     {
531         Dcsc<IT,NT>* ret = dcsc->PruneColumn (pvals, __binary_op, inPlace);
532         if (inPlace)
533         {
534             nnz = dcsc->nz;
535 
536             if(nnz == 0)
537             {
538                 delete dcsc;
539                 dcsc = NULL;
540             }
541             return NULL;
542         }
543         else
544         {
545             // wrap the new pruned Dcsc into a new SpDCCols
546             SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
547             retcols->dcsc = ret;
548             retcols->nnz = retcols->dcsc->nz;
549             retcols->n = n;
550             retcols->m = m;
551             return retcols;
552         }
553     }
554     else
555     {
556         if (inPlace)
557         {
558             return NULL;
559         }
560         else
561         {
562             SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
563             retcols->dcsc = NULL;
564             retcols->nnz = 0;
565             retcols->n = n;
566             retcols->m = m;
567             return retcols;
568         }
569     }
570 }
571 
572 
573 template <class IT, class NT>
574 template <typename _BinaryOperation>
PruneColumn(IT * pinds,NT * pvals,_BinaryOperation __binary_op,bool inPlace)575 SpDCCols<IT,NT>* SpDCCols<IT,NT>::PruneColumn(IT* pinds, NT* pvals, _BinaryOperation __binary_op, bool inPlace)
576 {
577     if(nnz > 0)
578     {
579         Dcsc<IT,NT>* ret = dcsc->PruneColumn (pinds, pvals, __binary_op, inPlace);
580         if (inPlace)
581         {
582             nnz = dcsc->nz;
583 
584             if(nnz == 0)
585             {
586                 delete dcsc;
587                 dcsc = NULL;
588             }
589             return NULL;
590         }
591         else
592         {
593             // wrap the new pruned Dcsc into a new SpDCCols
594             SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
595             retcols->dcsc = ret;
596             retcols->nnz = retcols->dcsc->nz;
597             retcols->n = n;
598             retcols->m = m;
599             return retcols;
600         }
601     }
602     else
603     {
604         if (inPlace)
605         {
606             return NULL;
607         }
608         else
609         {
610             SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
611             retcols->dcsc = NULL;
612             retcols->nnz = 0;
613             retcols->n = n;
614             retcols->m = m;
615             return retcols;
616         }
617     }
618 }
619 
620 
621 
622 template <class IT, class NT>
EWiseMult(const SpDCCols<IT,NT> & rhs,bool exclude)623 void SpDCCols<IT,NT>::EWiseMult (const SpDCCols<IT,NT> & rhs, bool exclude)
624 {
625 	if(this != &rhs)
626 	{
627 		if(m == rhs.m && n == rhs.n)
628 		{
629 			if(rhs.nnz == 0)
630 			{
631 				if(exclude)	// then we don't exclude anything
632 				{
633 					return;
634 				}
635 				else	// A .* zeros() is zeros()
636 				{
637 					*this = SpDCCols<IT,NT>(0,m,n,0);	// completely reset the matrix
638 				}
639 			}
640 			else if (rhs.nnz != 0 && nnz != 0)
641 			{
642 				dcsc->EWiseMult (*(rhs.dcsc), exclude);
643 				nnz = dcsc->nz;
644 				if(nnz == 0 )
645 					dcsc = NULL;
646 			}
647 		}
648 		else
649 		{
650 			std::cout<< "Matrices do not conform for A .* op(B) !"<<std::endl;
651 		}
652 	}
653 	else
654 	{
655 		std::cout<< "Missing feature (A .* A): Use Square_EWise() instead !"<<std::endl;
656 	}
657 }
658 
659 /**
660  * @Pre {scaler should NOT contain any zero entries}
661  */
662 template <class IT, class NT>
EWiseScale(NT ** scaler,IT m_scaler,IT n_scaler)663 void SpDCCols<IT,NT>::EWiseScale(NT ** scaler, IT m_scaler, IT n_scaler)
664 {
665 	if(m == m_scaler && n == n_scaler)
666 	{
667 		if(nnz > 0)
668 			dcsc->EWiseScale (scaler);
669 	}
670 	else
671 	{
672 		std::cout<< "Matrices do not conform for EWiseScale !"<<std::endl;
673 	}
674 }
675 
676 
677 /****************************************************************************/
678 /********************* PUBLIC MEMBER FUNCTIONS ******************************/
679 /****************************************************************************/
680 
681 template <class IT, class NT>
CreateImpl(IT * _cp,IT * _jc,IT * _ir,NT * _numx,IT _nz,IT _nzc,IT _m,IT _n)682 void SpDCCols<IT,NT>::CreateImpl(IT * _cp, IT * _jc, IT * _ir, NT * _numx, IT _nz, IT _nzc, IT _m, IT _n)
683 {
684     m = _m;
685     n = _n;
686     nnz =  _nz;
687 
688     if(nnz > 0)
689         dcsc = new Dcsc<IT,NT>(_cp, _jc, _ir, _numx, _nz, _nzc, false);	// memory not owned by DCSC
690     else
691         dcsc = NULL;
692 }
693 
694 template <class IT, class NT>
CreateImpl(const std::vector<IT> & essentials)695 void SpDCCols<IT,NT>::CreateImpl(const std::vector<IT> & essentials)
696 {
697 	assert(essentials.size() == esscount);
698 	nnz = essentials[0];
699 	m = essentials[1];
700 	n = essentials[2];
701 
702 	if(nnz > 0)
703 		dcsc = new Dcsc<IT,NT>(nnz,essentials[3]);
704 	else
705 		dcsc = NULL;
706 }
707 
708 template <class IT, class NT>
CreateImpl(IT size,IT nRow,IT nCol,std::tuple<IT,IT,NT> * mytuples)709 void SpDCCols<IT,NT>::CreateImpl(IT size, IT nRow, IT nCol, std::tuple<IT, IT, NT> * mytuples)
710 {
711 	SpTuples<IT,NT> tuples(size, nRow, nCol, mytuples);
712 	tuples.SortColBased();
713 
714 #ifdef DEBUG
715   std::pair<IT,IT> rlim = tuples.RowLimits();
716   std::pair<IT,IT> clim = tuples.ColLimits();
717 
718   std::ofstream oput;
719   std::stringstream ss;
720   std::string rank;
721 	int myrank;
722 	MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
723 	ss << myrank;
724 	ss >> rank;
725   std::string ofilename = "Read";
726 	ofilename += rank;
727 	oput.open(ofilename.c_str(), std::ios_base::app );
728 	oput << "Creating of dimensions " << nRow << "-by-" << nCol << " of size: " << size <<
729 			" with row range (" << rlim.first  << "," << rlim.second << ") and column range (" << clim.first  << "," << clim.second << ")" << std::endl;
730 	if(tuples.getnnz() > 0)
731 	{
732 		IT minfr = joker::get<0>(tuples.front());
733 		IT minto = joker::get<1>(tuples.front());
734 		IT maxfr = joker::get<0>(tuples.back());
735 		IT maxto = joker::get<1>(tuples.back());
736 
737 		oput << "Min: " << minfr << ", " << minto << "; Max: " << maxfr << ", " << maxto << std::endl;
738 	}
739 	oput.close();
740 #endif
741 
742 	SpDCCols<IT,NT> object(tuples, false);
743 	*this = object;
744 }
745 
746 
747 template <class IT, class NT>
GetEssentials() const748 std::vector<IT> SpDCCols<IT,NT>::GetEssentials() const
749 {
750 	std::vector<IT> essentials(esscount);
751 	essentials[0] = nnz;
752 	essentials[1] = m;
753 	essentials[2] = n;
754 	essentials[3] = (nnz > 0) ? dcsc->nzc : 0;
755 	return essentials;
756 }
757 
758 template <class IT, class NT>
759 template <typename NNT>
operator SpDCCols<IT,NNT>() const760 SpDCCols<IT,NT>::operator SpDCCols<IT,NNT> () const
761 {
762 	Dcsc<IT,NNT> * convert;
763 	if(nnz > 0)
764 		convert = new Dcsc<IT,NNT>(*dcsc);
765 	else
766 		convert = NULL;
767 
768 	return SpDCCols<IT,NNT>(m, n, convert);
769 }
770 
771 
772 template <class IT, class NT>
773 template <typename NIT, typename NNT>
operator SpDCCols<NIT,NNT>() const774 SpDCCols<IT,NT>::operator SpDCCols<NIT,NNT> () const
775 {
776 	Dcsc<NIT,NNT> * convert;
777 	if(nnz > 0)
778 		convert = new Dcsc<NIT,NNT>(*dcsc);
779 	else
780 		convert = NULL;
781 
782 	return SpDCCols<NIT,NNT>(m, n, convert);
783 }
784 
785 
786 template <class IT, class NT>
GetArrays() const787 Arr<IT,NT> SpDCCols<IT,NT>::GetArrays() const
788 {
789 	Arr<IT,NT> arr(3,1);
790 
791 	if(nnz > 0)
792 	{
793 		arr.indarrs[0] = LocArr<IT,IT>(dcsc->cp, dcsc->nzc+1);
794 		arr.indarrs[1] = LocArr<IT,IT>(dcsc->jc, dcsc->nzc);
795 		arr.indarrs[2] = LocArr<IT,IT>(dcsc->ir, dcsc->nz);
796 		arr.numarrs[0] = LocArr<NT,IT>(dcsc->numx, dcsc->nz);
797 	}
798 	else
799 	{
800 		arr.indarrs[0] = LocArr<IT,IT>(NULL, 0);
801 		arr.indarrs[1] = LocArr<IT,IT>(NULL, 0);
802 		arr.indarrs[2] = LocArr<IT,IT>(NULL, 0);
803 		arr.numarrs[0] = LocArr<NT,IT>(NULL, 0);
804 
805 	}
806 	return arr;
807 }
808 
809 /**
810   * O(nnz log(nnz)) time Transpose function
811   * \remarks Performs a lexicographical sort
812   * \remarks Mutator function (replaces the calling object with its transpose)
813   */
814 template <class IT, class NT>
Transpose()815 void SpDCCols<IT,NT>::Transpose()
816 {
817 	if(nnz > 0)
818 	{
819 		SpTuples<IT,NT> Atuples(*this);
820 		Atuples.SortRowBased();
821 
822 		// destruction of (*this) is handled by the assignment operator
823 		*this = SpDCCols<IT,NT>(Atuples,true);
824 	}
825 	else
826 	{
827 		*this = SpDCCols<IT,NT>(0, n, m, 0);
828 	}
829 }
830 
831 
832 /**
833   * O(nnz log(nnz)) time Transpose function
834   * \remarks Performs a lexicographical sort
835   * \remarks Const function (doesn't mutate the calling object)
836   */
837 template <class IT, class NT>
TransposeConst() const838 SpDCCols<IT,NT> SpDCCols<IT,NT>::TransposeConst() const
839 {
840 	SpTuples<IT,NT> Atuples(*this);
841 	Atuples.SortRowBased();
842 
843 	return SpDCCols<IT,NT>(Atuples,true);
844 }
845 
846 /**
847  * O(nnz log(nnz)) time Transpose function
848  * \remarks Performs a lexicographical sort
849  * \remarks Const function (doesn't mutate the calling object)
850  */
851 template <class IT, class NT>
TransposeConstPtr() const852 SpDCCols<IT,NT> * SpDCCols<IT,NT>::TransposeConstPtr() const
853 {
854 	SpTuples<IT,NT> Atuples(*this);
855 	Atuples.SortRowBased();
856 
857 	return new SpDCCols<IT,NT>(Atuples,true);
858 }
859 
860 /**
861   * Splits the matrix into two parts, simply by cutting along the columns
862   * Simple algorithm that doesn't intend to split perfectly, but it should do a pretty good job
863   * Practically destructs the calling object also (frees most of its memory)
864   * \todo {special case of ColSplit, to be deprecated...}
865   */
866 template <class IT, class NT>
Split(SpDCCols<IT,NT> & partA,SpDCCols<IT,NT> & partB)867 void SpDCCols<IT,NT>::Split(SpDCCols<IT,NT> & partA, SpDCCols<IT,NT> & partB)
868 {
869 	IT cut = n/2;
870 	if(cut == 0)
871 	{
872 		std::cout<< "Matrix is too small to be splitted" << std::endl;
873 		return;
874 	}
875 
876 	Dcsc<IT,NT> *Adcsc = NULL;
877 	Dcsc<IT,NT> *Bdcsc = NULL;
878 
879 	if(nnz != 0)
880 	{
881 		dcsc->Split(Adcsc, Bdcsc, cut);
882 	}
883 
884 	partA = SpDCCols<IT,NT> (m, cut, Adcsc);
885 	partB = SpDCCols<IT,NT> (m, n-cut, Bdcsc);
886 
887 	// handle destruction through assignment operator
888 	*this = SpDCCols<IT, NT>();
889 }
890 
891 /**
892  * Splits the matrix into "parts", simply by cutting along the columns
893  * Simple algorithm that doesn't intend to split perfectly, but it should do a pretty good job
894  * Practically destructs the calling object also (frees most of its memory)
895  */
896 template <class IT, class NT>
ColSplit(int parts,std::vector<SpDCCols<IT,NT>> & matrices)897 void SpDCCols<IT,NT>::ColSplit(int parts, std::vector< SpDCCols<IT,NT> > & matrices)
898 {
899     if(parts < 2)
900     {
901         matrices.emplace_back(*this);
902     }
903     else
904     {
905         std::vector<IT> cuts(parts-1);
906         for(int i=0; i< (parts-1); ++i)
907         {
908             cuts[i] = (i+1) * (n/parts);
909         }
910         if(n < parts)
911         {
912             std::cout<< "Matrix is too small to be splitted" << std::endl;
913             return;
914         }
915         std::vector< Dcsc<IT,NT> * > dcscs(parts, NULL);
916 
917         if(nnz != 0)
918         {
919             dcsc->ColSplit(dcscs, cuts);
920         }
921 
922         for(int i=0; i< (parts-1); ++i)
923         {
924             SpDCCols<IT,NT> matrix = SpDCCols<IT,NT>(m, (n/parts), dcscs[i]);
925             matrices.emplace_back(matrix);
926         }
927         SpDCCols<IT,NT> matrix = SpDCCols<IT,NT>(m, n-cuts[parts-2], dcscs[parts-1]);
928         matrices.emplace_back(matrix);
929     }
930     *this = SpDCCols<IT, NT>();		    // handle destruction through assignment operator
931 }
932 
933 
934 /**
935  * Concatenates (merges) multiple matrices (cut along the columns) into 1 piece
936  * ColSplit() method should have been executed on the object beforehand
937  */
938 template <class IT, class NT>
ColConcatenate(std::vector<SpDCCols<IT,NT>> & matrices)939 void SpDCCols<IT,NT>::ColConcatenate(std::vector< SpDCCols<IT,NT> > & matrices)
940 {
941     std::vector< SpDCCols<IT,NT> * > nonempties;
942     std::vector< Dcsc<IT,NT> * > dcscs;
943     std::vector< IT > offsets;
944     IT runningoffset = 0;
945 
946     for(size_t i=0; i< matrices.size(); ++i)
947     {
948         if(matrices[i].nnz != 0)
949         {
950             nonempties.push_back(&(matrices[i]));
951             dcscs.push_back(matrices[i].dcsc);
952             offsets.push_back(runningoffset);
953         }
954         runningoffset += matrices[i].n;
955     }
956 
957     if(nonempties.size() < 1)
958     {
959 #ifdef DEBUG
960         std::cout << "Nothing to ColConcatenate" << std::endl;
961 #endif
962         n = runningoffset;
963     }
964     else if(nonempties.size() < 2)
965     {
966         *this =  *(nonempties[0]);
967         n = runningoffset;
968     }
969     else // nonempties.size() > 1
970     {
971         Dcsc<IT,NT> * Cdcsc = new Dcsc<IT,NT>();
972         Cdcsc->ColConcatenate(dcscs, offsets);
973         *this = SpDCCols<IT,NT> (nonempties[0]->m, runningoffset, Cdcsc);
974     }
975 
976     // destruct parameters
977     for(size_t i=0; i< matrices.size(); ++i)
978     {
979         matrices[i] = SpDCCols<IT,NT>();
980     }
981 }
982 
983 
984 
985 /**
986   * Merges two matrices (cut along the columns) into 1 piece
987   * Split method should have been executed on the object beforehand
988  **/
989 template <class IT, class NT>
Merge(SpDCCols<IT,NT> & partA,SpDCCols<IT,NT> & partB)990 void SpDCCols<IT,NT>::Merge(SpDCCols<IT,NT> & partA, SpDCCols<IT,NT> & partB)
991 {
992 	assert( partA.m == partB.m );
993 
994 	Dcsc<IT,NT> * Cdcsc = new Dcsc<IT,NT>();
995 
996 	if(partA.nnz == 0 && partB.nnz == 0)
997 	{
998 		Cdcsc = NULL;
999 	}
1000 	else if(partA.nnz == 0)
1001 	{
1002 		Cdcsc = new Dcsc<IT,NT>(*(partB.dcsc));
1003     std::transform(Cdcsc->jc, Cdcsc->jc + Cdcsc->nzc, Cdcsc->jc, std::bind2nd(std::plus<IT>(), partA.n));
1004 	}
1005 	else if(partB.nnz == 0)
1006 	{
1007 		Cdcsc = new Dcsc<IT,NT>(*(partA.dcsc));
1008 	}
1009 	else
1010 	{
1011 		Cdcsc->Merge(partA.dcsc, partB.dcsc, partA.n);
1012 	}
1013 	*this = SpDCCols<IT,NT> (partA.m, partA.n + partB.n, Cdcsc);
1014 
1015 	partA = SpDCCols<IT, NT>();
1016 	partB = SpDCCols<IT, NT>();
1017 }
1018 
1019 /**
1020  * C += A*B' (Using OuterProduct Algorithm)
1021  * This version is currently limited to multiplication of matrices with the same precision
1022  * (e.g. it can't multiply double-precision matrices with booleans)
1023  * The multiplication is on the specified semiring (passed as parameter)
1024  */
1025 template <class IT, class NT>
1026 template <class SR>
PlusEq_AnXBt(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1027 int SpDCCols<IT,NT>::PlusEq_AnXBt(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1028 {
1029 	if(A.isZero() || B.isZero())
1030 	{
1031 		return -1;	// no need to do anything
1032 	}
1033 	Isect<IT> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
1034 	SpHelper::SpIntersect(*(A.dcsc), *(B.dcsc), cols, rows, isect1, isect2, itr1, itr2);
1035 
1036 	IT kisect = static_cast<IT>(itr1-isect1);		// size of the intersection ((itr1-isect1) == (itr2-isect2))
1037 	if(kisect == 0)
1038 	{
1039 		DeleteAll(isect1, isect2, cols, rows);
1040 		return -1;
1041 	}
1042 
1043 	StackEntry< NT, std::pair<IT,IT> > * multstack;
1044 	IT cnz = SpHelper::SpCartesian< SR > (*(A.dcsc), *(B.dcsc), kisect, isect1, isect2, multstack);
1045 	DeleteAll(isect1, isect2, cols, rows);
1046 
1047 	IT mdim = A.m;
1048 	IT ndim = B.m;		// since B has already been transposed
1049 	if(isZero())
1050 	{
1051 		dcsc = new Dcsc<IT,NT>(multstack, mdim, ndim, cnz);
1052 	}
1053 	else
1054 	{
1055 		dcsc->AddAndAssign(multstack, mdim, ndim, cnz);
1056 	}
1057 	nnz = dcsc->nz;
1058 
1059 	delete [] multstack;
1060 	return 1;
1061 }
1062 
1063 /**
1064  * C += A*B (Using ColByCol Algorithm)
1065  * This version is currently limited to multiplication of matrices with the same precision
1066  * (e.g. it can't multiply double-precision matrices with booleans)
1067  * The multiplication is on the specified semiring (passed as parameter)
1068  */
1069 template <class IT, class NT>
1070 template <typename SR>
PlusEq_AnXBn(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1071 int SpDCCols<IT,NT>::PlusEq_AnXBn(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1072 {
1073 	if(A.isZero() || B.isZero())
1074 	{
1075 		return -1;	// no need to do anything
1076 	}
1077 	StackEntry< NT, std::pair<IT,IT> > * multstack;
1078 	int cnz = SpHelper::SpColByCol< SR > (*(A.dcsc), *(B.dcsc), A.n, multstack);
1079 
1080 	IT mdim = A.m;
1081 	IT ndim = B.n;
1082 	if(isZero())
1083 	{
1084 		dcsc = new Dcsc<IT,NT>(multstack, mdim, ndim, cnz);
1085 	}
1086 	else
1087 	{
1088 		dcsc->AddAndAssign(multstack, mdim, ndim, cnz);
1089 	}
1090 	nnz = dcsc->nz;
1091 
1092 	delete [] multstack;
1093 	return 1;
1094 }
1095 
1096 
1097 template <class IT, class NT>
1098 template <typename SR>
PlusEq_AtXBn(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1099 int SpDCCols<IT,NT>::PlusEq_AtXBn(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1100 {
1101 	std::cout << "PlusEq_AtXBn function has not been implemented yet !" << std::endl;
1102 	return 0;
1103 }
1104 
1105 template <class IT, class NT>
1106 template <typename SR>
PlusEq_AtXBt(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1107 int SpDCCols<IT,NT>::PlusEq_AtXBt(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1108 {
1109 	std::cout << "PlusEq_AtXBt function has not been implemented yet !" << std::endl;
1110 	return 0;
1111 }
1112 
1113 
1114 template <class IT, class NT>
operator ()(IT ri,IT ci) const1115 SpDCCols<IT,NT> SpDCCols<IT,NT>::operator() (IT ri, IT ci) const
1116 {
1117 	IT * itr = std::find(dcsc->jc, dcsc->jc + dcsc->nzc, ci);
1118 	if(itr != dcsc->jc + dcsc->nzc)
1119 	{
1120 		IT irbeg = dcsc->cp[itr - dcsc->jc];
1121 		IT irend = dcsc->cp[itr - dcsc->jc + 1];
1122 
1123 		IT * ele = std::find(dcsc->ir + irbeg, dcsc->ir + irend, ri);
1124 		if(ele != dcsc->ir + irend)
1125 		{
1126 			SpDCCols<IT,NT> SingEleMat(1, 1, 1, 1);	// 1-by-1 matrix with 1 nonzero
1127 			*(SingEleMat.dcsc->numx) = dcsc->numx[ele - dcsc->ir];
1128 			*(SingEleMat.dcsc->ir) = *ele;
1129 			*(SingEleMat.dcsc->jc) = *itr;
1130 			(SingEleMat.dcsc->cp)[0] = 0;
1131 			(SingEleMat.dcsc->cp)[1] = 1;
1132             return SingEleMat;
1133 		}
1134 		else
1135 		{
1136 			return SpDCCols<IT,NT>();  // 0-by-0 empty matrix
1137 		}
1138 	}
1139 	else
1140 	{
1141 		return SpDCCols<IT,NT>();	 // 0-by-0 empty matrix
1142 	}
1143 }
1144 
1145 /**
1146  * The almighty indexing polyalgorithm
1147  * Calls different subroutines depending the sparseness of ri/ci
1148  */
1149 template <class IT, class NT>
operator ()(const std::vector<IT> & ri,const std::vector<IT> & ci) const1150 SpDCCols<IT,NT> SpDCCols<IT,NT>::operator() (const std::vector<IT> & ri, const std::vector<IT> & ci) const
1151 {
1152 	typedef PlusTimesSRing<NT,NT> PT;
1153 
1154 	IT rsize = ri.size();
1155 	IT csize = ci.size();
1156 
1157 	if(rsize == 0 && csize == 0)
1158 	{
1159 		// return an m x n matrix of complete zeros
1160 		// since we don't know whether columns or rows are indexed
1161 		return SpDCCols<IT,NT> (0, m, n, 0);
1162 	}
1163 	else if(rsize == 0)
1164 	{
1165 		return ColIndex(ci);
1166 	}
1167 	else if(csize == 0)
1168 	{
1169 		SpDCCols<IT,NT> LeftMatrix(rsize, rsize, this->m, ri, true);
1170 		return LeftMatrix.OrdColByCol< PT >(*this);
1171 	}
1172 	else	// this handles the (rsize=1 && csize=1) case as well
1173 	{
1174 		SpDCCols<IT,NT> LeftMatrix(rsize, rsize, this->m, ri, true);
1175 		SpDCCols<IT,NT> RightMatrix(csize, this->n, csize, ci, false);
1176 		return LeftMatrix.OrdColByCol< PT >( OrdColByCol< PT >(RightMatrix) );
1177 	}
1178 }
1179 
1180 template <class IT, class NT>
put(std::ofstream & outfile) const1181 std::ofstream & SpDCCols<IT,NT>::put(std::ofstream & outfile) const
1182 {
1183 	if(nnz == 0)
1184 	{
1185 		outfile << "Matrix doesn't have any nonzeros" <<std::endl;
1186 		return outfile;
1187 	}
1188 	SpTuples<IT,NT> tuples(*this);
1189 	outfile << tuples << std::endl;
1190 	return outfile;
1191 }
1192 
1193 
1194 template <class IT, class NT>
get(std::ifstream & infile)1195 std::ifstream & SpDCCols<IT,NT>::get(std::ifstream & infile)
1196 {
1197 	std::cout << "Getting... SpDCCols" << std::endl;
1198 	IT m, n, nnz;
1199 	infile >> m >> n >> nnz;
1200 	SpTuples<IT,NT> tuples(nnz, m, n);
1201 	infile >> tuples;
1202 	tuples.SortColBased();
1203 
1204 	SpDCCols<IT,NT> object(tuples, false);
1205 	*this = object;
1206 	return infile;
1207 }
1208 
1209 
1210 template<class IT, class NT>
PrintInfo(std::ofstream & out) const1211 void SpDCCols<IT,NT>::PrintInfo(std::ofstream &  out) const
1212 {
1213 	out << "m: " << m ;
1214 	out << ", n: " << n ;
1215 	out << ", nnz: "<< nnz ;
1216 
1217 	if(splits > 0)
1218 	{
1219 		out << ", local splits: " << splits << std::endl;
1220 	}
1221 	else
1222 	{
1223 		if(dcsc != NULL)
1224 		{
1225 			out << ", nzc: "<< dcsc->nzc << std::endl;
1226 		}
1227 		else
1228 		{
1229 			out <<", nzc: "<< 0 << std::endl;
1230 		}
1231 	}
1232 }
1233 
1234 template<class IT, class NT>
PrintInfo() const1235 void SpDCCols<IT,NT>::PrintInfo() const
1236 {
1237 	std::cout << "m: " << m ;
1238 	std::cout << ", n: " << n ;
1239 	std::cout << ", nnz: "<< nnz ;
1240 
1241 	if(splits > 0)
1242 	{
1243 		std::cout << ", local splits: " << splits << std::endl;
1244 	}
1245 	else
1246 	{
1247 		if(dcsc != NULL)
1248 		{
1249 			std::cout << ", nzc: "<< dcsc->nzc << std::endl;
1250 		}
1251 		else
1252 		{
1253 			std::cout <<", nzc: "<< 0 << std::endl;
1254 		}
1255 
1256 		if(m < PRINT_LIMIT && n < PRINT_LIMIT)	// small enough to print
1257 		{
1258 			NT ** A = SpHelper::allocate2D<NT>(m,n);
1259 			for(IT i=0; i< m; ++i)
1260 				for(IT j=0; j<n; ++j)
1261 					A[i][j] = NT();
1262 			if(dcsc != NULL)
1263 			{
1264 				for(IT i=0; i< dcsc->nzc; ++i)
1265 				{
1266 					for(IT j = dcsc->cp[i]; j<dcsc->cp[i+1]; ++j)
1267 					{
1268 						IT colid = dcsc->jc[i];
1269 						IT rowid = dcsc->ir[j];
1270 						A[rowid][colid] = dcsc->numx[j];
1271 					}
1272 				}
1273 			}
1274 			for(IT i=0; i< m; ++i)
1275 			{
1276 				for(IT j=0; j<n; ++j)
1277 				{
1278 					std::cout << std::setiosflags(std::ios::fixed) << std::setprecision(2) << A[i][j];
1279 					std::cout << " ";
1280 				}
1281 				std::cout << std::endl;
1282 			}
1283 			SpHelper::deallocate2D(A,m);
1284 		}
1285 	}
1286 }
1287 
1288 
1289 /****************************************************************************/
1290 /********************* PRIVATE CONSTRUCTORS/DESTRUCTORS *********************/
1291 /****************************************************************************/
1292 
1293 //! Construct SpDCCols from Dcsc
1294 template <class IT, class NT>
SpDCCols(IT nRow,IT nCol,Dcsc<IT,NT> * mydcsc)1295 SpDCCols<IT,NT>::SpDCCols(IT nRow, IT nCol, Dcsc<IT,NT> * mydcsc)
1296 :dcsc(mydcsc), m(nRow), n(nCol), splits(0)
1297 {
1298 	if (mydcsc == NULL)
1299 		nnz = 0;
1300 	else
1301 		nnz = mydcsc->nz;
1302 }
1303 
1304 //! Create a logical matrix from (row/column) indices array, used for indexing only
1305 template <class IT, class NT>
SpDCCols(IT size,IT nRow,IT nCol,const std::vector<IT> & indices,bool isRow)1306 SpDCCols<IT,NT>::SpDCCols (IT size, IT nRow, IT nCol, const std::vector<IT> & indices, bool isRow)
1307 :m(nRow), n(nCol), nnz(size), splits(0)
1308 {
1309 	if(size > 0)
1310 		dcsc = new Dcsc<IT,NT>(size,indices,isRow);
1311 	else
1312 		dcsc = NULL;
1313 }
1314 
1315 
1316 /****************************************************************************/
1317 /************************* PRIVATE MEMBER FUNCTIONS *************************/
1318 /****************************************************************************/
1319 
1320 template <class IT, class NT>
CopyDcsc(Dcsc<IT,NT> * source)1321 inline void SpDCCols<IT,NT>::CopyDcsc(Dcsc<IT,NT> * source)
1322 {
1323 	// source dcsc will be NULL if number of nonzeros is zero
1324 	if(source != NULL)
1325 		dcsc = new Dcsc<IT,NT>(*source);
1326 	else
1327 		dcsc = NULL;
1328 }
1329 
1330 /**
1331  * \return An indexed SpDCCols object without using multiplication
1332  * \pre ci is sorted and is not completely empty.
1333  * \remarks it is OK for some indices ci[i] to be empty in the indexed SpDCCols matrix
1334  *	[i.e. in the output, nzc does not need to be equal to n]
1335  */
1336 template <class IT, class NT>
ColIndex(const std::vector<IT> & ci) const1337 SpDCCols<IT,NT> SpDCCols<IT,NT>::ColIndex(const std::vector<IT> & ci) const
1338 {
1339 	IT csize = ci.size();
1340 	if(nnz == 0)	// nothing to index
1341 	{
1342 		return SpDCCols<IT,NT>(0, m, csize, 0);
1343 	}
1344 	else if(ci.empty())
1345 	{
1346 		return SpDCCols<IT,NT>(0, m,0, 0);
1347 	}
1348 
1349 	// First pass for estimation
1350 	IT estsize = 0;
1351 	IT estnzc = 0;
1352 	for(IT i=0, j=0;  i< dcsc->nzc && j < csize;)
1353 	{
1354 		if((dcsc->jc)[i] < ci[j])
1355 		{
1356 			++i;
1357 		}
1358 		else if ((dcsc->jc)[i] > ci[j])
1359 		{
1360 			++j;
1361 		}
1362 		else
1363 		{
1364 			estsize +=  (dcsc->cp)[i+1] - (dcsc->cp)[i];
1365 			++estnzc;
1366 			++i;
1367 			++j;
1368 		}
1369 	}
1370 
1371 	SpDCCols<IT,NT> SubA(estsize, m, csize, estnzc);
1372 	if(estnzc == 0)
1373 	{
1374 		return SubA;		// no need to run the second pass
1375 	}
1376 	SubA.dcsc->cp[0] = 0;
1377 	IT cnzc = 0;
1378 	IT cnz = 0;
1379 	for(IT i=0, j=0;  i < dcsc->nzc && j < csize;)
1380 	{
1381 		if((dcsc->jc)[i] < ci[j])
1382 		{
1383 			++i;
1384 		}
1385 		else if ((dcsc->jc)[i] > ci[j])		// an empty column for the output
1386 		{
1387 			++j;
1388 		}
1389 		else
1390 		{
1391 			IT columncount = (dcsc->cp)[i+1] - (dcsc->cp)[i];
1392 			SubA.dcsc->jc[cnzc++] = j;
1393 			SubA.dcsc->cp[cnzc] = SubA.dcsc->cp[cnzc-1] + columncount;
1394 			std::copy(dcsc->ir + dcsc->cp[i], dcsc->ir + dcsc->cp[i+1], SubA.dcsc->ir + cnz);
1395 			std::copy(dcsc->numx + dcsc->cp[i], dcsc->numx + dcsc->cp[i+1], SubA.dcsc->numx + cnz);
1396 			cnz += columncount;
1397 			++i;
1398 			++j;
1399 		}
1400 	}
1401 	return SubA;
1402 }
1403 
1404 template <class IT, class NT>
1405 template <typename SR, typename NTR>
OrdOutProdMult(const SpDCCols<IT,NTR> & rhs) const1406 SpDCCols< IT, typename promote_trait<NT,NTR>::T_promote > SpDCCols<IT,NT>::OrdOutProdMult(const SpDCCols<IT,NTR> & rhs) const
1407 {
1408 	typedef typename promote_trait<NT,NTR>::T_promote T_promote;
1409 
1410 	if(isZero() || rhs.isZero())
1411 	{
1412 		return SpDCCols< IT, T_promote > (0, m, rhs.n, 0);		// return an empty matrix
1413 	}
1414 	SpDCCols<IT,NTR> Btrans = rhs.TransposeConst();
1415 
1416 	Isect<IT> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
1417 	SpHelper::SpIntersect(*dcsc, *(Btrans.dcsc), cols, rows, isect1, isect2, itr1, itr2);
1418 
1419 	IT kisect = static_cast<IT>(itr1-isect1);		// size of the intersection ((itr1-isect1) == (itr2-isect2))
1420 	if(kisect == 0)
1421 	{
1422 		DeleteAll(isect1, isect2, cols, rows);
1423 		return SpDCCols< IT, T_promote > (0, m, rhs.n, 0);
1424 	}
1425 	StackEntry< T_promote, std::pair<IT,IT> > * multstack;
1426 	IT cnz = SpHelper::SpCartesian< SR > (*dcsc, *(Btrans.dcsc), kisect, isect1, isect2, multstack);
1427 	DeleteAll(isect1, isect2, cols, rows);
1428 
1429 	Dcsc<IT, T_promote> * mydcsc = NULL;
1430 	if(cnz > 0)
1431 	{
1432 		mydcsc = new Dcsc< IT,T_promote > (multstack, m, rhs.n, cnz);
1433 		delete [] multstack;
1434 	}
1435 	return SpDCCols< IT,T_promote > (m, rhs.n, mydcsc);
1436 }
1437 
1438 
1439 template <class IT, class NT>
1440 template <typename SR, typename NTR>
OrdColByCol(const SpDCCols<IT,NTR> & rhs) const1441 SpDCCols< IT, typename promote_trait<NT,NTR>::T_promote > SpDCCols<IT,NT>::OrdColByCol(const SpDCCols<IT,NTR> & rhs) const
1442 {
1443 	typedef typename promote_trait<NT,NTR>::T_promote T_promote;
1444 
1445 	if(isZero() || rhs.isZero())
1446 	{
1447 		return SpDCCols<IT, T_promote> (0, m, rhs.n, 0);		// return an empty matrix
1448 	}
1449 	StackEntry< T_promote, std::pair<IT,IT> > * multstack;
1450 	IT cnz = SpHelper::SpColByCol< SR > (*dcsc, *(rhs.dcsc), n, multstack);
1451 
1452 	Dcsc<IT,T_promote > * mydcsc = NULL;
1453 	if(cnz > 0)
1454 	{
1455 		mydcsc = new Dcsc< IT,T_promote > (multstack, m, rhs.n, cnz);
1456 		delete [] multstack;
1457 	}
1458 	return SpDCCols< IT,T_promote > (m, rhs.n, mydcsc);
1459 }
1460 
1461 }
1462