1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/
6 /****************************************************************/
7 /*
8 Copyright (c) 2010-2017, The Regents of the University of California
9
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 THE SOFTWARE.
27 */
28
29
30 #include "SpDCCols.h"
31 #include "Deleter.h"
32 #include <algorithm>
33 #include <functional>
34 #include <vector>
35 #include <climits>
36 #include <iomanip>
37 #include <cassert>
38
39 namespace combblas {
40
41 /****************************************************************************/
42 /********************* PUBLIC CONSTRUCTORS/DESTRUCTORS **********************/
43 /****************************************************************************/
44
45 template <class IT, class NT>
46 const IT SpDCCols<IT,NT>::esscount = static_cast<IT>(4);
47
48
49 template <class IT, class NT>
SpDCCols()50 SpDCCols<IT,NT>::SpDCCols():dcsc(NULL), m(0), n(0), nnz(0), splits(0){
51 }
52
53 // Allocate all the space necessary
54 template <class IT, class NT>
SpDCCols(IT size,IT nRow,IT nCol,IT nzc)55 SpDCCols<IT,NT>::SpDCCols(IT size, IT nRow, IT nCol, IT nzc)
56 :m(nRow), n(nCol), nnz(size), splits(0)
57 {
58 if(nnz > 0)
59 dcsc = new Dcsc<IT,NT>(nnz, nzc);
60 else
61 dcsc = NULL;
62 }
63
64 template <class IT, class NT>
~SpDCCols()65 SpDCCols<IT,NT>::~SpDCCols()
66 {
67 if(nnz > 0)
68 {
69 if(dcsc != NULL)
70 {
71 if(splits > 0)
72 {
73 for(int i=0; i<splits; ++i)
74 delete dcscarr[i];
75 delete [] dcscarr;
76 }
77 else
78 {
79 delete dcsc;
80 }
81 }
82 }
83 }
84
85 // Copy constructor (constructs a new object. i.e. this is NEVER called on an existing object)
86 // Derived's copy constructor can safely call Base's default constructor as base has no data members
87 template <class IT, class NT>
SpDCCols(const SpDCCols<IT,NT> & rhs)88 SpDCCols<IT,NT>::SpDCCols(const SpDCCols<IT,NT> & rhs)
89 : m(rhs.m), n(rhs.n), nnz(rhs.nnz), splits(rhs.splits)
90 {
91 if(splits > 0)
92 {
93 for(int i=0; i<splits; ++i)
94 CopyDcsc(rhs.dcscarr[i]);
95 }
96 else
97 {
98 CopyDcsc(rhs.dcsc);
99 }
100 }
101
102 /**
103 * Constructor for converting SpTuples matrix -> SpDCCols (may use a private memory heap)
104 * @param[in] rhs if transpose=true,
105 * \n then rhs is assumed to be a row sorted SpTuples object
106 * \n else rhs is assumed to be a column sorted SpTuples object
107 **/
108 template <class IT, class NT>
SpDCCols(const SpTuples<IT,NT> & rhs,bool transpose)109 SpDCCols<IT,NT>::SpDCCols(const SpTuples<IT, NT> & rhs, bool transpose)
110 : m(rhs.m), n(rhs.n), nnz(rhs.nnz), splits(0)
111 {
112
113 if(nnz == 0) // m by n matrix of complete zeros
114 {
115 if(transpose) std::swap(m,n);
116 dcsc = NULL;
117 }
118 else
119 {
120 if(transpose)
121 {
122 std::swap(m,n);
123 IT localnzc = 1;
124 for(IT i=1; i< rhs.nnz; ++i)
125 {
126 if(rhs.rowindex(i) != rhs.rowindex(i-1))
127 {
128 ++localnzc;
129 }
130 }
131 dcsc = new Dcsc<IT,NT>(rhs.nnz,localnzc);
132 dcsc->jc[0] = rhs.rowindex(0);
133 dcsc->cp[0] = 0;
134
135 for(IT i=0; i<rhs.nnz; ++i)
136 {
137 dcsc->ir[i] = rhs.colindex(i); // copy rhs.jc to ir since this transpose=true
138 dcsc->numx[i] = rhs.numvalue(i);
139 }
140
141 IT jspos = 1;
142 for(IT i=1; i<rhs.nnz; ++i)
143 {
144 if(rhs.rowindex(i) != dcsc->jc[jspos-1])
145 {
146 dcsc->jc[jspos] = rhs.rowindex(i); // copy rhs.ir to jc since this transpose=true
147 dcsc->cp[jspos++] = i;
148 }
149 }
150 dcsc->cp[jspos] = rhs.nnz;
151 }
152 else
153 {
154 IT localnzc = 1;
155 for(IT i=1; i<rhs.nnz; ++i)
156 {
157 if(rhs.colindex(i) != rhs.colindex(i-1))
158 {
159 ++localnzc;
160 }
161 }
162 dcsc = new Dcsc<IT,NT>(rhs.nnz,localnzc);
163 dcsc->jc[0] = rhs.colindex(0);
164 dcsc->cp[0] = 0;
165
166 for(IT i=0; i<rhs.nnz; ++i)
167 {
168 dcsc->ir[i] = rhs.rowindex(i); // copy rhs.ir to ir since this transpose=false
169 dcsc->numx[i] = rhs.numvalue(i);
170 }
171
172 IT jspos = 1;
173 for(IT i=1; i<rhs.nnz; ++i)
174 {
175 if(rhs.colindex(i) != dcsc->jc[jspos-1])
176 {
177 dcsc->jc[jspos] = rhs.colindex(i); // copy rhs.jc to jc since this transpose=true
178 dcsc->cp[jspos++] = i;
179 }
180 }
181 dcsc->cp[jspos] = rhs.nnz;
182 }
183 }
184 }
185
186
187
188
189 /**
190 * Multithreaded Constructor for converting tuples matrix -> SpDCCols
191 * @param[in] rhs if transpose=true,
192 * \n then tuples is assumed to be a row sorted list of tuple objects
193 * \n else tuples is assumed to be a column sorted list of tuple objects
194 **/
195
196
197 template <class IT, class NT>
SpDCCols(IT nRow,IT nCol,IT nTuples,const std::tuple<IT,IT,NT> * tuples,bool transpose)198 SpDCCols<IT,NT>::SpDCCols(IT nRow, IT nCol, IT nTuples, const std::tuple<IT, IT, NT>* tuples, bool transpose)
199 : m(nRow), n(nCol), nnz(nTuples), splits(0)
200 {
201
202 if(nnz == 0) // m by n matrix of complete zeros
203 {
204 dcsc = NULL;
205 }
206 else
207 {
208 int totThreads=1;
209 #ifdef _OPENMP
210 #pragma omp parallel
211 {
212 totThreads = omp_get_num_threads();
213 }
214 #endif
215
216 std::vector <IT> tstart(totThreads);
217 std::vector <IT> tend(totThreads);
218 std::vector <IT> tdisp(totThreads+1);
219
220 // extra memory, but replaces an O(nnz) loop by an O(nzc) loop
221 IT* temp_jc = new IT[nTuples];
222 IT* temp_cp = new IT[nTuples];
223
224 #ifdef _OPENMP
225 #pragma omp parallel
226 #endif
227 {
228 int threadID = 0;
229 #ifdef _OPENMP
230 threadID = omp_get_thread_num();
231 #endif
232 IT start = threadID * (nTuples / totThreads);
233 IT end = (threadID + 1) * (nTuples / totThreads);
234 if(threadID == (totThreads-1)) end = nTuples;
235 IT curpos=start;
236 if(end>start) // no work for the current thread
237 {
238 temp_jc[start] = std::get<1>(tuples[start]);
239 temp_cp[start] = start;
240 for (IT i = start+1; i < end; ++i)
241 {
242 if(std::get<1>(tuples[i]) != temp_jc[curpos] )
243 {
244 temp_jc[++curpos] = std::get<1>(tuples[i]);
245 temp_cp[curpos] = i;
246 }
247 }
248 }
249
250 tstart[threadID] = start;
251 if(end>start) tend[threadID] = curpos+1;
252 else tend[threadID] = end; // start=end
253 }
254
255
256 // serial part
257 for(int t=totThreads-1; t>0; --t)
258 {
259 if(tend[t] > tstart[t] && tend[t-1] > tstart[t-1])
260 {
261 if(temp_jc[tstart[t]] == temp_jc[tend[t-1]-1])
262 {
263 tstart[t] ++;
264 }
265 }
266 }
267
268 tdisp[0] = 0;
269 for(int t=0; t<totThreads; ++t)
270 {
271 tdisp[t+1] = tdisp[t] + tend[t] - tstart[t];
272 }
273
274 IT localnzc = tdisp[totThreads];
275 dcsc = new Dcsc<IT,NT>(nTuples,localnzc);
276
277 #ifdef _OPENMP
278 #pragma omp parallel
279 #endif
280 {
281 int threadID = 0;
282 #ifdef _OPENMP
283 threadID = omp_get_thread_num();
284 #endif
285 std::copy(temp_jc + tstart[threadID], temp_jc + tend[threadID], dcsc->jc + tdisp[threadID]);
286 std::copy(temp_cp + tstart[threadID], temp_cp + tend[threadID], dcsc->cp + tdisp[threadID]);
287 }
288 dcsc->cp[localnzc] = nTuples;
289
290 delete [] temp_jc;
291 delete [] temp_cp;
292
293 #ifdef _OPENMP
294 #pragma omp parallel for schedule (static)
295 #endif
296 for(IT i=0; i<nTuples; ++i)
297 {
298 dcsc->ir[i] = std::get<0>(tuples[i]);
299 dcsc->numx[i] = std::get<2>(tuples[i]);
300 }
301 }
302
303 if(transpose) Transpose(); // this is not efficient, think to improve later. We included this parameter anyway to make this constructor different from another constracttor when the fourth argument is passed as 0.
304 }
305
306
307
308 /*
309 template <class IT, class NT>
310 SpDCCols<IT,NT>::SpDCCols(IT nRow, IT nCol, IT nTuples, const tuple<IT, IT, NT>* tuples)
311 : m(nRow), n(nCol), nnz(nTuples), splits(0)
312 {
313
314 if(nnz == 0) // m by n matrix of complete zeros
315 {
316 dcsc = NULL;
317 }
318 else
319 {
320 IT localnzc = 1;
321 #pragma omp parallel for schedule (static) default(shared) reduction(+:localnzc)
322 for(IT i=1; i<nTuples; ++i) // not scaling well, try my own version
323 {
324 if(std::get<1>(tuples[i]) != std::get<1>(tuples[i-1]))
325 {
326 ++localnzc;
327 }
328 }
329
330 dcsc = new Dcsc<IT,NT>(nTuples,localnzc);
331 dcsc->jc[0] = std::get<1>(tuples[0]);
332 dcsc->cp[0] = 0;
333
334 #pragma omp parallel for schedule (static)
335 for(IT i=0; i<nTuples; ++i)
336 {
337 dcsc->ir[i] = std::get<0>(tuples[i]);
338 dcsc->numx[i] = std::get<2>(tuples[i]);
339 }
340
341 IT jspos = 1;
342 for(IT i=1; i<nTuples; ++i) // now this loop
343 {
344 if(std::get<1>(tuples[i]) != dcsc->jc[jspos-1])
345 {
346 dcsc->jc[jspos] = std::get<1>(tuples[i]);
347 dcsc->cp[jspos++] = i;
348 }
349 }
350 dcsc->cp[jspos] = nTuples;
351 }
352 }
353 */
354
355 /****************************************************************************/
356 /************************** PUBLIC OPERATORS ********************************/
357 /****************************************************************************/
358
359 /**
360 * The assignment operator operates on an existing object
361 * The assignment operator is the only operator that is not inherited.
362 * But there is no need to call base's assigment operator as it has no data members
363 */
364 template <class IT, class NT>
operator =(const SpDCCols<IT,NT> & rhs)365 SpDCCols<IT,NT> & SpDCCols<IT,NT>::operator=(const SpDCCols<IT,NT> & rhs)
366 {
367 // this pointer stores the address of the class instance
368 // check for self assignment using address comparison
369 if(this != &rhs)
370 {
371 if(dcsc != NULL && nnz > 0)
372 {
373 delete dcsc;
374 }
375 if(rhs.dcsc != NULL)
376 {
377 dcsc = new Dcsc<IT,NT>(*(rhs.dcsc));
378 nnz = rhs.nnz;
379 }
380 else
381 {
382 dcsc = NULL;
383 nnz = 0;
384 }
385
386 m = rhs.m;
387 n = rhs.n;
388 splits = rhs.splits;
389 }
390 return *this;
391 }
392
393 template <class IT, class NT>
operator +=(const SpDCCols<IT,NT> & rhs)394 SpDCCols<IT,NT> & SpDCCols<IT,NT>::operator+= (const SpDCCols<IT,NT> & rhs)
395 {
396 // this pointer stores the address of the class instance
397 // check for self assignment using address comparison
398 if(this != &rhs)
399 {
400 if(m == rhs.m && n == rhs.n)
401 {
402 if(rhs.nnz == 0)
403 {
404 return *this;
405 }
406 else if(nnz == 0)
407 {
408 dcsc = new Dcsc<IT,NT>(*(rhs.dcsc));
409 nnz = dcsc->nz;
410 }
411 else
412 {
413 (*dcsc) += (*(rhs.dcsc));
414 nnz = dcsc->nz;
415 }
416 }
417 else
418 {
419 std::cout<< "Not addable: " << m << "!=" << rhs.m << " or " << n << "!=" << rhs.n <<std::endl;
420 }
421 }
422 else
423 {
424 std::cout<< "Missing feature (A+A): Use multiply with 2 instead !"<<std::endl;
425 }
426 return *this;
427 }
428
429 template <class IT, class NT>
430 template <typename _UnaryOperation, typename GlobalIT>
PruneI(_UnaryOperation __unary_op,bool inPlace,GlobalIT rowOffset,GlobalIT colOffset)431 SpDCCols<IT,NT>* SpDCCols<IT,NT>::PruneI(_UnaryOperation __unary_op, bool inPlace, GlobalIT rowOffset, GlobalIT colOffset)
432 {
433 if(nnz > 0)
434 {
435 Dcsc<IT,NT>* ret = dcsc->PruneI (__unary_op, inPlace, rowOffset, colOffset);
436 if (inPlace)
437 {
438 nnz = dcsc->nz;
439
440 if(nnz == 0)
441 {
442 delete dcsc;
443 dcsc = NULL;
444 }
445 return NULL;
446 }
447 else
448 {
449 // wrap the new pruned Dcsc into a new SpDCCols
450 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
451 retcols->dcsc = ret;
452 retcols->nnz = retcols->dcsc->nz;
453 retcols->n = n;
454 retcols->m = m;
455 return retcols;
456 }
457 }
458 else
459 {
460 if (inPlace)
461 {
462 return NULL;
463 }
464 else
465 {
466 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
467 retcols->dcsc = NULL;
468 retcols->nnz = 0;
469 retcols->n = n;
470 retcols->m = m;
471 return retcols;
472 }
473 }
474 }
475
476 template <class IT, class NT>
477 template <typename _UnaryOperation>
Prune(_UnaryOperation __unary_op,bool inPlace)478 SpDCCols<IT,NT>* SpDCCols<IT,NT>::Prune(_UnaryOperation __unary_op, bool inPlace)
479 {
480 if(nnz > 0)
481 {
482 Dcsc<IT,NT>* ret = dcsc->Prune (__unary_op, inPlace);
483 if (inPlace)
484 {
485 nnz = dcsc->nz;
486
487 if(nnz == 0)
488 {
489 delete dcsc;
490 dcsc = NULL;
491 }
492 return NULL;
493 }
494 else
495 {
496 // wrap the new pruned Dcsc into a new SpDCCols
497 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
498 retcols->dcsc = ret;
499 retcols->nnz = retcols->dcsc->nz;
500 retcols->n = n;
501 retcols->m = m;
502 return retcols;
503 }
504 }
505 else
506 {
507 if (inPlace)
508 {
509 return NULL;
510 }
511 else
512 {
513 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
514 retcols->dcsc = NULL;
515 retcols->nnz = 0;
516 retcols->n = n;
517 retcols->m = m;
518 return retcols;
519 }
520 }
521 }
522
523
524
525 template <class IT, class NT>
526 template <typename _BinaryOperation>
PruneColumn(NT * pvals,_BinaryOperation __binary_op,bool inPlace)527 SpDCCols<IT,NT>* SpDCCols<IT,NT>::PruneColumn(NT* pvals, _BinaryOperation __binary_op, bool inPlace)
528 {
529 if(nnz > 0)
530 {
531 Dcsc<IT,NT>* ret = dcsc->PruneColumn (pvals, __binary_op, inPlace);
532 if (inPlace)
533 {
534 nnz = dcsc->nz;
535
536 if(nnz == 0)
537 {
538 delete dcsc;
539 dcsc = NULL;
540 }
541 return NULL;
542 }
543 else
544 {
545 // wrap the new pruned Dcsc into a new SpDCCols
546 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
547 retcols->dcsc = ret;
548 retcols->nnz = retcols->dcsc->nz;
549 retcols->n = n;
550 retcols->m = m;
551 return retcols;
552 }
553 }
554 else
555 {
556 if (inPlace)
557 {
558 return NULL;
559 }
560 else
561 {
562 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
563 retcols->dcsc = NULL;
564 retcols->nnz = 0;
565 retcols->n = n;
566 retcols->m = m;
567 return retcols;
568 }
569 }
570 }
571
572
573 template <class IT, class NT>
574 template <typename _BinaryOperation>
PruneColumn(IT * pinds,NT * pvals,_BinaryOperation __binary_op,bool inPlace)575 SpDCCols<IT,NT>* SpDCCols<IT,NT>::PruneColumn(IT* pinds, NT* pvals, _BinaryOperation __binary_op, bool inPlace)
576 {
577 if(nnz > 0)
578 {
579 Dcsc<IT,NT>* ret = dcsc->PruneColumn (pinds, pvals, __binary_op, inPlace);
580 if (inPlace)
581 {
582 nnz = dcsc->nz;
583
584 if(nnz == 0)
585 {
586 delete dcsc;
587 dcsc = NULL;
588 }
589 return NULL;
590 }
591 else
592 {
593 // wrap the new pruned Dcsc into a new SpDCCols
594 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
595 retcols->dcsc = ret;
596 retcols->nnz = retcols->dcsc->nz;
597 retcols->n = n;
598 retcols->m = m;
599 return retcols;
600 }
601 }
602 else
603 {
604 if (inPlace)
605 {
606 return NULL;
607 }
608 else
609 {
610 SpDCCols<IT,NT>* retcols = new SpDCCols<IT, NT>();
611 retcols->dcsc = NULL;
612 retcols->nnz = 0;
613 retcols->n = n;
614 retcols->m = m;
615 return retcols;
616 }
617 }
618 }
619
620
621
622 template <class IT, class NT>
EWiseMult(const SpDCCols<IT,NT> & rhs,bool exclude)623 void SpDCCols<IT,NT>::EWiseMult (const SpDCCols<IT,NT> & rhs, bool exclude)
624 {
625 if(this != &rhs)
626 {
627 if(m == rhs.m && n == rhs.n)
628 {
629 if(rhs.nnz == 0)
630 {
631 if(exclude) // then we don't exclude anything
632 {
633 return;
634 }
635 else // A .* zeros() is zeros()
636 {
637 *this = SpDCCols<IT,NT>(0,m,n,0); // completely reset the matrix
638 }
639 }
640 else if (rhs.nnz != 0 && nnz != 0)
641 {
642 dcsc->EWiseMult (*(rhs.dcsc), exclude);
643 nnz = dcsc->nz;
644 if(nnz == 0 )
645 dcsc = NULL;
646 }
647 }
648 else
649 {
650 std::cout<< "Matrices do not conform for A .* op(B) !"<<std::endl;
651 }
652 }
653 else
654 {
655 std::cout<< "Missing feature (A .* A): Use Square_EWise() instead !"<<std::endl;
656 }
657 }
658
659 /**
660 * @Pre {scaler should NOT contain any zero entries}
661 */
662 template <class IT, class NT>
EWiseScale(NT ** scaler,IT m_scaler,IT n_scaler)663 void SpDCCols<IT,NT>::EWiseScale(NT ** scaler, IT m_scaler, IT n_scaler)
664 {
665 if(m == m_scaler && n == n_scaler)
666 {
667 if(nnz > 0)
668 dcsc->EWiseScale (scaler);
669 }
670 else
671 {
672 std::cout<< "Matrices do not conform for EWiseScale !"<<std::endl;
673 }
674 }
675
676
677 /****************************************************************************/
678 /********************* PUBLIC MEMBER FUNCTIONS ******************************/
679 /****************************************************************************/
680
681 template <class IT, class NT>
CreateImpl(IT * _cp,IT * _jc,IT * _ir,NT * _numx,IT _nz,IT _nzc,IT _m,IT _n)682 void SpDCCols<IT,NT>::CreateImpl(IT * _cp, IT * _jc, IT * _ir, NT * _numx, IT _nz, IT _nzc, IT _m, IT _n)
683 {
684 m = _m;
685 n = _n;
686 nnz = _nz;
687
688 if(nnz > 0)
689 dcsc = new Dcsc<IT,NT>(_cp, _jc, _ir, _numx, _nz, _nzc, false); // memory not owned by DCSC
690 else
691 dcsc = NULL;
692 }
693
694 template <class IT, class NT>
CreateImpl(const std::vector<IT> & essentials)695 void SpDCCols<IT,NT>::CreateImpl(const std::vector<IT> & essentials)
696 {
697 assert(essentials.size() == esscount);
698 nnz = essentials[0];
699 m = essentials[1];
700 n = essentials[2];
701
702 if(nnz > 0)
703 dcsc = new Dcsc<IT,NT>(nnz,essentials[3]);
704 else
705 dcsc = NULL;
706 }
707
708 template <class IT, class NT>
CreateImpl(IT size,IT nRow,IT nCol,std::tuple<IT,IT,NT> * mytuples)709 void SpDCCols<IT,NT>::CreateImpl(IT size, IT nRow, IT nCol, std::tuple<IT, IT, NT> * mytuples)
710 {
711 SpTuples<IT,NT> tuples(size, nRow, nCol, mytuples);
712 tuples.SortColBased();
713
714 #ifdef DEBUG
715 std::pair<IT,IT> rlim = tuples.RowLimits();
716 std::pair<IT,IT> clim = tuples.ColLimits();
717
718 std::ofstream oput;
719 std::stringstream ss;
720 std::string rank;
721 int myrank;
722 MPI_Comm_rank(MPI_COMM_WORLD, &myrank);
723 ss << myrank;
724 ss >> rank;
725 std::string ofilename = "Read";
726 ofilename += rank;
727 oput.open(ofilename.c_str(), std::ios_base::app );
728 oput << "Creating of dimensions " << nRow << "-by-" << nCol << " of size: " << size <<
729 " with row range (" << rlim.first << "," << rlim.second << ") and column range (" << clim.first << "," << clim.second << ")" << std::endl;
730 if(tuples.getnnz() > 0)
731 {
732 IT minfr = joker::get<0>(tuples.front());
733 IT minto = joker::get<1>(tuples.front());
734 IT maxfr = joker::get<0>(tuples.back());
735 IT maxto = joker::get<1>(tuples.back());
736
737 oput << "Min: " << minfr << ", " << minto << "; Max: " << maxfr << ", " << maxto << std::endl;
738 }
739 oput.close();
740 #endif
741
742 SpDCCols<IT,NT> object(tuples, false);
743 *this = object;
744 }
745
746
747 template <class IT, class NT>
GetEssentials() const748 std::vector<IT> SpDCCols<IT,NT>::GetEssentials() const
749 {
750 std::vector<IT> essentials(esscount);
751 essentials[0] = nnz;
752 essentials[1] = m;
753 essentials[2] = n;
754 essentials[3] = (nnz > 0) ? dcsc->nzc : 0;
755 return essentials;
756 }
757
758 template <class IT, class NT>
759 template <typename NNT>
operator SpDCCols<IT,NNT>() const760 SpDCCols<IT,NT>::operator SpDCCols<IT,NNT> () const
761 {
762 Dcsc<IT,NNT> * convert;
763 if(nnz > 0)
764 convert = new Dcsc<IT,NNT>(*dcsc);
765 else
766 convert = NULL;
767
768 return SpDCCols<IT,NNT>(m, n, convert);
769 }
770
771
772 template <class IT, class NT>
773 template <typename NIT, typename NNT>
operator SpDCCols<NIT,NNT>() const774 SpDCCols<IT,NT>::operator SpDCCols<NIT,NNT> () const
775 {
776 Dcsc<NIT,NNT> * convert;
777 if(nnz > 0)
778 convert = new Dcsc<NIT,NNT>(*dcsc);
779 else
780 convert = NULL;
781
782 return SpDCCols<NIT,NNT>(m, n, convert);
783 }
784
785
786 template <class IT, class NT>
GetArrays() const787 Arr<IT,NT> SpDCCols<IT,NT>::GetArrays() const
788 {
789 Arr<IT,NT> arr(3,1);
790
791 if(nnz > 0)
792 {
793 arr.indarrs[0] = LocArr<IT,IT>(dcsc->cp, dcsc->nzc+1);
794 arr.indarrs[1] = LocArr<IT,IT>(dcsc->jc, dcsc->nzc);
795 arr.indarrs[2] = LocArr<IT,IT>(dcsc->ir, dcsc->nz);
796 arr.numarrs[0] = LocArr<NT,IT>(dcsc->numx, dcsc->nz);
797 }
798 else
799 {
800 arr.indarrs[0] = LocArr<IT,IT>(NULL, 0);
801 arr.indarrs[1] = LocArr<IT,IT>(NULL, 0);
802 arr.indarrs[2] = LocArr<IT,IT>(NULL, 0);
803 arr.numarrs[0] = LocArr<NT,IT>(NULL, 0);
804
805 }
806 return arr;
807 }
808
809 /**
810 * O(nnz log(nnz)) time Transpose function
811 * \remarks Performs a lexicographical sort
812 * \remarks Mutator function (replaces the calling object with its transpose)
813 */
814 template <class IT, class NT>
Transpose()815 void SpDCCols<IT,NT>::Transpose()
816 {
817 if(nnz > 0)
818 {
819 SpTuples<IT,NT> Atuples(*this);
820 Atuples.SortRowBased();
821
822 // destruction of (*this) is handled by the assignment operator
823 *this = SpDCCols<IT,NT>(Atuples,true);
824 }
825 else
826 {
827 *this = SpDCCols<IT,NT>(0, n, m, 0);
828 }
829 }
830
831
832 /**
833 * O(nnz log(nnz)) time Transpose function
834 * \remarks Performs a lexicographical sort
835 * \remarks Const function (doesn't mutate the calling object)
836 */
837 template <class IT, class NT>
TransposeConst() const838 SpDCCols<IT,NT> SpDCCols<IT,NT>::TransposeConst() const
839 {
840 SpTuples<IT,NT> Atuples(*this);
841 Atuples.SortRowBased();
842
843 return SpDCCols<IT,NT>(Atuples,true);
844 }
845
846 /**
847 * O(nnz log(nnz)) time Transpose function
848 * \remarks Performs a lexicographical sort
849 * \remarks Const function (doesn't mutate the calling object)
850 */
851 template <class IT, class NT>
TransposeConstPtr() const852 SpDCCols<IT,NT> * SpDCCols<IT,NT>::TransposeConstPtr() const
853 {
854 SpTuples<IT,NT> Atuples(*this);
855 Atuples.SortRowBased();
856
857 return new SpDCCols<IT,NT>(Atuples,true);
858 }
859
860 /**
861 * Splits the matrix into two parts, simply by cutting along the columns
862 * Simple algorithm that doesn't intend to split perfectly, but it should do a pretty good job
863 * Practically destructs the calling object also (frees most of its memory)
864 * \todo {special case of ColSplit, to be deprecated...}
865 */
866 template <class IT, class NT>
Split(SpDCCols<IT,NT> & partA,SpDCCols<IT,NT> & partB)867 void SpDCCols<IT,NT>::Split(SpDCCols<IT,NT> & partA, SpDCCols<IT,NT> & partB)
868 {
869 IT cut = n/2;
870 if(cut == 0)
871 {
872 std::cout<< "Matrix is too small to be splitted" << std::endl;
873 return;
874 }
875
876 Dcsc<IT,NT> *Adcsc = NULL;
877 Dcsc<IT,NT> *Bdcsc = NULL;
878
879 if(nnz != 0)
880 {
881 dcsc->Split(Adcsc, Bdcsc, cut);
882 }
883
884 partA = SpDCCols<IT,NT> (m, cut, Adcsc);
885 partB = SpDCCols<IT,NT> (m, n-cut, Bdcsc);
886
887 // handle destruction through assignment operator
888 *this = SpDCCols<IT, NT>();
889 }
890
891 /**
892 * Splits the matrix into "parts", simply by cutting along the columns
893 * Simple algorithm that doesn't intend to split perfectly, but it should do a pretty good job
894 * Practically destructs the calling object also (frees most of its memory)
895 */
896 template <class IT, class NT>
ColSplit(int parts,std::vector<SpDCCols<IT,NT>> & matrices)897 void SpDCCols<IT,NT>::ColSplit(int parts, std::vector< SpDCCols<IT,NT> > & matrices)
898 {
899 if(parts < 2)
900 {
901 matrices.emplace_back(*this);
902 }
903 else
904 {
905 std::vector<IT> cuts(parts-1);
906 for(int i=0; i< (parts-1); ++i)
907 {
908 cuts[i] = (i+1) * (n/parts);
909 }
910 if(n < parts)
911 {
912 std::cout<< "Matrix is too small to be splitted" << std::endl;
913 return;
914 }
915 std::vector< Dcsc<IT,NT> * > dcscs(parts, NULL);
916
917 if(nnz != 0)
918 {
919 dcsc->ColSplit(dcscs, cuts);
920 }
921
922 for(int i=0; i< (parts-1); ++i)
923 {
924 SpDCCols<IT,NT> matrix = SpDCCols<IT,NT>(m, (n/parts), dcscs[i]);
925 matrices.emplace_back(matrix);
926 }
927 SpDCCols<IT,NT> matrix = SpDCCols<IT,NT>(m, n-cuts[parts-2], dcscs[parts-1]);
928 matrices.emplace_back(matrix);
929 }
930 *this = SpDCCols<IT, NT>(); // handle destruction through assignment operator
931 }
932
933
934 /**
935 * Concatenates (merges) multiple matrices (cut along the columns) into 1 piece
936 * ColSplit() method should have been executed on the object beforehand
937 */
938 template <class IT, class NT>
ColConcatenate(std::vector<SpDCCols<IT,NT>> & matrices)939 void SpDCCols<IT,NT>::ColConcatenate(std::vector< SpDCCols<IT,NT> > & matrices)
940 {
941 std::vector< SpDCCols<IT,NT> * > nonempties;
942 std::vector< Dcsc<IT,NT> * > dcscs;
943 std::vector< IT > offsets;
944 IT runningoffset = 0;
945
946 for(size_t i=0; i< matrices.size(); ++i)
947 {
948 if(matrices[i].nnz != 0)
949 {
950 nonempties.push_back(&(matrices[i]));
951 dcscs.push_back(matrices[i].dcsc);
952 offsets.push_back(runningoffset);
953 }
954 runningoffset += matrices[i].n;
955 }
956
957 if(nonempties.size() < 1)
958 {
959 #ifdef DEBUG
960 std::cout << "Nothing to ColConcatenate" << std::endl;
961 #endif
962 n = runningoffset;
963 }
964 else if(nonempties.size() < 2)
965 {
966 *this = *(nonempties[0]);
967 n = runningoffset;
968 }
969 else // nonempties.size() > 1
970 {
971 Dcsc<IT,NT> * Cdcsc = new Dcsc<IT,NT>();
972 Cdcsc->ColConcatenate(dcscs, offsets);
973 *this = SpDCCols<IT,NT> (nonempties[0]->m, runningoffset, Cdcsc);
974 }
975
976 // destruct parameters
977 for(size_t i=0; i< matrices.size(); ++i)
978 {
979 matrices[i] = SpDCCols<IT,NT>();
980 }
981 }
982
983
984
985 /**
986 * Merges two matrices (cut along the columns) into 1 piece
987 * Split method should have been executed on the object beforehand
988 **/
989 template <class IT, class NT>
Merge(SpDCCols<IT,NT> & partA,SpDCCols<IT,NT> & partB)990 void SpDCCols<IT,NT>::Merge(SpDCCols<IT,NT> & partA, SpDCCols<IT,NT> & partB)
991 {
992 assert( partA.m == partB.m );
993
994 Dcsc<IT,NT> * Cdcsc = new Dcsc<IT,NT>();
995
996 if(partA.nnz == 0 && partB.nnz == 0)
997 {
998 Cdcsc = NULL;
999 }
1000 else if(partA.nnz == 0)
1001 {
1002 Cdcsc = new Dcsc<IT,NT>(*(partB.dcsc));
1003 std::transform(Cdcsc->jc, Cdcsc->jc + Cdcsc->nzc, Cdcsc->jc, std::bind2nd(std::plus<IT>(), partA.n));
1004 }
1005 else if(partB.nnz == 0)
1006 {
1007 Cdcsc = new Dcsc<IT,NT>(*(partA.dcsc));
1008 }
1009 else
1010 {
1011 Cdcsc->Merge(partA.dcsc, partB.dcsc, partA.n);
1012 }
1013 *this = SpDCCols<IT,NT> (partA.m, partA.n + partB.n, Cdcsc);
1014
1015 partA = SpDCCols<IT, NT>();
1016 partB = SpDCCols<IT, NT>();
1017 }
1018
1019 /**
1020 * C += A*B' (Using OuterProduct Algorithm)
1021 * This version is currently limited to multiplication of matrices with the same precision
1022 * (e.g. it can't multiply double-precision matrices with booleans)
1023 * The multiplication is on the specified semiring (passed as parameter)
1024 */
1025 template <class IT, class NT>
1026 template <class SR>
PlusEq_AnXBt(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1027 int SpDCCols<IT,NT>::PlusEq_AnXBt(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1028 {
1029 if(A.isZero() || B.isZero())
1030 {
1031 return -1; // no need to do anything
1032 }
1033 Isect<IT> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
1034 SpHelper::SpIntersect(*(A.dcsc), *(B.dcsc), cols, rows, isect1, isect2, itr1, itr2);
1035
1036 IT kisect = static_cast<IT>(itr1-isect1); // size of the intersection ((itr1-isect1) == (itr2-isect2))
1037 if(kisect == 0)
1038 {
1039 DeleteAll(isect1, isect2, cols, rows);
1040 return -1;
1041 }
1042
1043 StackEntry< NT, std::pair<IT,IT> > * multstack;
1044 IT cnz = SpHelper::SpCartesian< SR > (*(A.dcsc), *(B.dcsc), kisect, isect1, isect2, multstack);
1045 DeleteAll(isect1, isect2, cols, rows);
1046
1047 IT mdim = A.m;
1048 IT ndim = B.m; // since B has already been transposed
1049 if(isZero())
1050 {
1051 dcsc = new Dcsc<IT,NT>(multstack, mdim, ndim, cnz);
1052 }
1053 else
1054 {
1055 dcsc->AddAndAssign(multstack, mdim, ndim, cnz);
1056 }
1057 nnz = dcsc->nz;
1058
1059 delete [] multstack;
1060 return 1;
1061 }
1062
1063 /**
1064 * C += A*B (Using ColByCol Algorithm)
1065 * This version is currently limited to multiplication of matrices with the same precision
1066 * (e.g. it can't multiply double-precision matrices with booleans)
1067 * The multiplication is on the specified semiring (passed as parameter)
1068 */
1069 template <class IT, class NT>
1070 template <typename SR>
PlusEq_AnXBn(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1071 int SpDCCols<IT,NT>::PlusEq_AnXBn(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1072 {
1073 if(A.isZero() || B.isZero())
1074 {
1075 return -1; // no need to do anything
1076 }
1077 StackEntry< NT, std::pair<IT,IT> > * multstack;
1078 int cnz = SpHelper::SpColByCol< SR > (*(A.dcsc), *(B.dcsc), A.n, multstack);
1079
1080 IT mdim = A.m;
1081 IT ndim = B.n;
1082 if(isZero())
1083 {
1084 dcsc = new Dcsc<IT,NT>(multstack, mdim, ndim, cnz);
1085 }
1086 else
1087 {
1088 dcsc->AddAndAssign(multstack, mdim, ndim, cnz);
1089 }
1090 nnz = dcsc->nz;
1091
1092 delete [] multstack;
1093 return 1;
1094 }
1095
1096
1097 template <class IT, class NT>
1098 template <typename SR>
PlusEq_AtXBn(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1099 int SpDCCols<IT,NT>::PlusEq_AtXBn(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1100 {
1101 std::cout << "PlusEq_AtXBn function has not been implemented yet !" << std::endl;
1102 return 0;
1103 }
1104
1105 template <class IT, class NT>
1106 template <typename SR>
PlusEq_AtXBt(const SpDCCols<IT,NT> & A,const SpDCCols<IT,NT> & B)1107 int SpDCCols<IT,NT>::PlusEq_AtXBt(const SpDCCols<IT,NT> & A, const SpDCCols<IT,NT> & B)
1108 {
1109 std::cout << "PlusEq_AtXBt function has not been implemented yet !" << std::endl;
1110 return 0;
1111 }
1112
1113
1114 template <class IT, class NT>
operator ()(IT ri,IT ci) const1115 SpDCCols<IT,NT> SpDCCols<IT,NT>::operator() (IT ri, IT ci) const
1116 {
1117 IT * itr = std::find(dcsc->jc, dcsc->jc + dcsc->nzc, ci);
1118 if(itr != dcsc->jc + dcsc->nzc)
1119 {
1120 IT irbeg = dcsc->cp[itr - dcsc->jc];
1121 IT irend = dcsc->cp[itr - dcsc->jc + 1];
1122
1123 IT * ele = std::find(dcsc->ir + irbeg, dcsc->ir + irend, ri);
1124 if(ele != dcsc->ir + irend)
1125 {
1126 SpDCCols<IT,NT> SingEleMat(1, 1, 1, 1); // 1-by-1 matrix with 1 nonzero
1127 *(SingEleMat.dcsc->numx) = dcsc->numx[ele - dcsc->ir];
1128 *(SingEleMat.dcsc->ir) = *ele;
1129 *(SingEleMat.dcsc->jc) = *itr;
1130 (SingEleMat.dcsc->cp)[0] = 0;
1131 (SingEleMat.dcsc->cp)[1] = 1;
1132 return SingEleMat;
1133 }
1134 else
1135 {
1136 return SpDCCols<IT,NT>(); // 0-by-0 empty matrix
1137 }
1138 }
1139 else
1140 {
1141 return SpDCCols<IT,NT>(); // 0-by-0 empty matrix
1142 }
1143 }
1144
1145 /**
1146 * The almighty indexing polyalgorithm
1147 * Calls different subroutines depending the sparseness of ri/ci
1148 */
1149 template <class IT, class NT>
operator ()(const std::vector<IT> & ri,const std::vector<IT> & ci) const1150 SpDCCols<IT,NT> SpDCCols<IT,NT>::operator() (const std::vector<IT> & ri, const std::vector<IT> & ci) const
1151 {
1152 typedef PlusTimesSRing<NT,NT> PT;
1153
1154 IT rsize = ri.size();
1155 IT csize = ci.size();
1156
1157 if(rsize == 0 && csize == 0)
1158 {
1159 // return an m x n matrix of complete zeros
1160 // since we don't know whether columns or rows are indexed
1161 return SpDCCols<IT,NT> (0, m, n, 0);
1162 }
1163 else if(rsize == 0)
1164 {
1165 return ColIndex(ci);
1166 }
1167 else if(csize == 0)
1168 {
1169 SpDCCols<IT,NT> LeftMatrix(rsize, rsize, this->m, ri, true);
1170 return LeftMatrix.OrdColByCol< PT >(*this);
1171 }
1172 else // this handles the (rsize=1 && csize=1) case as well
1173 {
1174 SpDCCols<IT,NT> LeftMatrix(rsize, rsize, this->m, ri, true);
1175 SpDCCols<IT,NT> RightMatrix(csize, this->n, csize, ci, false);
1176 return LeftMatrix.OrdColByCol< PT >( OrdColByCol< PT >(RightMatrix) );
1177 }
1178 }
1179
1180 template <class IT, class NT>
put(std::ofstream & outfile) const1181 std::ofstream & SpDCCols<IT,NT>::put(std::ofstream & outfile) const
1182 {
1183 if(nnz == 0)
1184 {
1185 outfile << "Matrix doesn't have any nonzeros" <<std::endl;
1186 return outfile;
1187 }
1188 SpTuples<IT,NT> tuples(*this);
1189 outfile << tuples << std::endl;
1190 return outfile;
1191 }
1192
1193
1194 template <class IT, class NT>
get(std::ifstream & infile)1195 std::ifstream & SpDCCols<IT,NT>::get(std::ifstream & infile)
1196 {
1197 std::cout << "Getting... SpDCCols" << std::endl;
1198 IT m, n, nnz;
1199 infile >> m >> n >> nnz;
1200 SpTuples<IT,NT> tuples(nnz, m, n);
1201 infile >> tuples;
1202 tuples.SortColBased();
1203
1204 SpDCCols<IT,NT> object(tuples, false);
1205 *this = object;
1206 return infile;
1207 }
1208
1209
1210 template<class IT, class NT>
PrintInfo(std::ofstream & out) const1211 void SpDCCols<IT,NT>::PrintInfo(std::ofstream & out) const
1212 {
1213 out << "m: " << m ;
1214 out << ", n: " << n ;
1215 out << ", nnz: "<< nnz ;
1216
1217 if(splits > 0)
1218 {
1219 out << ", local splits: " << splits << std::endl;
1220 }
1221 else
1222 {
1223 if(dcsc != NULL)
1224 {
1225 out << ", nzc: "<< dcsc->nzc << std::endl;
1226 }
1227 else
1228 {
1229 out <<", nzc: "<< 0 << std::endl;
1230 }
1231 }
1232 }
1233
1234 template<class IT, class NT>
PrintInfo() const1235 void SpDCCols<IT,NT>::PrintInfo() const
1236 {
1237 std::cout << "m: " << m ;
1238 std::cout << ", n: " << n ;
1239 std::cout << ", nnz: "<< nnz ;
1240
1241 if(splits > 0)
1242 {
1243 std::cout << ", local splits: " << splits << std::endl;
1244 }
1245 else
1246 {
1247 if(dcsc != NULL)
1248 {
1249 std::cout << ", nzc: "<< dcsc->nzc << std::endl;
1250 }
1251 else
1252 {
1253 std::cout <<", nzc: "<< 0 << std::endl;
1254 }
1255
1256 if(m < PRINT_LIMIT && n < PRINT_LIMIT) // small enough to print
1257 {
1258 NT ** A = SpHelper::allocate2D<NT>(m,n);
1259 for(IT i=0; i< m; ++i)
1260 for(IT j=0; j<n; ++j)
1261 A[i][j] = NT();
1262 if(dcsc != NULL)
1263 {
1264 for(IT i=0; i< dcsc->nzc; ++i)
1265 {
1266 for(IT j = dcsc->cp[i]; j<dcsc->cp[i+1]; ++j)
1267 {
1268 IT colid = dcsc->jc[i];
1269 IT rowid = dcsc->ir[j];
1270 A[rowid][colid] = dcsc->numx[j];
1271 }
1272 }
1273 }
1274 for(IT i=0; i< m; ++i)
1275 {
1276 for(IT j=0; j<n; ++j)
1277 {
1278 std::cout << std::setiosflags(std::ios::fixed) << std::setprecision(2) << A[i][j];
1279 std::cout << " ";
1280 }
1281 std::cout << std::endl;
1282 }
1283 SpHelper::deallocate2D(A,m);
1284 }
1285 }
1286 }
1287
1288
1289 /****************************************************************************/
1290 /********************* PRIVATE CONSTRUCTORS/DESTRUCTORS *********************/
1291 /****************************************************************************/
1292
1293 //! Construct SpDCCols from Dcsc
1294 template <class IT, class NT>
SpDCCols(IT nRow,IT nCol,Dcsc<IT,NT> * mydcsc)1295 SpDCCols<IT,NT>::SpDCCols(IT nRow, IT nCol, Dcsc<IT,NT> * mydcsc)
1296 :dcsc(mydcsc), m(nRow), n(nCol), splits(0)
1297 {
1298 if (mydcsc == NULL)
1299 nnz = 0;
1300 else
1301 nnz = mydcsc->nz;
1302 }
1303
1304 //! Create a logical matrix from (row/column) indices array, used for indexing only
1305 template <class IT, class NT>
SpDCCols(IT size,IT nRow,IT nCol,const std::vector<IT> & indices,bool isRow)1306 SpDCCols<IT,NT>::SpDCCols (IT size, IT nRow, IT nCol, const std::vector<IT> & indices, bool isRow)
1307 :m(nRow), n(nCol), nnz(size), splits(0)
1308 {
1309 if(size > 0)
1310 dcsc = new Dcsc<IT,NT>(size,indices,isRow);
1311 else
1312 dcsc = NULL;
1313 }
1314
1315
1316 /****************************************************************************/
1317 /************************* PRIVATE MEMBER FUNCTIONS *************************/
1318 /****************************************************************************/
1319
1320 template <class IT, class NT>
CopyDcsc(Dcsc<IT,NT> * source)1321 inline void SpDCCols<IT,NT>::CopyDcsc(Dcsc<IT,NT> * source)
1322 {
1323 // source dcsc will be NULL if number of nonzeros is zero
1324 if(source != NULL)
1325 dcsc = new Dcsc<IT,NT>(*source);
1326 else
1327 dcsc = NULL;
1328 }
1329
1330 /**
1331 * \return An indexed SpDCCols object without using multiplication
1332 * \pre ci is sorted and is not completely empty.
1333 * \remarks it is OK for some indices ci[i] to be empty in the indexed SpDCCols matrix
1334 * [i.e. in the output, nzc does not need to be equal to n]
1335 */
1336 template <class IT, class NT>
ColIndex(const std::vector<IT> & ci) const1337 SpDCCols<IT,NT> SpDCCols<IT,NT>::ColIndex(const std::vector<IT> & ci) const
1338 {
1339 IT csize = ci.size();
1340 if(nnz == 0) // nothing to index
1341 {
1342 return SpDCCols<IT,NT>(0, m, csize, 0);
1343 }
1344 else if(ci.empty())
1345 {
1346 return SpDCCols<IT,NT>(0, m,0, 0);
1347 }
1348
1349 // First pass for estimation
1350 IT estsize = 0;
1351 IT estnzc = 0;
1352 for(IT i=0, j=0; i< dcsc->nzc && j < csize;)
1353 {
1354 if((dcsc->jc)[i] < ci[j])
1355 {
1356 ++i;
1357 }
1358 else if ((dcsc->jc)[i] > ci[j])
1359 {
1360 ++j;
1361 }
1362 else
1363 {
1364 estsize += (dcsc->cp)[i+1] - (dcsc->cp)[i];
1365 ++estnzc;
1366 ++i;
1367 ++j;
1368 }
1369 }
1370
1371 SpDCCols<IT,NT> SubA(estsize, m, csize, estnzc);
1372 if(estnzc == 0)
1373 {
1374 return SubA; // no need to run the second pass
1375 }
1376 SubA.dcsc->cp[0] = 0;
1377 IT cnzc = 0;
1378 IT cnz = 0;
1379 for(IT i=0, j=0; i < dcsc->nzc && j < csize;)
1380 {
1381 if((dcsc->jc)[i] < ci[j])
1382 {
1383 ++i;
1384 }
1385 else if ((dcsc->jc)[i] > ci[j]) // an empty column for the output
1386 {
1387 ++j;
1388 }
1389 else
1390 {
1391 IT columncount = (dcsc->cp)[i+1] - (dcsc->cp)[i];
1392 SubA.dcsc->jc[cnzc++] = j;
1393 SubA.dcsc->cp[cnzc] = SubA.dcsc->cp[cnzc-1] + columncount;
1394 std::copy(dcsc->ir + dcsc->cp[i], dcsc->ir + dcsc->cp[i+1], SubA.dcsc->ir + cnz);
1395 std::copy(dcsc->numx + dcsc->cp[i], dcsc->numx + dcsc->cp[i+1], SubA.dcsc->numx + cnz);
1396 cnz += columncount;
1397 ++i;
1398 ++j;
1399 }
1400 }
1401 return SubA;
1402 }
1403
1404 template <class IT, class NT>
1405 template <typename SR, typename NTR>
OrdOutProdMult(const SpDCCols<IT,NTR> & rhs) const1406 SpDCCols< IT, typename promote_trait<NT,NTR>::T_promote > SpDCCols<IT,NT>::OrdOutProdMult(const SpDCCols<IT,NTR> & rhs) const
1407 {
1408 typedef typename promote_trait<NT,NTR>::T_promote T_promote;
1409
1410 if(isZero() || rhs.isZero())
1411 {
1412 return SpDCCols< IT, T_promote > (0, m, rhs.n, 0); // return an empty matrix
1413 }
1414 SpDCCols<IT,NTR> Btrans = rhs.TransposeConst();
1415
1416 Isect<IT> *isect1, *isect2, *itr1, *itr2, *cols, *rows;
1417 SpHelper::SpIntersect(*dcsc, *(Btrans.dcsc), cols, rows, isect1, isect2, itr1, itr2);
1418
1419 IT kisect = static_cast<IT>(itr1-isect1); // size of the intersection ((itr1-isect1) == (itr2-isect2))
1420 if(kisect == 0)
1421 {
1422 DeleteAll(isect1, isect2, cols, rows);
1423 return SpDCCols< IT, T_promote > (0, m, rhs.n, 0);
1424 }
1425 StackEntry< T_promote, std::pair<IT,IT> > * multstack;
1426 IT cnz = SpHelper::SpCartesian< SR > (*dcsc, *(Btrans.dcsc), kisect, isect1, isect2, multstack);
1427 DeleteAll(isect1, isect2, cols, rows);
1428
1429 Dcsc<IT, T_promote> * mydcsc = NULL;
1430 if(cnz > 0)
1431 {
1432 mydcsc = new Dcsc< IT,T_promote > (multstack, m, rhs.n, cnz);
1433 delete [] multstack;
1434 }
1435 return SpDCCols< IT,T_promote > (m, rhs.n, mydcsc);
1436 }
1437
1438
1439 template <class IT, class NT>
1440 template <typename SR, typename NTR>
OrdColByCol(const SpDCCols<IT,NTR> & rhs) const1441 SpDCCols< IT, typename promote_trait<NT,NTR>::T_promote > SpDCCols<IT,NT>::OrdColByCol(const SpDCCols<IT,NTR> & rhs) const
1442 {
1443 typedef typename promote_trait<NT,NTR>::T_promote T_promote;
1444
1445 if(isZero() || rhs.isZero())
1446 {
1447 return SpDCCols<IT, T_promote> (0, m, rhs.n, 0); // return an empty matrix
1448 }
1449 StackEntry< T_promote, std::pair<IT,IT> > * multstack;
1450 IT cnz = SpHelper::SpColByCol< SR > (*dcsc, *(rhs.dcsc), n, multstack);
1451
1452 Dcsc<IT,T_promote > * mydcsc = NULL;
1453 if(cnz > 0)
1454 {
1455 mydcsc = new Dcsc< IT,T_promote > (multstack, m, rhs.n, cnz);
1456 delete [] multstack;
1457 }
1458 return SpDCCols< IT,T_promote > (m, rhs.n, mydcsc);
1459 }
1460
1461 }
1462