1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 05/15/2016 --------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc, Adam Lugowski ------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2016, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #include "dcsc.h"
31 #include <algorithm>
32 #include <functional>
33 #include <iostream>
34 #include "Friends.h"
35 #include "SpHelper.h"
36 
37 namespace combblas {
38 
39 template <class IT, class NT>
Dcsc()40 Dcsc<IT,NT>::Dcsc ():cp(NULL), jc(NULL), ir(NULL), numx(NULL),nz(0), nzc(0), memowned(true){}
41 
42 template <class IT, class NT>
Dcsc(IT nnz,IT nzcol)43 Dcsc<IT,NT>::Dcsc (IT nnz, IT nzcol): nz(nnz),nzc(nzcol),memowned(true)
44 {
45 	assert (nz != 0);
46 	assert (nzc != 0);
47 	cp = new IT[nzc+1];
48 	jc = new IT[nzc];
49 	ir = new IT[nz];
50 	numx = new NT[nz];
51 }
52 
53 //! GetIndices helper function for StackEntry arrays
54 template <class IT, class NT>
getindices(StackEntry<NT,std::pair<IT,IT>> * multstack,IT & rindex,IT & cindex,IT & j,IT nnz)55 inline void Dcsc<IT,NT>::getindices (StackEntry<NT, std::pair<IT,IT> > * multstack, IT & rindex, IT & cindex, IT & j, IT nnz)
56 {
57 	if(j<nnz)
58 	{
59 		cindex = multstack[j].key.first;
60 		rindex = multstack[j].key.second;
61 	}
62 	else
63 	{
64 		rindex = std::numeric_limits<IT>::max();
65 		cindex = std::numeric_limits<IT>::max();
66 	}
67 	++j;
68 }
69 
70 template <class IT, class NT>
AddAndAssign(StackEntry<NT,std::pair<IT,IT>> * multstack,IT mdim,IT ndim,IT nnz)71 Dcsc<IT,NT> & Dcsc<IT,NT>::AddAndAssign (StackEntry<NT, std::pair<IT,IT> > * multstack, IT mdim, IT ndim, IT nnz)
72 {
73 	if(nnz == 0)	return *this;
74 
75 	IT estnzc = nzc + nnz;
76 	IT estnz  = nz + nnz;
77 	Dcsc<IT,NT> temp(estnz, estnzc);
78 
79 	IT curnzc = 0;		// number of nonzero columns constructed so far
80 	IT curnz = 0;
81 	IT i = 0;
82 	IT j = 0;
83 	IT rindex, cindex;
84 	getindices(multstack, rindex, cindex,j,nnz);
85 
86 	temp.cp[0] = 0;
87 	while(i< nzc && cindex < std::numeric_limits<IT>::max())	// i runs over columns of "this",  j runs over all the nonzeros of "multstack"
88 	{
89 		if(jc[i] > cindex)
90 		{
91 			IT columncount = 0;
92 			temp.jc[curnzc++] = cindex;
93 			do
94 			{
95 				temp.ir[curnz] 		= rindex;
96 				temp.numx[curnz++] 	= multstack[j-1].value;
97 
98 				getindices(multstack, rindex, cindex,j,nnz);
99 				++columncount;
100 			}
101 			while(temp.jc[curnzc-1] == cindex);	// loop until cindex changes
102 
103 			temp.cp[curnzc] = temp.cp[curnzc-1] + columncount;
104 		}
105 		else if(jc[i] < cindex)
106 		{
107 			temp.jc[curnzc++] = jc[i++];
108 			for(IT k = cp[i-1]; k< cp[i]; ++k)
109 			{
110 				temp.ir[curnz] 		= ir[k];
111 				temp.numx[curnz++] 	= numx[k];
112 			}
113 			temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
114 		}
115 		else	// they are equal, merge the column
116 		{
117 			temp.jc[curnzc++] = jc[i];
118 			IT ii = cp[i];
119 			IT prevnz = curnz;
120 			while (ii < cp[i+1] && cindex == jc[i])	// cindex would be MAX if multstack is deplated
121 			{
122 				if (ir[ii] < rindex)
123 				{
124 					temp.ir[curnz] = ir[ii];
125 					temp.numx[curnz++] = numx[ii++];
126 				}
127 				else if (ir[ii] > rindex)
128 				{
129 					temp.ir[curnz] = rindex;
130 					temp.numx[curnz++] = multstack[j-1].value;
131 
132 					getindices(multstack, rindex, cindex,j,nnz);
133 				}
134 				else
135 				{
136 					temp.ir[curnz] = ir[ii];
137 					temp.numx[curnz++] = numx[ii++] + multstack[j-1].value;
138 
139 					getindices(multstack, rindex, cindex,j,nnz);
140 				}
141 			}
142 			while (ii < cp[i+1])
143 			{
144 				temp.ir[curnz] = ir[ii];
145 				temp.numx[curnz++] = numx[ii++];
146 			}
147 			while (cindex == jc[i])
148 			{
149 				temp.ir[curnz] = rindex;
150 				temp.numx[curnz++] = multstack[j-1].value;
151 
152 				getindices(multstack, rindex, cindex,j,nnz);
153 			}
154 			temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
155 			++i;
156 		}
157 	}
158 	while(i< nzc)
159 	{
160 		temp.jc[curnzc++] = jc[i++];
161 		for(IT k = cp[i-1]; k< cp[i]; ++k)
162 		{
163 			temp.ir[curnz] 		= ir[k];
164 			temp.numx[curnz++] 	= numx[k];
165 		}
166 		temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
167 	}
168 	while(cindex < std::numeric_limits<IT>::max())
169 	{
170 		IT columncount = 0;
171 		temp.jc[curnzc++] = cindex;
172 		do
173 		{
174 			temp.ir[curnz] 		= rindex;
175 			temp.numx[curnz++] 	= multstack[j-1].value;
176 
177 			getindices(multstack, rindex, cindex,j,nnz);
178 			++columncount;
179 		}
180 		while(temp.jc[curnzc-1] == cindex);	// loop until cindex changes
181 
182 		temp.cp[curnzc] = temp.cp[curnzc-1] + columncount;
183 	}
184 	temp.Resize(curnzc, curnz);
185 	*this = temp;
186 	return *this;
187 }
188 
189 
190 /**
191   * Creates DCSC structure from an array of StackEntry's
192   * \remark Complexity: O(nnz)
193   */
194 template <class IT, class NT>
Dcsc(StackEntry<NT,std::pair<IT,IT>> * multstack,IT mdim,IT ndim,IT nnz)195 Dcsc<IT,NT>::Dcsc (StackEntry<NT, std::pair<IT,IT> > * multstack, IT mdim, IT ndim, IT nnz): nz(nnz),memowned(true)
196 {
197 	nzc = std::min(ndim, nnz);	// nzc can't exceed any of those
198 
199 	assert(nz != 0 );
200 	cp = new IT[nzc+1];	// to be shrinked
201 	jc = new IT[nzc];	// to be shrinked
202 	ir = new IT[nz];
203 	numx = new NT[nz];
204 
205 	IT curnzc = 0;				// number of nonzero columns constructed so far
206 	IT cindex = multstack[0].key.first;
207 	IT rindex = multstack[0].key.second;
208 
209 	ir[0]	= rindex;
210 	numx[0] = multstack[0].value;
211 	jc[curnzc] = cindex;
212 	cp[curnzc] = 0;
213 	++curnzc;
214 
215 	for(IT i=1; i<nz; ++i)
216 	{
217 		cindex = multstack[i].key.first;
218 		rindex = multstack[i].key.second;
219 
220 		ir[i]	= rindex;
221 		numx[i] = multstack[i].value;
222 		if(cindex != jc[curnzc-1])
223 		{
224 			jc[curnzc] = cindex;
225 			cp[curnzc++] = i;
226 		}
227 	}
228 	cp[curnzc] = nz;
229 	Resize(curnzc, nz);	// only shrink cp & jc arrays
230 }
231 
232 /**
233   * Create a logical matrix from (row/column) indices array
234   * \remark This function should only be used for indexing
235   * \remark For these temporary matrices nz = nzc (which are both equal to nnz)
236   */
237 template <class IT, class NT>
Dcsc(IT nnz,const std::vector<IT> & indices,bool isRow)238 Dcsc<IT,NT>::Dcsc (IT nnz, const std::vector<IT> & indices, bool isRow): nz(nnz),nzc(nnz),memowned(true)
239 {
240 	assert((nnz != 0) && (indices.size() == nnz));
241 	cp = new IT[nnz+1];
242 	jc = new IT[nnz];
243 	ir = new IT[nnz];
244 	numx = new NT[nnz];
245 
246 	SpHelper::iota(cp, cp+nnz+1, 0);  // insert sequential values {0,1,2,..}
247 	std::fill_n(numx, nnz, static_cast<NT>(1));
248 
249 	if(isRow)
250 	{
251 		SpHelper::iota(ir, ir+nnz, 0);
252 		std::copy (indices.begin(), indices.end(), jc);
253 	}
254 	else
255 	{
256 		SpHelper::iota(jc, jc+nnz, 0);
257 		std::copy (indices.begin(), indices.end(), ir);
258 	}
259 }
260 
261 
262 template <class IT, class NT>
263 template <typename NNT>
operator Dcsc<IT,NNT>() const264 Dcsc<IT,NT>::operator Dcsc<IT,NNT>() const
265 {
266 	Dcsc<IT,NNT> convert(nz, nzc);
267 
268 	for(IT i=0; i< nz; ++i)
269 	{
270 		convert.numx[i] = static_cast<NNT>(numx[i]);
271 	}
272 	std::copy(ir, ir+nz, convert.ir);	// copy(first, last, result)
273 	std::copy(jc, jc+nzc, convert.jc);
274 	std::copy(cp, cp+nzc+1, convert.cp);
275 	return convert;
276 }
277 
278 template <class IT, class NT>
279 template <typename NIT, typename NNT>
operator Dcsc<NIT,NNT>() const280 Dcsc<IT,NT>::operator Dcsc<NIT,NNT>() const
281 {
282 	Dcsc<NIT,NNT> convert(nz, nzc);
283 
284 	for(IT i=0; i< nz; ++i)
285 		convert.numx[i] = static_cast<NNT>(numx[i]);
286 	for(IT i=0; i< nz; ++i)
287 		convert.ir[i] = static_cast<NIT>(ir[i]);
288 	for(IT i=0; i< nzc; ++i)
289 		convert.jc[i] = static_cast<NIT>(jc[i]);
290 	for(IT i=0; i<= nzc; ++i)
291 		convert.cp[i] = static_cast<NIT>(cp[i]);
292 	return convert;
293 }
294 
295 template <class IT, class NT>
Dcsc(const Dcsc<IT,NT> & rhs)296 Dcsc<IT,NT>::Dcsc (const Dcsc<IT,NT> & rhs): nz(rhs.nz), nzc(rhs.nzc),memowned(true)
297 {
298 	if(nz > 0)
299 	{
300 		numx = new NT[nz];
301 		ir = new IT[nz];
302 		std::copy(rhs.numx, rhs.numx + nz, numx);	// numx can be a non-POD type
303 		std::copy(rhs.ir, rhs.ir + nz, ir);
304 	}
305 	else
306 	{
307 		numx = NULL;
308 		ir = NULL;
309 	}
310 	if(nzc > 0)
311 	{
312 		jc = new IT[nzc];
313 		cp = new IT[nzc+1];
314 		std::copy(rhs.jc, rhs.jc + nzc, jc);
315 		std::copy(rhs.cp, rhs.cp + nzc + 1, cp);
316 	}
317 	else
318 	{
319 		jc = NULL;
320 		cp = NULL;
321 	}
322 }
323 
324 /**
325   * Assignment operator (called on an existing object)
326   */
327 template <class IT, class NT>
operator =(const Dcsc<IT,NT> & rhs)328 Dcsc<IT,NT> & Dcsc<IT,NT>::operator=(const Dcsc<IT,NT> & rhs)
329 {
330 	if(this != &rhs)
331 	{
332 		// make empty first !
333 		if(nz > 0)
334 		{
335 			delete[] numx;
336 			delete[] ir;
337 		}
338 		if(nzc > 0)
339 		{
340 			delete[] jc;
341 			delete[] cp;
342 		}
343 		nz = rhs.nz;
344 		nzc = rhs.nzc;
345 		if(nz > 0)
346 		{
347 			numx = new NT[nz];
348 			ir = new IT[nz];
349 			std::copy(rhs.numx, rhs.numx + nz, numx);	// numx can be a non-POD type
350 			std::copy(rhs.ir, rhs.ir + nz, ir);
351 		}
352 		else
353 		{
354 			numx = NULL;
355 			ir = NULL;
356 		}
357 		if(nzc > 0)
358 		{
359 			jc = new IT[nzc];
360 	                cp = new IT[nzc+1];
361         	        std::copy(rhs.jc, rhs.jc + nzc, jc);
362                 	std::copy(rhs.cp, rhs.cp + nzc + 1, cp);
363 		}
364 		else
365 		{
366 			jc = NULL;
367 			cp = NULL;
368 		}
369 	}
370 	return *this;
371 }
372 
373 template <class IT, class NT>
operator +=(const Dcsc<IT,NT> & rhs)374 Dcsc<IT, NT> & Dcsc<IT,NT>::operator+=(const Dcsc<IT,NT> & rhs)	// add and assign operator
375 {
376 	IT estnzc = nzc + rhs.nzc;
377 	IT estnz  = nz + rhs.nz;
378 	Dcsc<IT,NT> temp(estnz, estnzc);
379 
380 	IT curnzc = 0;
381 	IT curnz = 0;
382 	IT i = 0;
383 	IT j = 0;
384 	temp.cp[0] = 0;
385 	while(i< nzc && j<rhs.nzc)
386 	{
387 		if(jc[i] > rhs.jc[j])
388 		{
389 			temp.jc[curnzc++] = rhs.jc[j++];
390 			for(IT k = rhs.cp[j-1]; k< rhs.cp[j]; ++k)
391 			{
392 				temp.ir[curnz] 		= rhs.ir[k];
393 				temp.numx[curnz++] 	= rhs.numx[k];
394 			}
395 			temp.cp[curnzc] = temp.cp[curnzc-1] + (rhs.cp[j] - rhs.cp[j-1]);
396 		}
397 		else if(jc[i] < rhs.jc[j])
398 		{
399 			temp.jc[curnzc++] = jc[i++];
400 			for(IT k = cp[i-1]; k< cp[i]; k++)
401 			{
402 				temp.ir[curnz] 		= ir[k];
403 				temp.numx[curnz++] 	= numx[k];
404 			}
405 			temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
406 		}
407 		else
408 		{
409 			temp.jc[curnzc++] = jc[i];
410 			IT ii = cp[i];
411 			IT jj = rhs.cp[j];
412 			IT prevnz = curnz;
413 			while (ii < cp[i+1] && jj < rhs.cp[j+1])
414 			{
415 				if (ir[ii] < rhs.ir[jj])
416 				{
417 					temp.ir[curnz] = ir[ii];
418 					temp.numx[curnz++] = numx[ii++];
419 				}
420 				else if (ir[ii] > rhs.ir[jj])
421 				{
422 					temp.ir[curnz] = rhs.ir[jj];
423 					temp.numx[curnz++] = rhs.numx[jj++];
424 				}
425 				else
426 				{
427 					temp.ir[curnz] = ir[ii];
428                     temp.numx[curnz++] = numx[ii++] + rhs.numx[jj++];       // might include zeros
429 				}
430 			}
431 			while (ii < cp[i+1])
432 			{
433 				temp.ir[curnz] = ir[ii];
434 				temp.numx[curnz++] = numx[ii++];
435 			}
436 			while (jj < rhs.cp[j+1])
437 			{
438 				temp.ir[curnz] = rhs.ir[jj];
439 				temp.numx[curnz++] = rhs.numx[jj++];
440 			}
441 			temp.cp[curnzc] = temp.cp[curnzc-1] + curnz-prevnz;
442 			++i;
443 			++j;
444 		}
445 	}
446 	while(i< nzc)
447 	{
448 		temp.jc[curnzc++] = jc[i++];
449 		for(IT k = cp[i-1]; k< cp[i]; ++k)
450 		{
451 			temp.ir[curnz] 	= ir[k];
452 			temp.numx[curnz++] = numx[k];
453 		}
454 		temp.cp[curnzc] = temp.cp[curnzc-1] + (cp[i] - cp[i-1]);
455 	}
456 	while(j < rhs.nzc)
457 	{
458 		temp.jc[curnzc++] = rhs.jc[j++];
459 		for(IT k = rhs.cp[j-1]; k< rhs.cp[j]; ++k)
460 		{
461 			temp.ir[curnz] 	= rhs.ir[k];
462 			temp.numx[curnz++] 	= rhs.numx[k];
463 		}
464 		temp.cp[curnzc] = temp.cp[curnzc-1] + (rhs.cp[j] - rhs.cp[j-1]);
465 	}
466 	temp.Resize(curnzc, curnz);
467 	*this = temp;
468 	return *this;
469 }
470 
471 template <class IT, class NT>
operator ==(const Dcsc<IT,NT> & rhs)472 bool Dcsc<IT,NT>::operator==(const Dcsc<IT,NT> & rhs)
473 {
474 	if(nzc != rhs.nzc) return false;
475 	bool same = std::equal(cp, cp+nzc+1, rhs.cp);
476 	same = same && std::equal(jc, jc+nzc, rhs.jc);
477 	same = same && std::equal(ir, ir+nz, rhs.ir);
478 
479 #ifdef DEBUG
480   std::vector<NT> error(nz);
481   std::transform(numx, numx+nz, rhs.numx, error.begin(), absdiff<NT>());
482   std::vector< std::pair<NT, NT> > error_original_pair(nz);
483 	for(IT i=0; i < nz; ++i)
484 		error_original_pair[i] = std::make_pair(error[i], numx[i]);
485 	if(error_original_pair.size() > 10)	// otherwise would crush for small data
486 	{
487 		partial_sort(error_original_pair.begin(), error_original_pair.begin()+10, error_original_pair.end(), std::greater< std::pair<NT,NT> >());
488     std::cout << "Highest 10 different entries are: " << std::endl;
489 		for(IT i=0; i < 10; ++i)
490 			std::cout << "Diff: " << error_original_pair[i].first << " on " << error_original_pair[i].second << std::endl;
491 	}
492 	else
493 	{
494 		sort(error_original_pair.begin(), error_original_pair.end(), std::greater< std::pair<NT,NT> >());
495 		std::cout << "Highest different entries are: " << std::endl;
496 		for(typename std::vector< std::pair<NT, NT> >::iterator it=error_original_pair.begin(); it != error_original_pair.end(); ++it)
497 			std::cout << "Diff: " << it->first << " on " << it->second << std::endl;
498 	}
499 	std::cout << "Same before num: " << same << std::endl;
500 #endif
501 
502 	ErrorTolerantEqual<NT> epsilonequal;
503 	same = same && std::equal(numx, numx+nz, rhs.numx, epsilonequal );
504 	return same;
505 }
506 
507 /**
508  * @param[in]   exclude if false,
509  *      \n              then operation is A = A .* B
510  *      \n              else operation is A = A .* not(B)
511  **/
512 template <class IT, class NT>
EWiseMult(const Dcsc<IT,NT> & rhs,bool exclude)513 void Dcsc<IT,NT>::EWiseMult(const Dcsc<IT,NT> & rhs, bool exclude)
514 {
515 	// We have a class with a friend function and a member function with the same name. Calling the friend function from the member function
516 	// might (if the signature is the same) give compilation errors if not preceded by :: that denotes the global scope.
517 	*this = combblas::EWiseMult((*this), &rhs, exclude);	// call the binary version
518 }
519 
520 
521 template <class IT, class NT>
522 template <typename _UnaryOperation, typename GlobalIT>
PruneI(_UnaryOperation __unary_op,bool inPlace,GlobalIT rowOffset,GlobalIT colOffset)523 Dcsc<IT,NT>* Dcsc<IT,NT>::PruneI(_UnaryOperation __unary_op, bool inPlace, GlobalIT rowOffset, GlobalIT colOffset)
524 {
525 	// Two-pass algorithm
526 	IT prunednnz = 0;
527 	IT prunednzc = 0;
528 	for(IT i=0; i<nzc; ++i)
529 	{
530 		bool colexists = false;
531 		for(IT j=cp[i]; j < cp[i+1]; ++j)
532 		{
533 			if(!(__unary_op(make_tuple(rowOffset+ir[j], colOffset+jc[i], numx[j])))) 	// keep this nonzero
534 			{
535 				++prunednnz;
536 				colexists = true;
537 			}
538 		}
539 		if(colexists) 	++prunednzc;
540 	}
541 	IT * oldcp = cp;
542 	IT * oldjc = jc;
543 	IT * oldir = ir;
544 	NT * oldnumx = numx;
545 
546 	cp = new IT[prunednzc+1];
547 	jc = new IT[prunednzc];
548 	ir = new IT[prunednnz];
549 	numx = new NT[prunednnz];
550 
551 	IT cnzc = 0;
552 	IT cnnz = 0;
553 	cp[cnzc] = 0;
554 	for(IT i=0; i<nzc; ++i)
555 	{
556 		for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
557 		{
558 			if(!(__unary_op(make_tuple(rowOffset+oldir[j], colOffset+oldjc[i], oldnumx[j])))) // keep this nonzero
559 			{
560 				ir[cnnz] = oldir[j];
561 				numx[cnnz++] = 	oldnumx[j];
562 			}
563 		}
564 		if(cnnz > cp[cnzc])
565 		{
566 			jc[cnzc] = oldjc[i];
567 			cp[cnzc+1] = cnnz;
568 			++cnzc;
569 		}
570 	}
571 	assert(cnzc == prunednzc);
572 	assert(cnnz == prunednnz);
573 	if (inPlace)
574 	{
575 		// delete the memory pointed by previous pointers
576 		DeleteAll(oldnumx, oldir, oldjc, oldcp);
577 		nz = cnnz;
578 		nzc = cnzc;
579 		return NULL;
580 	}
581 	else
582 	{
583 		// create a new object to store the data
584 		Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
585 		ret->cp = cp;
586 		ret->jc = jc;
587 		ret->ir = ir;
588 		ret->numx = numx;
589 		ret->nz = cnnz;
590 		ret->nzc = cnzc;
591 
592 		// put the previous pointers back
593 		cp = oldcp;
594 		jc = oldjc;
595 		ir = oldir;
596 		numx = oldnumx;
597 
598 		return ret;
599 	}
600 }
601 
602 template <class IT, class NT>
603 template <typename _UnaryOperation>
Prune(_UnaryOperation __unary_op,bool inPlace)604 Dcsc<IT,NT>* Dcsc<IT,NT>::Prune(_UnaryOperation __unary_op, bool inPlace)
605 {
606 	// Two-pass algorithm
607 	IT prunednnz = 0;
608 	IT prunednzc = 0;
609 	for(IT i=0; i<nzc; ++i)
610 	{
611 		bool colexists = false;
612 		for(IT j=cp[i]; j < cp[i+1]; ++j)
613 		{
614 			if(!(__unary_op(numx[j]))) 	// keep this nonzero
615 			{
616 				++prunednnz;
617 				colexists = true;
618 			}
619 		}
620 		if(colexists) 	++prunednzc;
621 	}
622 	IT * oldcp = cp;
623 	IT * oldjc = jc;
624 	IT * oldir = ir;
625 	NT * oldnumx = numx;
626 
627 	cp = new IT[prunednzc+1];
628 	jc = new IT[prunednzc];
629 	ir = new IT[prunednnz];
630 	numx = new NT[prunednnz];
631 
632 	IT cnzc = 0;
633 	IT cnnz = 0;
634 	cp[cnzc] = 0;
635 	for(IT i=0; i<nzc; ++i)
636 	{
637 		for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
638 		{
639 			if(!(__unary_op(oldnumx[j]))) // keep this nonzero
640 			{
641 				ir[cnnz] = oldir[j];
642 				numx[cnnz++] = 	oldnumx[j];
643 			}
644 		}
645 		if(cnnz > cp[cnzc])
646 		{
647 			jc[cnzc] = oldjc[i];
648 			cp[cnzc+1] = cnnz;
649 			++cnzc;
650 		}
651 	}
652 	assert(cnzc == prunednzc);
653 	assert(cnnz == prunednnz);
654 	if (inPlace)
655 	{
656 		// delete the memory pointed by previous pointers
657 		DeleteAll(oldnumx, oldir, oldjc, oldcp);
658 		nz = cnnz;
659 		nzc = cnzc;
660 		return NULL;
661 	}
662 	else
663 	{
664 		// create a new object to store the data
665 		Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
666 		ret->cp = cp;
667 		ret->jc = jc;
668 		ret->ir = ir;
669 		ret->numx = numx;
670 		ret->nz = cnnz;
671 		ret->nzc = cnzc;
672 
673 		// put the previous pointers back
674 		cp = oldcp;
675 		jc = oldjc;
676 		ir = oldir;
677 		numx = oldnumx;
678 
679 		return ret;
680 	}
681 }
682 
683 
684 template <class IT, class NT>
685 template <typename _BinaryOperation>
PruneColumn(NT * pvals,_BinaryOperation __binary_op,bool inPlace)686 Dcsc<IT,NT>* Dcsc<IT,NT>::PruneColumn(NT* pvals, _BinaryOperation __binary_op, bool inPlace)
687 {
688     // Two-pass algorithm
689     IT prunednnz = 0;
690     IT prunednzc = 0;
691     for(IT i=0; i<nzc; ++i)
692     {
693         bool colexists = false;
694         for(IT j=cp[i]; j < cp[i+1]; ++j)
695         {
696             IT colid = jc[i];
697             if(!(__binary_op(numx[j], pvals[colid]))) 	// keep this nonzero
698             {
699                 ++prunednnz;
700                 colexists = true;
701             }
702         }
703         if(colexists) 	++prunednzc;
704     }
705     IT * oldcp = cp;
706     IT * oldjc = jc;
707     IT * oldir = ir;
708     NT * oldnumx = numx;
709 
710     cp = new IT[prunednzc+1];
711     jc = new IT[prunednzc];
712     ir = new IT[prunednnz];
713     numx = new NT[prunednnz];
714 
715     IT cnzc = 0;
716     IT cnnz = 0;
717     cp[cnzc] = 0;
718     for(IT i=0; i<nzc; ++i)
719     {
720         for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
721         {
722             IT colid = oldjc[i];
723             if(!(__binary_op(oldnumx[j], pvals[colid])))
724             {
725                 ir[cnnz] = oldir[j];
726                 numx[cnnz++] = 	oldnumx[j];
727             }
728         }
729         if(cnnz > cp[cnzc])
730         {
731             jc[cnzc] = oldjc[i];
732             cp[cnzc+1] = cnnz;
733             ++cnzc;
734         }
735     }
736     assert(cnzc == prunednzc);
737     if (inPlace)
738     {
739         // delete the memory pointed by previous pointers
740         DeleteAll(oldnumx, oldir, oldjc, oldcp);
741         nz = cnnz;
742         nzc = cnzc;
743         return NULL;
744     }
745     else
746     {
747         // create a new object to store the data
748         Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
749         ret->cp = cp;
750         ret->jc = jc;
751         ret->ir = ir;
752         ret->numx = numx;
753         ret->nz = cnnz;
754         ret->nzc = cnzc;
755 
756         // put the previous pointers back
757         cp = oldcp;
758         jc = oldjc;
759         ir = oldir;
760         numx = oldnumx;
761 
762         return ret;
763     }
764 }
765 
766 
767 // prune selected columns indexed by pinds
768 template <class IT, class NT>
769 template <typename _BinaryOperation>
PruneColumn(IT * pinds,NT * pvals,_BinaryOperation __binary_op,bool inPlace)770 Dcsc<IT,NT>* Dcsc<IT,NT>::PruneColumn(IT* pinds, NT* pvals, _BinaryOperation __binary_op, bool inPlace)
771 {
772     // Two-pass algorithm
773     IT prunednnz = 0;
774     IT prunednzc = 0;
775     IT k = 0;
776     for(IT i=0; i<nzc; ++i)
777     {
778         bool colexists = false;
779         IT colid = jc[i];
780         if(colid==pinds[k]) // pinds is sorted
781         {
782             for(IT j=cp[i]; j < cp[i+1]; ++j)
783             {
784                 if(!(__binary_op(numx[j], pvals[k]))) 	// keep this nonzero
785                 {
786                     ++prunednnz;
787                     colexists = true;
788                 }
789             }
790             k++;
791         }
792         else // untouched columns
793         {
794             colexists = true;
795             prunednnz += (cp[i+1] - cp[i]);
796         }
797         if(colexists) 	++prunednzc;
798     }
799     IT * oldcp = cp;
800     IT * oldjc = jc;
801     IT * oldir = ir;
802     NT * oldnumx = numx;
803 
804     cp = new IT[prunednzc+1];
805     jc = new IT[prunednzc];
806     ir = new IT[prunednnz];
807     numx = new NT[prunednnz];
808 
809     IT cnzc = 0;
810     IT cnnz = 0;
811     cp[cnzc] = 0;
812     k = 0;
813     for(IT i=0; i<nzc; ++i)
814     {
815         IT colid = oldjc[i];
816         if(colid==pinds[k]) // prunned columns
817         {
818             for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
819             {
820                 if(!(__binary_op(oldnumx[j], pvals[k])))
821                 {
822                     ir[cnnz] = oldir[j];
823                     numx[cnnz++] = 	oldnumx[j];
824                 }
825             }
826             k++;
827         }
828         else // copy other columns
829         {
830             for(IT j = oldcp[i]; j < oldcp[i+1]; ++j)
831             {
832                 ir[cnnz] = oldir[j];
833                 numx[cnnz++] = 	oldnumx[j];
834             }
835         }
836         if(cnnz > cp[cnzc])
837         {
838             jc[cnzc] = oldjc[i];
839             cp[cnzc+1] = cnnz;
840             ++cnzc;
841         }
842     }
843     assert(cnzc == prunednzc);
844     if (inPlace)
845     {
846         // delete the memory pointed by previous pointers
847         DeleteAll(oldnumx, oldir, oldjc, oldcp);
848         nz = cnnz;
849         nzc = cnzc;
850         return NULL;
851     }
852     else
853     {
854         // create a new object to store the data
855         Dcsc<IT,NT>* ret = new Dcsc<IT,NT>();
856         ret->cp = cp;
857         ret->jc = jc;
858         ret->ir = ir;
859         ret->numx = numx;
860         ret->nz = cnnz;
861         ret->nzc = cnzc;
862 
863         // put the previous pointers back
864         cp = oldcp;
865         jc = oldjc;
866         ir = oldir;
867         numx = oldnumx;
868 
869         return ret;
870     }
871 }
872 
873 template <class IT, class NT>
EWiseScale(NT ** scaler)874 void Dcsc<IT,NT>::EWiseScale(NT ** scaler)
875 {
876 	for(IT i=0; i<nzc; ++i)
877 	{
878 		IT colid = jc[i];
879 		for(IT j=cp[i]; j < cp[i+1]; ++j)
880 		{
881 			IT rowid = ir[j];
882 			numx[j] *= scaler[rowid][colid];
883 		}
884 	}
885 }
886 
887 /**
888   * Updates entries of 2D dense array using __binary_op and entries of "this"
889   * @pre { __binary_op is a commutative operation}
890   */
891 template <class IT, class NT>
892 template <typename _BinaryOperation>
UpdateDense(NT ** array,_BinaryOperation __binary_op) const893 void Dcsc<IT,NT>::UpdateDense(NT ** array, _BinaryOperation __binary_op) const
894 {
895 	for(IT i=0; i<nzc; ++i)
896 	{
897 		IT colid = jc[i];
898 		for(IT j=cp[i]; j < cp[i+1]; ++j)
899 		{
900 			IT rowid = ir[j];
901 			array[rowid][colid] = __binary_op(array[rowid][colid], numx[j]);
902 		}
903 	}
904 }
905 
906 /**
907   * Construct an index array called aux
908   * Return the size of the contructed array
909   * Complexity O(nzc)
910  **/
911 template <class IT, class NT>
912 IT Dcsc<IT,NT>::ConstructAux(IT ndim, IT * & aux) const
913 {
914 	float cf  = static_cast<float>(ndim+1) / static_cast<float>(nzc);
915 	IT colchunks = static_cast<IT> ( ceil( static_cast<float>(ndim+1) / ceil(cf)) );
916 
917 	aux  = new IT[colchunks+1];
918 
919 	IT chunksize	= static_cast<IT>(ceil(cf));
920 	IT reg		= 0;
921 	IT curchunk	= 0;
922 	aux[curchunk++] = 0;
923 	for(IT i = 0; i< nzc; ++i)
924 	{
925 		if(jc[i] >= curchunk * chunksize)		// beginning of the next chunk
926 		{
927 			while(jc[i] >= curchunk * chunksize)	// consider any empty chunks
928 			{
929 				aux[curchunk++] = reg;
930 			}
931 		}
932 		reg = i+1;
933 	}
934 	while(curchunk <= colchunks)
935 	{
936 		aux[curchunk++] = reg;
937 	}
938 	return colchunks;
939 }
940 
941 /**
942   * Resizes cp & jc arrays to nzcnew, ir & numx arrays to nznew
943   * Zero overhead in case sizes stay the same
944  **/
945 template <class IT, class NT>
Resize(IT nzcnew,IT nznew)946 void Dcsc<IT,NT>::Resize(IT nzcnew, IT nznew)
947 {
948 	if(nzcnew == 0)
949 	{
950 		delete[] jc;
951 		delete[] cp;
952 		nzc = 0;
953 	}
954 	if(nznew == 0)
955 	{
956 		delete[] ir;
957 		delete[] numx;
958 		nz = 0;
959 	}
960 	if ( nzcnew == 0 && nznew == 0)
961 	{
962 		return;
963 	}
964 	if (nzcnew != nzc)
965 	{
966 		IT * tmpcp = cp;
967 		IT * tmpjc = jc;
968 		cp = new IT[nzcnew+1];
969 		jc = new IT[nzcnew];
970 		if(nzcnew > nzc)	// Grow it (copy all of the old elements)
971 		{
972 			std::copy(tmpcp, tmpcp+nzc+1, cp);	// copy(first, end, result)
973 			std::copy(tmpjc, tmpjc+nzc, jc);
974 		}
975 		else		// Shrink it (copy only a portion of the old elements)
976 		{
977 			std::copy(tmpcp, tmpcp+nzcnew+1, cp);
978 			std::copy(tmpjc, tmpjc+nzcnew, jc);
979 		}
980 		delete[] tmpcp;	// delete the memory pointed by previous pointers
981 		delete[] tmpjc;
982 		nzc = nzcnew;
983 	}
984 	if (nznew != nz)
985 	{
986 		NT * tmpnumx = numx;
987 		IT * tmpir = ir;
988 		numx = new NT[nznew];
989 		ir = new IT[nznew];
990 		if(nznew > nz)	// Grow it (copy all of the old elements)
991 		{
992 			std::copy(tmpnumx, tmpnumx+nz, numx);	// numx can be non-POD
993 			std::copy(tmpir, tmpir+nz, ir);
994 		}
995 		else	// Shrink it (copy only a portion of the old elements)
996 		{
997 			std::copy(tmpnumx, tmpnumx+nznew, numx);
998 			std::copy(tmpir, tmpir+nznew, ir);
999 		}
1000 		delete[] tmpnumx;	// delete the memory pointed by previous pointers
1001 		delete[] tmpir;
1002 		nz = nznew;
1003 	}
1004 }
1005 
1006 /**
1007   * The first part of the indexing algorithm described in the IPDPS'08 paper
1008   * @param[IT] colind {Column index to search}
1009   * Find the column with colind. If it exists, return the position of it.
1010   * It it doesn't exist, return value is undefined (implementation specific).
1011  **/
1012 template<class IT, class NT>
1013 IT Dcsc<IT,NT>::AuxIndex(const IT colind, bool & found, IT * aux, IT csize) const
1014 {
1015 	IT base = static_cast<IT>(floor((float) (colind/csize)));
1016 	IT start = aux[base];
1017 	IT end = aux[base+1];
1018 
1019 	IT * itr = std::find(jc + start, jc + end, colind);
1020 
1021 	found = (itr != jc + end);
1022 	return (itr-jc);
1023 }
1024 
1025 /**
1026  ** Split along the cut (a column index)
1027  ** Should work even when one of the splits have no nonzeros at all
1028  **/
1029 template<class IT, class NT>
Split(Dcsc<IT,NT> * & A,Dcsc<IT,NT> * & B,IT cut)1030 void Dcsc<IT,NT>::Split(Dcsc<IT,NT> * & A, Dcsc<IT,NT> * & B, IT cut)
1031 {
1032 	IT * itr = std::lower_bound(jc, jc+nzc, cut);
1033 	IT pos = itr - jc;
1034 
1035 	if(cp[pos] == 0)
1036 	{
1037 		A = NULL;
1038 	}
1039 	else
1040 	{
1041 		A = new Dcsc<IT,NT>(cp[pos], pos);
1042 		std::copy(jc, jc+pos, A->jc);
1043 		std::copy(cp, cp+pos+1, A->cp);
1044 		std::copy(ir, ir+cp[pos], A->ir);
1045 		std::copy(numx, numx + cp[pos], A->numx);	// copy(first, last, result)
1046 	}
1047 	if(nz-cp[pos] == 0)
1048 	{
1049 		B = NULL;
1050 	}
1051 	else
1052 	{
1053 		B = new Dcsc<IT,NT>(nz-cp[pos], nzc-pos);
1054 		std::copy(jc+pos, jc+ nzc, B->jc);
1055 		transform(B->jc, B->jc + (nzc-pos), B->jc, bind2nd(std::minus<IT>(), cut));
1056 		std::copy(cp+pos, cp+nzc+1, B->cp);
1057 		transform(B->cp, B->cp + (nzc-pos+1), B->cp, bind2nd(std::minus<IT>(), cp[pos]));
1058 		std::copy(ir+cp[pos], ir+nz, B->ir);
1059 		std::copy(numx+cp[pos], numx+nz, B->numx);	// copy(first, last, result)
1060 	}
1061 }
1062 
1063 /**
1064  ** Split along the cut(s) in terms of column indices
1065  ** Should work even when one of the splits have no nonzeros at all
1066  ** vector<IT> cuts is of length "size(parts)-1"
1067  ** \pre{ size(parts) >= 2}
1068  **/
1069 template<class IT, class NT>
ColSplit(std::vector<Dcsc<IT,NT> * > & parts,std::vector<IT> & cuts)1070 void Dcsc<IT,NT>::ColSplit(std::vector< Dcsc<IT,NT>* > & parts, std::vector<IT> & cuts)
1071 {
1072     IT * jcbegin = jc;
1073     std::vector<IT> pos; // pos has "parts-1" entries
1074     for(auto cutpoint = cuts.begin(); cutpoint != cuts.end(); ++cutpoint)
1075     {
1076         IT * itr = std::lower_bound(jcbegin, jc+nzc, *cutpoint);
1077         pos.push_back(itr - jc);
1078         jcbegin = itr;  // so that lower_bound searches a smaller vector
1079     }
1080 
1081     if(cp[pos[0]] == 0) // first piece
1082     {
1083         parts[0] = NULL;
1084     }
1085     else
1086     {
1087         parts[0] = new Dcsc<IT,NT>(cp[pos[0]], pos[0]); // Dcsc(nnz, nzc)
1088         std::copy(jc, jc+pos[0], parts[0]->jc);    // std::copy
1089         std::copy(cp, cp+pos[0]+1, parts[0]->cp);
1090         std::copy(ir, ir+cp[pos[0]], parts[0]->ir);
1091         std::copy(numx, numx + cp[pos[0]], parts[0]->numx);	// copy(first, last, result)
1092     }
1093     int ncuts =  cuts.size(); // all except last piece
1094     for(int i=1; i< ncuts; ++i) // treat the first piece differently
1095     {
1096         if(cp[pos[i]] - cp[pos[i-1]] == 0)
1097         {
1098             parts[i] =  NULL;
1099         }
1100         else
1101         {
1102             parts[i] = new Dcsc<IT,NT>(cp[pos[i]] - cp[pos[i-1]], pos[i] - pos[i-1]); // Dcsc(nnz, nzc)
1103             std::copy(jc+pos[i-1], jc+pos[i], parts[i]->jc);    // std::copy
1104             transform(parts[i]->jc, parts[i]->jc + (pos[i]-pos[i-1]), parts[i]->jc, bind2nd(std::minus<IT>(), cuts[i-1]));  // cuts[i-1] is well defined as i>=1
1105 
1106             std::copy(cp+pos[i-1], cp+pos[i]+1, parts[i]->cp);
1107             transform(parts[i]->cp, parts[i]->cp + (pos[i]-pos[i-1]+1), parts[i]->cp, bind2nd(std::minus<IT>(), cp[pos[i-1]]));
1108 
1109             std::copy(ir+cp[pos[i-1]], ir+cp[pos[i]], parts[i]->ir);
1110             std::copy(numx+cp[pos[i-1]], numx + cp[pos[i]], parts[i]->numx);	// copy(first, last, result)
1111         }
1112     }
1113     if(nz - cp[pos[ncuts-1]] == 0)
1114     {
1115         parts[ncuts] = NULL;
1116     }
1117     else
1118     {
1119         parts[ncuts] = new Dcsc<IT,NT>(nz-cp[pos[ncuts-1]], nzc-pos[ncuts-1]);  // ncuts = npieces -1
1120         std::copy(jc+pos[ncuts-1], jc+ nzc, parts[ncuts]->jc);
1121         transform(parts[ncuts]->jc, parts[ncuts]->jc + (nzc-pos[ncuts-1]), parts[ncuts]->jc, bind2nd(std::minus<IT>(), cuts[ncuts-1]));
1122 
1123         std::copy(cp+pos[ncuts-1], cp+nzc+1, parts[ncuts]->cp);
1124         transform(parts[ncuts]->cp, parts[ncuts]->cp + (nzc-pos[ncuts-1]+1), parts[ncuts]->cp, bind2nd(std::minus<IT>(), cp[pos[ncuts-1]]));
1125         std::copy(ir+cp[pos[ncuts-1]], ir+nz, parts[ncuts]->ir);
1126         std::copy(numx+cp[pos[ncuts-1]], numx+nz, parts[ncuts]->numx);
1127     }
1128 }
1129 
1130 
1131 // Assumes A and B are not NULL
1132 // When any is NULL, this function is not called anyway
1133 template<class IT, class NT>
Merge(const Dcsc<IT,NT> * A,const Dcsc<IT,NT> * B,IT cut)1134 void Dcsc<IT,NT>::Merge(const Dcsc<IT,NT> * A, const Dcsc<IT,NT> * B, IT cut)
1135 {
1136 	assert((A != NULL) && (B != NULL)); 	// handled at higher level
1137 	IT cnz = A->nz + B->nz;
1138 	IT cnzc = A->nzc + B->nzc;
1139 	if(cnz > 0)
1140 	{
1141 		*this = Dcsc<IT,NT>(cnz, cnzc);		// safe, because "this" can not be NULL inside a member function
1142 
1143 		std::copy(A->jc, A->jc + A->nzc, jc);	// copy(first, last, result)
1144 		std::copy(B->jc, B->jc + B->nzc, jc + A->nzc);
1145 		transform(jc + A->nzc, jc + cnzc, jc + A->nzc, bind2nd(std::plus<IT>(), cut));
1146 
1147 		std::copy(A->cp, A->cp + A->nzc, cp);
1148 		std::copy(B->cp, B->cp + B->nzc +1, cp + A->nzc);
1149 		transform(cp + A->nzc, cp+cnzc+1, cp + A->nzc, bind2nd(std::plus<IT>(), A->cp[A->nzc]));
1150 
1151 		std::copy(A->ir, A->ir + A->nz, ir);
1152 		std::copy(B->ir, B->ir + B->nz, ir + A->nz);
1153 
1154 		// since numx is potentially non-POD, we use std::copy
1155 		std::copy(A->numx, A->numx + A->nz, numx);
1156 		std::copy(B->numx, B->numx + B->nz, numx + A->nz);
1157 	}
1158 }
1159 
1160 
1161 /**
1162  * @pre {no member of "parts" is empty}
1163  * @pre {there are at least 2 members}
1164  * offsets arrays is "parallel to" parts array
1165  * it shows the starts of column numbers
1166  **/
1167 template<class IT, class NT>
ColConcatenate(std::vector<Dcsc<IT,NT> * > & parts,std::vector<IT> & offsets)1168 void Dcsc<IT,NT>::ColConcatenate(std::vector< Dcsc<IT,NT>* > & parts, std::vector<IT> & offsets)
1169 {
1170     IT cnz = 0;
1171     IT cnzc = 0;
1172     size_t nmembers = parts.size();
1173     for(size_t i=0; i< nmembers; ++i)
1174     {
1175         cnz += parts[i]->nz;
1176         cnzc += parts[i]->nzc;
1177     }
1178     if(cnz > 0)
1179     {
1180         *this = Dcsc<IT,NT>(cnz, cnzc);		// safe, because "this" can not be NULL inside a member function
1181 
1182         IT run_nz = 0;
1183         IT run_nzc = 0;
1184         for(size_t i=0; i< nmembers; ++i)
1185         {
1186             std::copy(parts[i]->jc, parts[i]->jc + parts[i]->nzc, jc + run_nzc);
1187             transform(jc + run_nzc, jc + run_nzc + parts[i]->nzc, jc + run_nzc, bind2nd(std::plus<IT>(), offsets[i]));
1188 
1189             // remember: cp[nzc] = nnz
1190             std::copy(parts[i]->cp, parts[i]->cp + parts[i]->nzc, cp + run_nzc);
1191             transform(cp + run_nzc, cp + run_nzc + parts[i]->nzc, cp + run_nzc, bind2nd(std::plus<IT>(),run_nz));
1192 
1193             std::copy(parts[i]->ir, parts[i]->ir + parts[i]->nz, ir + run_nz);
1194             std::copy(parts[i]->numx, parts[i]->numx + parts[i]->nz, numx + run_nz);
1195 
1196             run_nzc += parts[i]->nzc;
1197             run_nz += parts[i]->nz;
1198         }
1199         // adjust the last pointer
1200         cp[run_nzc] = run_nz;
1201     }
1202 }
1203 
1204 /**
1205  * param[in] nind { length(colsums), gives number of columns of A that contributes to C(:,i) }
1206  * Vector type VT is allowed to be different than matrix type (IT)
1207  * However, VT should be up-castable to IT (example: VT=int32_t, IT=int64_t)
1208  **/
1209 template<class IT, class NT>
1210 template<class VT>
FillColInds(const VT * colnums,IT nind,std::vector<std::pair<IT,IT>> & colinds,IT * aux,IT csize) const1211 void Dcsc<IT,NT>::FillColInds(const VT * colnums, IT nind, std::vector< std::pair<IT,IT> > & colinds, IT * aux, IT csize) const
1212 {
1213 	if ( aux == NULL || (nzc / nind) < THRESHOLD)   	// use scanning indexing
1214 	{
1215 		IT mink = std::min(nzc, nind);
1216 		std::pair<IT,IT> * isect = new std::pair<IT,IT>[mink];
1217 		std::pair<IT,IT> * range1 = new std::pair<IT,IT>[nzc];
1218 		std::pair<IT,IT> * range2 = new std::pair<IT,IT>[nind];
1219 
1220 		for(IT i=0; i < nzc; ++i)
1221 		{
1222 			range1[i] = std::make_pair(jc[i], i);	// get the actual nonzero value and the index to the ith nonzero
1223 		}
1224 		for(IT i=0; i < nind; ++i)
1225 		{
1226 			range2[i] = std::make_pair(static_cast<IT>(colnums[i]), 0);	// second is dummy as all the intersecting elements are copied from the first range
1227 		}
1228 
1229 		std::pair<IT,IT> * itr = set_intersection(range1, range1 + nzc, range2, range2+nind, isect, SpHelper::first_compare<IT> );
1230 		// isect now can iterate on a subset of the elements of range1
1231 		// meaning that the intersection can be accessed directly by isect[i] instead of range1[isect[i]]
1232 		// this is because the intersecting elements are COPIED to the output range "isect"
1233 
1234 		IT kisect = static_cast<IT>(itr-isect);		// size of the intersection
1235 		for(IT j=0, i =0; j< nind; ++j)
1236 		{
1237 			// the elements represented by jc[isect[i]] are a subset of the elements represented by colnums[j]
1238 			if( i == kisect || isect[i].first != static_cast<IT>(colnums[j]))
1239 			{
1240 				// not found, signal by setting first = second
1241 				colinds[j].first = 0;
1242 				colinds[j].second = 0;
1243 			}
1244 			else	// i < kisect && dcsc->jc[isect[i]] == colnums[j]
1245 			{
1246 				IT p = isect[i++].second;
1247 				colinds[j].first = cp[p];
1248 				colinds[j].second = cp[p+1];
1249 			}
1250 		}
1251 		DeleteAll(isect, range1, range2);
1252 	}
1253 	else	 	// use aux based indexing
1254 	{
1255 		bool found;
1256 		for(IT j =0; j< nind; ++j)
1257 		{
1258 			IT pos = AuxIndex(static_cast<IT>(colnums[j]), found, aux, csize);
1259 			if(found)
1260 			{
1261 				colinds[j].first = cp[pos];
1262 				colinds[j].second = cp[pos+1];
1263 			}
1264 			else 	// not found, signal by setting first = second
1265 			{
1266 				colinds[j].first = 0;
1267 				colinds[j].second = 0;
1268 			}
1269 		}
1270 	}
1271 }
1272 
1273 
1274 template <class IT, class NT>
~Dcsc()1275 Dcsc<IT,NT>::~Dcsc()
1276 {
1277 	if(nz > 0)			// dcsc may be empty
1278 	{
1279 		delete[] numx;
1280 		delete[] ir;
1281 	}
1282 	if(nzc > 0)
1283 	{
1284 		delete[] jc;
1285 		delete[] cp;
1286 	}
1287 }
1288 
1289 }
1290