1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 
31 #ifndef _SP_HELPER_H_
32 #define _SP_HELPER_H_
33 
34 #include <vector>
35 #include <limits>
36 #include <map>
37 #include <string>
38 #include <utility>
39 #include "SpDefs.h"
40 #include "StackEntry.h"
41 #include "promote.h"
42 #include "Isect.h"
43 #include "HeapEntry.h"
44 #include "SpImpl.h"
45 #include "hash.hpp"
46 
47 namespace combblas {
48 
49 template <class IT, class NT>
50 class Dcsc;
51 
52 class SpHelper
53 {
54 public:
55 
56     template <typename T>
find_order(const std::vector<T> & values)57     static std::vector<size_t> find_order(const std::vector<T> & values)
58     {
59         size_t index = 0;
60         std::vector< std::pair<T, size_t> > tosort;
61         for(auto & val: values)
62         {
63             tosort.push_back(std::make_pair(val,index++));
64         }
65         sort(tosort.begin(), tosort.end());
66         std::vector<size_t> permutation;
67         for(auto & sorted: tosort)
68         {
69             permutation.push_back(sorted.second);
70         }
71         return permutation;
72     }
73 
74     template <typename IT1, typename NT1, typename IT2, typename NT2>
75     static void push_to_vectors(std::vector<IT1> & rows, std::vector<IT1> & cols, std::vector<NT1> & vals, IT2 ii, IT2 jj, NT2 vv, int symmetric, bool onebased = true)
76     {
77         if(onebased)
78         {
79         	ii--;  /* adjust from 1-based to 0-based */
80         	jj--;
81         }
82         rows.push_back(ii);
83         cols.push_back(jj);
84         vals.push_back(vv);
85         if(symmetric && ii != jj)
86         {
87             rows.push_back(jj);
88             cols.push_back(ii);
89             vals.push_back(vv);
90         }
91     }
92 
ProcessLinesWithStringKeys(std::vector<std::map<std::string,uint64_t>> & allkeys,std::vector<std::string> & lines,int nprocs)93     static void ProcessLinesWithStringKeys(std::vector< std::map < std::string, uint64_t> > & allkeys, std::vector<std::string> & lines, int nprocs)
94     {
95     	std::string frstr, tostr;
96     	uint64_t frhash, tohash;
97     	double vv;
98     	for (auto itr=lines.begin(); itr != lines.end(); ++itr)
99     	{
100 		char fr[MAXVERTNAME];
101 		char to[MAXVERTNAME];
102 		sscanf(itr->c_str(), "%s %s %lg", fr, to, &vv);
103 		frstr = std::string(fr);
104 		tostr = std::string(to);
105 		MurmurHash3_x64_64(frstr.c_str(),frstr.size(),0,&frhash);
106 		MurmurHash3_x64_64(tostr.c_str(),tostr.size(),0,&tohash);
107 
108 		double range_fr = static_cast<double>(frhash) * static_cast<double>(nprocs);
109 		double range_to = static_cast<double>(tohash) * static_cast<double>(nprocs);
110     		size_t owner_fr = range_fr / static_cast<double>(std::numeric_limits<uint64_t>::max());
111     		size_t owner_to = range_to / static_cast<double>(std::numeric_limits<uint64_t>::max());
112 
113 		// cout << frstr << " with hash " << frhash << " is going to " << owner_fr << endl;
114 		// cout << tostr << " with hash " << tohash << " is going to " << owner_to << endl;
115 
116 		allkeys[owner_fr].insert(std::make_pair(frstr, frhash));
117 		allkeys[owner_to].insert(std::make_pair(tostr, tohash));
118    	}
119         lines.clear();
120     }
121 
122     template <typename IT1, typename NT1>
ProcessStrLinesNPermute(std::vector<IT1> & rows,std::vector<IT1> & cols,std::vector<NT1> & vals,std::vector<std::string> & lines,std::map<std::string,uint64_t> & ultperm)123     static void ProcessStrLinesNPermute(std::vector<IT1> & rows, std::vector<IT1> & cols, std::vector<NT1> & vals, std::vector<std::string> & lines, std::map<std::string, uint64_t> & ultperm)
124     {
125 	char * fr = new char[MAXVERTNAME];
126     	char * to = new char[MAXVERTNAME];
127     	std::string frstr, tostr;
128     	double vv;
129     	for (auto itr=lines.begin(); itr != lines.end(); ++itr)
130     	{
131 		sscanf(itr->c_str(), "%s %s %lg", fr, to, &vv);
132 		frstr = std::string(fr);
133 		tostr = std::string(to);
134 
135 		rows.emplace_back((IT1) ultperm[frstr]);
136 		cols.emplace_back((IT1) ultperm[tostr]);
137 		vals.emplace_back((NT1) vv);
138    	}
139 	delete [] fr;
140 	delete [] to;
141         lines.clear();
142     }
143 
144 
145 
146     template <typename IT1, typename NT1>
147     static void ProcessLines(std::vector<IT1> & rows, std::vector<IT1> & cols, std::vector<NT1> & vals, std::vector<std::string> & lines, int symmetric, int type, bool onebased = true)
148     {
149         if(type == 0)   // real
150         {
151             int64_t ii, jj;
152             double vv;
153             for (auto itr=lines.begin(); itr != lines.end(); ++itr)
154             {
155                 // string::c_str() -> Returns a pointer to an array that contains a null-terminated sequence of characters (i.e., a C-string)
156                 sscanf(itr->c_str(), "%lld %lld %lg", &ii, &jj, &vv);
157                 SpHelper::push_to_vectors(rows, cols, vals, ii, jj, vv, symmetric, onebased);
158             }
159         }
160         else if(type == 1) // integer
161         {
162             int64_t ii, jj, vv;
163             for (auto itr=lines.begin(); itr != lines.end(); ++itr)
164             {
165                 sscanf(itr->c_str(), "%lld %lld %lld", &ii, &jj, &vv);
166                 SpHelper::push_to_vectors(rows, cols, vals, ii, jj, vv, symmetric, onebased);
167             }
168         }
169         else if(type == 2) // pattern
170         {
171             int64_t ii, jj;
172             for (auto itr=lines.begin(); itr != lines.end(); ++itr)
173             {
174                 sscanf(itr->c_str(), "%lld %lld", &ii, &jj);
175                 SpHelper::push_to_vectors(rows, cols, vals, ii, jj, 1, symmetric, onebased);
176             }
177         }
178         else
179         {
180             std::cout << "COMBBLAS: Unrecognized matrix market scalar type" << std::endl;
181         }
182         lines.clear();
183     }
184 
185 
186 	template <typename T>
p2a(const std::vector<T> & v)187 	static const T * p2a (const std::vector<T> & v)   // pointer to array
188 	{
189     		if(v.empty()) return NULL;
190     		else return (&v[0]);
191 	}
192 
193 	template <typename T>
p2a(std::vector<T> & v)194 	static T * p2a (std::vector<T> & v)   // pointer to array
195 	{
196     		if(v.empty()) return NULL;
197     		else return (&v[0]);
198 	}
199 
200 
201 	template<typename _ForwardIterator>
is_sorted(_ForwardIterator __first,_ForwardIterator __last)202 	static bool is_sorted(_ForwardIterator __first, _ForwardIterator __last)
203 	{
204       		if (__first == __last)
205         		return true;
206 
207       		_ForwardIterator __next = __first;
208       		for (++__next; __next != __last; __first = __next, ++__next)
209         		if (*__next < *__first)
210           			return false;
211       		return true;
212     	}
213   	template<typename _ForwardIterator, typename _StrictWeakOrdering>
is_sorted(_ForwardIterator __first,_ForwardIterator __last,_StrictWeakOrdering __comp)214     	static bool is_sorted(_ForwardIterator __first, _ForwardIterator __last,  _StrictWeakOrdering __comp)
215     	{
216       		if (__first == __last)
217         		return true;
218 
219 		_ForwardIterator __next = __first;
220 		for (++__next; __next != __last; __first = __next, ++__next)
221 			if (__comp(*__next, *__first))
222           			return false;
223       		return true;
224 	}
225 	template<typename _ForwardIter, typename T>
iota(_ForwardIter __first,_ForwardIter __last,T __val)226 	static void iota(_ForwardIter __first, _ForwardIter __last, T __val)
227 	{
228 		while (__first != __last)
229 	     		*__first++ = __val++;
230 	}
231 	template<typename In, typename Out, typename UnPred>
copyIf(In first,In last,Out result,UnPred pred)232 	static Out copyIf(In first, In last, Out result, UnPred pred)
233 	{
234    		for ( ;first != last; ++first)
235       			if (pred(*first))
236          			*result++ = *first;
237    		return(result);
238 	}
239 
240 	template<typename T, typename I1, typename I2>
allocate2D(I1 m,I2 n)241 	static T ** allocate2D(I1 m, I2 n)
242 	{
243 		T ** array = new T*[m];
244 		for(I1 i = 0; i<m; ++i)
245 			array[i] = new T[n];
246 		return array;
247 	}
248 	template<typename T, typename I>
deallocate2D(T ** array,I m)249 	static void deallocate2D(T ** array, I m)
250 	{
251 		for(I i = 0; i<m; ++i)
252 			delete [] array[i];
253 		delete [] array;
254 	}
255 
256 
257 	template <typename SR, typename NT1, typename NT2, typename IT, typename OVT>
258 	static IT Popping(NT1 * numA, NT2 * numB, StackEntry< OVT, std::pair<IT,IT> > * multstack,
259 		 	IT & cnz, KNHeap< std::pair<IT,IT> , IT > & sHeap, Isect<IT> * isect1, Isect<IT> * isect2);
260 
261 	template <typename IT, typename NT1, typename NT2>
262 	static void SpIntersect(const Dcsc<IT,NT1> & Adcsc, const Dcsc<IT,NT2> & Bdcsc, Isect<IT>* & cols, Isect<IT>* & rows,
263 			Isect<IT>* & isect1, Isect<IT>* & isect2, Isect<IT>* & itr1, Isect<IT>* & itr2);
264 
265 	template <typename SR, typename IT, typename NT1, typename NT2, typename OVT>
266 	static IT SpCartesian(const Dcsc<IT,NT1> & Adcsc, const Dcsc<IT,NT2> & Bdcsc, IT kisect, Isect<IT> * isect1,
267 			Isect<IT> * isect2, StackEntry< OVT, std::pair<IT,IT> > * & multstack);
268 
269 	template <typename SR, typename IT, typename NT1, typename NT2, typename OVT>
270 	static IT SpColByCol(const Dcsc<IT,NT1> & Adcsc, const Dcsc<IT,NT2> & Bdcsc, IT nA,
271 			StackEntry< OVT, std::pair<IT,IT> > * & multstack);
272 
273 	template <typename NT, typename IT>
ShrinkArray(NT * & array,IT newsize)274 	static void ShrinkArray(NT * & array, IT newsize)
275 	{
276 		NT * narray = new NT[newsize];
277 		memcpy(narray, array, newsize*sizeof(NT));	// copy only a portion of the old elements
278 
279 		delete [] array;
280 		array = narray;
281 	}
282 
283 	template <typename NT, typename IT>
DoubleStack(StackEntry<NT,std::pair<IT,IT>> * & multstack,IT & cnzmax,IT add)284 	static void DoubleStack(StackEntry<NT, std::pair<IT,IT> > * & multstack, IT & cnzmax, IT add)
285 	{
286 		StackEntry<NT, std::pair<IT,IT> > * tmpstack = multstack;
287 		multstack = new StackEntry<NT, std::pair<IT,IT> >[2* cnzmax + add];
288 		memcpy(multstack, tmpstack, sizeof(StackEntry<NT, std::pair<IT,IT> >) * cnzmax);
289 
290 		cnzmax = 2*cnzmax + add;
291 		delete [] tmpstack;
292 	}
293 
294 	template <typename IT>
first_compare(std::pair<IT,IT> pair1,std::pair<IT,IT> pair2)295 	static bool first_compare(std::pair<IT, IT> pair1, std::pair<IT, IT> pair2)
296 	{ return pair1.first < pair2.first; }
297 
298 };
299 
300 
301 
302 
303 /**
304  * Pop an element, do the numerical semiring multiplication & insert the result into multstack
305  */
306 template <typename SR, typename NT1, typename NT2, typename IT, typename OVT>
Popping(NT1 * numA,NT2 * numB,StackEntry<OVT,std::pair<IT,IT>> * multstack,IT & cnz,KNHeap<std::pair<IT,IT>,IT> & sHeap,Isect<IT> * isect1,Isect<IT> * isect2)307 IT SpHelper::Popping(NT1 * numA, NT2 * numB, StackEntry< OVT, std::pair<IT,IT> > * multstack,
308 			IT & cnz, KNHeap< std::pair<IT,IT>,IT > & sHeap, Isect<IT> * isect1, Isect<IT> * isect2)
309 {
310 	std::pair<IT,IT> key;
311 	IT inc;
312 	sHeap.deleteMin(&key, &inc);
313 
314 	OVT value = SR::multiply(numA[isect1[inc].current], numB[isect2[inc].current]);
315 	if (!SR::returnedSAID())
316 	{
317 		if(cnz != 0)
318 		{
319 			if(multstack[cnz-1].key == key)	// already exists
320 			{
321 				multstack[cnz-1].value = SR::add(multstack[cnz-1].value, value);
322 			}
323 			else
324 			{
325 				multstack[cnz].value = value;
326 				multstack[cnz].key   = key;
327 				++cnz;
328 			}
329 		}
330 		else
331 		{
332 			multstack[cnz].value = value;
333 			multstack[cnz].key   = key;
334 			++cnz;
335 		}
336 	}
337 	return inc;
338 }
339 
340 /**
341   * Finds the intersecting row indices of Adcsc and col indices of Bdcsc
342   * @param[IT] Bdcsc {the transpose of the dcsc structure of matrix B}
343   * @param[IT] Adcsc {the dcsc structure of matrix A}
344   **/
345 template <typename IT, typename NT1, typename NT2>
SpIntersect(const Dcsc<IT,NT1> & Adcsc,const Dcsc<IT,NT2> & Bdcsc,Isect<IT> * & cols,Isect<IT> * & rows,Isect<IT> * & isect1,Isect<IT> * & isect2,Isect<IT> * & itr1,Isect<IT> * & itr2)346 void SpHelper::SpIntersect(const Dcsc<IT,NT1> & Adcsc, const Dcsc<IT,NT2> & Bdcsc, Isect<IT>* & cols, Isect<IT>* & rows,
347 				Isect<IT>* & isect1, Isect<IT>* & isect2, Isect<IT>* & itr1, Isect<IT>* & itr2)
348 {
349 	cols = new Isect<IT>[Adcsc.nzc];
350 	rows = new Isect<IT>[Bdcsc.nzc];
351 
352 	for(IT i=0; i < Adcsc.nzc; ++i)
353 	{
354 		cols[i].index	= Adcsc.jc[i];		// column index
355 		cols[i].size	= Adcsc.cp[i+1] - Adcsc.cp[i];
356 		cols[i].start	= Adcsc.cp[i];		// pointer to row indices
357 		cols[i].current = Adcsc.cp[i];		// pointer to row indices
358 	}
359 	for(IT i=0; i < Bdcsc.nzc; ++i)
360 	{
361 		rows[i].index	= Bdcsc.jc[i];		// column index
362 		rows[i].size	= Bdcsc.cp[i+1] - Bdcsc.cp[i];
363 		rows[i].start	= Bdcsc.cp[i];		// pointer to row indices
364 		rows[i].current = Bdcsc.cp[i];		// pointer to row indices
365 	}
366 
367 	/* A single set_intersection would only return the elements of one sequence
368 	 * But we also want random access to the other array's elements
369 	 * Thus we do the intersection twice
370 	 */
371 	IT mink = std::min(Adcsc.nzc, Bdcsc.nzc);
372 	isect1 = new Isect<IT>[mink];	// at most
373 	isect2 = new Isect<IT>[mink];	// at most
374 	itr1 = std::set_intersection(cols, cols + Adcsc.nzc, rows, rows + Bdcsc.nzc, isect1);
375 	itr2 = std::set_intersection(rows, rows + Bdcsc.nzc, cols, cols + Adcsc.nzc, isect2);
376 	// itr1 & itr2 are now pointing to one past the end of output sequences
377 }
378 
379 /**
380  * Performs cartesian product on the dcsc structures.
381  * Indices to perform the product are given by isect1 and isect2 arrays
382  * Returns the "actual" number of elements in the merged stack
383  * Bdcsc is "already transposed" (i.e. Bdcsc->ir gives column indices, and Bdcsc->jc gives row indices)
384  **/
385 template <typename SR, typename IT, typename NT1, typename NT2, typename OVT>
SpCartesian(const Dcsc<IT,NT1> & Adcsc,const Dcsc<IT,NT2> & Bdcsc,IT kisect,Isect<IT> * isect1,Isect<IT> * isect2,StackEntry<OVT,std::pair<IT,IT>> * & multstack)386 IT SpHelper::SpCartesian(const Dcsc<IT,NT1> & Adcsc, const Dcsc<IT,NT2> & Bdcsc, IT kisect, Isect<IT> * isect1,
387 		Isect<IT> * isect2, StackEntry< OVT, std::pair<IT,IT> > * & multstack)
388 {
389 	std::pair<IT,IT> supremum(std::numeric_limits<IT>::max(), std::numeric_limits<IT>::max());
390 	std::pair<IT,IT> infimum (std::numeric_limits<IT>::min(), std::numeric_limits<IT>::min());
391 
392 	KNHeap< std::pair<IT,IT> , IT > sHeapDcsc(supremum, infimum);
393 
394 	// Create a sequence heap that will eventually construct DCSC of C
395 	// The key to sort is pair<col_ind, row_ind> so that output is in column-major order
396 	for(IT i=0; i< kisect; ++i)
397 	{
398 		std::pair<IT,IT> key(Bdcsc.ir[isect2[i].current], Adcsc.ir[isect1[i].current]);
399 		sHeapDcsc.insert(key, i);
400 	}
401 
402 	IT cnz = 0;
403 	IT cnzmax = Adcsc.nz + Bdcsc.nz;	// estimate on the size of resulting matrix C
404 	multstack = new StackEntry< OVT, std::pair<IT,IT> > [cnzmax];
405 
406 	bool finished = false;
407 	while(!finished)		// multiplication loop  (complexity O(flops * log (kisect))
408 	{
409 		finished = true;
410 		if (cnz + kisect > cnzmax)		// double the size of multstack
411 		{
412 			DoubleStack(multstack, cnzmax, kisect);
413 		}
414 
415 		// inc: the list to increment its pointer in the k-list merging
416 		IT inc = Popping< SR >(Adcsc.numx, Bdcsc.numx, multstack, cnz, sHeapDcsc, isect1, isect2);
417 		isect1[inc].current++;
418 
419 		if(isect1[inc].current < isect1[inc].size + isect1[inc].start)
420 		{
421 			std::pair<IT,IT> key(Bdcsc.ir[isect2[inc].current], Adcsc.ir[isect1[inc].current]);
422 			sHeapDcsc.insert(key, inc);	// push the same element with a different key [increasekey]
423 			finished = false;
424 		}
425 		// No room to go in isect1[], but there is still room to go in isect2[i]
426 		else if(isect2[inc].current + 1 < isect2[inc].size + isect2[inc].start)
427 		{
428 			isect1[inc].current = isect1[inc].start;	// wrap-around
429 			isect2[inc].current++;
430 
431 			std::pair<IT,IT> key(Bdcsc.ir[isect2[inc].current], Adcsc.ir[isect1[inc].current]);
432 			sHeapDcsc.insert(key, inc);	// push the same element with a different key [increasekey]
433 			finished = false;
434 		}
435 		else // don't push, one of the lists has been deplated
436 		{
437 			kisect--;
438 			if(kisect != 0)
439 			{
440 				finished = false;
441 			}
442 		}
443 	}
444 	return cnz;
445 }
446 
447 
448 template <typename SR, typename IT, typename NT1, typename NT2, typename OVT>
SpColByCol(const Dcsc<IT,NT1> & Adcsc,const Dcsc<IT,NT2> & Bdcsc,IT nA,StackEntry<OVT,std::pair<IT,IT>> * & multstack)449 IT SpHelper::SpColByCol(const Dcsc<IT,NT1> & Adcsc, const Dcsc<IT,NT2> & Bdcsc, IT nA,
450 			StackEntry< OVT, std::pair<IT,IT> > * & multstack)
451 {
452 	IT cnz = 0;
453 	IT cnzmax = Adcsc.nz + Bdcsc.nz;	// estimate on the size of resulting matrix C
454 	multstack = new StackEntry<OVT, std::pair<IT,IT> >[cnzmax];
455 
456 	float cf  = static_cast<float>(nA+1) / static_cast<float>(Adcsc.nzc);
457 	IT csize = static_cast<IT>(ceil(cf));   // chunk size
458 	IT * aux;
459 	//IT auxsize = Adcsc.ConstructAux(nA, aux);
460 	Adcsc.ConstructAux(nA, aux);
461 
462 	for(IT i=0; i< Bdcsc.nzc; ++i)		// for all the columns of B
463 	{
464 		IT prevcnz = cnz;
465 		IT nnzcol = Bdcsc.cp[i+1] - Bdcsc.cp[i];
466 		HeapEntry<IT, NT1> * wset = new HeapEntry<IT, NT1>[nnzcol];
467 		// heap keys are just row indices (IT)
468 		// heap values are <numvalue, runrank>
469 		// heap size is nnz(B(:,i)
470 
471 		// colnums vector keeps column numbers requested from A
472 		std::vector<IT> colnums(nnzcol);
473 
474 		// colinds.first vector keeps indices to A.cp, i.e. it dereferences "colnums" vector (above),
475 		// colinds.second vector keeps the end indices (i.e. it gives the index to the last valid element of A.cpnack)
476 		std::vector< std::pair<IT,IT> > colinds(nnzcol);
477     std::copy(Bdcsc.ir + Bdcsc.cp[i], Bdcsc.ir + Bdcsc.cp[i+1], colnums.begin());
478 
479 		Adcsc.FillColInds(&colnums[0], colnums.size(), colinds, aux, csize);
480 		IT maxnnz = 0;	// max number of nonzeros in C(:,i)
481 		IT hsize = 0;
482 
483 		for(IT j = 0; (unsigned)j < colnums.size(); ++j)		// create the initial heap
484 		{
485 			if(colinds[j].first != colinds[j].second)	// current != end
486 			{
487 				wset[hsize++] = HeapEntry< IT,NT1 > (Adcsc.ir[colinds[j].first], j, Adcsc.numx[colinds[j].first]);
488 				maxnnz += colinds[j].second - colinds[j].first;
489 			}
490 		}
491 		std::make_heap(wset, wset+hsize);
492 
493 		if (cnz + maxnnz > cnzmax)		// double the size of multstack
494 		{
495 			SpHelper::DoubleStack(multstack, cnzmax, maxnnz);
496 		}
497 
498 		// No need to keep redefining key and hentry with each iteration of the loop
499 		while(hsize > 0)
500 		{
501 			std::pop_heap(wset, wset + hsize);         // result is stored in wset[hsize-1]
502 			IT locb = wset[hsize-1].runr;	// relative location of the nonzero in B's current column
503 
504 			// type promotion done here:
505 			// static T_promote multiply(const T1 & arg1, const T2 & arg2)
506 			//	return (static_cast<T_promote>(arg1) * static_cast<T_promote>(arg2) );
507 			OVT mrhs = SR::multiply(wset[hsize-1].num, Bdcsc.numx[Bdcsc.cp[i]+locb]);
508 			if (!SR::returnedSAID())
509 			{
510 				if(cnz != prevcnz && multstack[cnz-1].key.second == wset[hsize-1].key)	// if (cnz == prevcnz) => first nonzero for this column
511 				{
512 					multstack[cnz-1].value = SR::add(multstack[cnz-1].value, mrhs);
513 				}
514 				else
515 				{
516 					multstack[cnz].value = mrhs;
517 					multstack[cnz++].key = std::make_pair(Bdcsc.jc[i], wset[hsize-1].key);
518 					// first entry is the column index, as it is in column-major order
519 				}
520 			}
521 
522 			if( (++(colinds[locb].first)) != colinds[locb].second)	// current != end
523 			{
524 				// runr stays the same !
525 				wset[hsize-1].key = Adcsc.ir[colinds[locb].first];
526 				wset[hsize-1].num = Adcsc.numx[colinds[locb].first];
527 				std::push_heap(wset, wset+hsize);
528 			}
529 			else
530 			{
531 				--hsize;
532 			}
533 		}
534 		delete [] wset;
535 	}
536 	delete [] aux;
537 	return cnz;
538 }
539 
540 }
541 
542 #endif
543