1 /****************************************************************/
2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */
3 /* version 1.6 -------------------------------------------------*/
4 /* date: 6/15/2017 ---------------------------------------------*/
5 /* authors: Ariful Azad, Aydin Buluc  --------------------------*/
6 /****************************************************************/
7 /*
8  Copyright (c) 2010-2017, The Regents of the University of California
9 
10  Permission is hereby granted, free of charge, to any person obtaining a copy
11  of this software and associated documentation files (the "Software"), to deal
12  in the Software without restriction, including without limitation the rights
13  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14  copies of the Software, and to permit persons to whom the Software is
15  furnished to do so, subject to the following conditions:
16 
17  The above copyright notice and this permission notice shall be included in
18  all copies or substantial portions of the Software.
19 
20  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26  THE SOFTWARE.
27  */
28 
29 
30 #ifndef _SP_IMPL_H_
31 #define _SP_IMPL_H_
32 
33 #include <iostream>
34 #include <vector>
35 #include "PreAllocatedSPA.h"
36 #include "Deleter.h"
37 
38 namespace combblas {
39 
40 template <class IT, class NT>
41 class Dcsc;
42 
43 template <class IT, class NT>
44 class Csc;
45 
46 template <class SR, class IT, class NUM, class IVT, class OVT>
47 struct SpImpl;
48 
49 //! Overload #1: DCSC
50 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV(const Dcsc<IT,NUM> & Adcsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,std::vector<int32_t> & indy,std::vector<OVT> & numy,PreAllocatedSPA<OVT> & SPA)51 void SpMXSpV(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
52 			 std::vector<int32_t> & indy, std::vector< OVT > & numy, PreAllocatedSPA<OVT> & SPA)
53 {
54 	// ignoring SPA for now. However, a branching similar to the CSC case can be implemented
55     SpImpl<SR,IT,NUM,IVT,OVT>::SpMXSpV(Adcsc, mA, indx, numx, veclen, indy, numy);	// don't touch this
56 };
57 
58 
59 //! Overload #2: DCSC
60 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV(const Dcsc<IT,NUM> & Adcsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,int32_t * indy,OVT * numy,int * cnts,int * dspls,int p_c)61 void SpMXSpV(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
62 			 int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c)
63 {
64 	SpImpl<SR,IT,NUM,IVT,OVT>::SpMXSpV(Adcsc, mA, indx, numx, veclen, indy, numy, cnts, dspls,p_c);	// don't touch this
65 };
66 
67 
68 //! Overload #3: DCSC
69 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV_ForThreading(const Dcsc<IT,NUM> & Adcsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,std::vector<int32_t> & indy,std::vector<OVT> & numy,int32_t offset)70 void SpMXSpV_ForThreading(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
71                           std::vector<int32_t> & indy, std::vector< OVT > & numy, int32_t offset)
72 {
73     SpImpl<SR,IT,NUM,IVT,OVT>::SpMXSpV_ForThreading(Adcsc, mA, indx, numx, veclen, indy, numy, offset);	// don't touch this
74 };
75 
76 //! Overload #4: DCSC w/ preallocated SPA
77 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV_ForThreading(const Dcsc<IT,NUM> & Adcsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,std::vector<int32_t> & indy,std::vector<OVT> & numy,int32_t offset,std::vector<OVT> & localy,BitMap & isthere,std::vector<uint32_t> & nzinds)78 void SpMXSpV_ForThreading(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
79                           std::vector<int32_t> & indy, std::vector< OVT > & numy, int32_t offset, std::vector<OVT> & localy, BitMap & isthere, std::vector<uint32_t> & nzinds)
80 {
81     SpImpl<SR,IT,NUM,IVT,OVT>::SpMXSpV_ForThreading(Adcsc, mA, indx, numx, veclen, indy, numy, offset, localy, isthere, nzinds);
82 };
83 
84 
85 
86 
87 
88 
89 
90 
91 
92 /*
93  The following two functions are base CSC implementation. All overloaded function calls will be routed to these functions.
94  */
95 // all CSC will fall to this
96 template <typename SR, typename IT, typename NUM, typename IVT, typename OVT>
97 void SpMXSpV_HeapSort(const Csc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen, std::vector<int32_t> & indy, std::vector<OVT> & numy, int32_t offset);
98 
99 // all PreAllocatedSPA will fall to this
100 template <class SR, class IT, class NUM, class IVT, class OVT>
101 void SpMXSpV_Bucket(const Csc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,std::vector<int32_t> & indy, std::vector< OVT > & numy, PreAllocatedSPA<OVT> & SPA);
102 
103 
104 
105 //! Overload #1: CSC
106 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV(const Csc<IT,NUM> & Acsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,int32_t * indy,OVT * numy,int * cnts,int * dspls,int p_c)107 void SpMXSpV(const Csc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
108              int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c)
109 {
110         std::cout << "Optbuf enabled version is not yet supported with CSC matrices" << std::endl;
111 };
112 
113 
114 //! Overload #2: CSC
115 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV(const Csc<IT,NUM> & Acsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,std::vector<int32_t> & indy,std::vector<OVT> & numy,PreAllocatedSPA<OVT> & SPA)116 void SpMXSpV(const Csc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
117              std::vector<int32_t> & indy, std::vector< OVT > & numy, PreAllocatedSPA<OVT> & SPA)
118 {
119     if(SPA.initialized)
120         SpMXSpV_Bucket<SR>(Acsc, mA, indx, numx, veclen, indy, numy, SPA);
121     else
122         SpMXSpV_HeapSort<SR>(Acsc, mA, indx, numx, veclen, indy, numy, 0);
123 
124 };
125 
126 //! Overload #3: CSC
127 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV_ForThreading(const Csc<IT,NUM> & Acsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,std::vector<int32_t> & indy,std::vector<OVT> & numy,int32_t offset)128 void SpMXSpV_ForThreading(const Csc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
129                           std::vector<int32_t> & indy, std::vector< OVT > & numy, int32_t offset)
130 {
131     SpMXSpV_HeapSort<SR>(Acsc, mA, indx, numx, veclen, indy, numy, offset);
132 };
133 
134 //! Overload #4: CSC w/ preallocated SPA
135 template <class SR, class IT, class NUM, class IVT, class OVT>
SpMXSpV_ForThreading(const Csc<IT,NUM> & Acsc,int32_t mA,const int32_t * indx,const IVT * numx,int32_t veclen,std::vector<int32_t> & indy,std::vector<OVT> & numy,int32_t offset,std::vector<OVT> & localy,BitMap & isthere,std::vector<uint32_t> & nzinds)136 void SpMXSpV_ForThreading(const Csc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
137                           std::vector<int32_t> & indy, std::vector< OVT > & numy, int32_t offset, std::vector<OVT> & localy, BitMap & isthere, std::vector<uint32_t> & nzinds)
138 {
139 
140     SpMXSpV_HeapSort<SR>(Acsc, mA, indx, numx, veclen, indy, numy, offset);
141     // We can eventually call SpMXSpV_HeapMerge or SpMXSpV_SPA (not implemented for CSC yet)
142 };
143 
144 
145 
146 
147 
148 /**
149  * IT: The sparse matrix index type. Sparse vector index type is fixed to be int32_t
150  * It is the caller function's (inside ParFriends/Friends) job to convert any different types
151  * and ensure correctness. Rationale is efficiency, and the fact that we know for sure
152  * that 32-bit LOCAL indices are sufficient for all reasonable concurrencies and data sizes (as of 2011)
153  * \todo: As of 2015, this might not be true!!! (ABAB)
154  **/
155 template <class SR, class IT, class NUM, class IVT, class OVT>
156 struct SpImpl
157 {
158     static void SpMXSpV(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
159                         std::vector<int32_t> & indy, std::vector< OVT > & numy);	// specialize this
160 
SpMXSpVSpImpl161     static void SpMXSpV(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
162                         int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c)
163     {
164         std::cout << "Optbuf enabled version is not yet supported with general (non-boolean) matrices" << std::endl;
165     };
166 
167 
SpMXSpV_ForThreadingSpImpl168     static void SpMXSpV_ForThreading(const Dcsc<IT,NUM> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
169                                      std::vector<int32_t> & indy, std::vector<OVT> & numy, int32_t offset)
170     {
171         std::cout << "Threaded version is not yet supported with general (non-boolean) matrices" << std::endl;
172     };
SpMXSpV_ForThreadingSpImpl173 	static void SpMXSpV_ForThreading(const Dcsc<IT,NUM> & Acsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
174 									 std::vector<int32_t> & indy, std::vector<OVT> & numy, int32_t offset, std::vector<OVT> & localy, BitMap & isthere, std::vector<uint32_t> & nzinds)
175 	{
176 		std::cout << "Threaded version is not yet supported with general (non-boolean) matrices" << std::endl;
177 	};
178 };
179 
180 
181 
182 
183 template <class SR, class IT, class IVT, class OVT>
184 struct SpImpl<SR,IT,bool, IVT, OVT>	// specialization
185 {
186     static void SpMXSpV(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
187                         std::vector<int32_t> & indy, std::vector< OVT > & numy);
188 
189     static void SpMXSpV(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
190                         int32_t * indy, OVT * numy, int * cnts, int * dspls, int p_c);
191 
192     //! Dcsc and vector index types do not need to match
193     static void SpMXSpV_ForThreading(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
194                                      std::vector<int32_t> & indy, std::vector<OVT> & numy, int32_t offset);
195     //! Dcsc and vector index types do not need to match
196     static void SpMXSpV_ForThreading(const Dcsc<IT,bool> & Adcsc, int32_t mA, const int32_t * indx, const IVT * numx, int32_t veclen,
197                                      std::vector<int32_t> & indy, std::vector<OVT> & numy, int32_t offset, std::vector<OVT> & localy, BitMap & isthere, std::vector<uint32_t> & nzinds);
198 };
199 
200 }
201 
202 #include "SpImpl.cpp"
203 
204 #endif