/****************************************************************/
/* Parallel Combinatorial BLAS Library (for Graph Computations) */
/* version 1.6 -------------------------------------------------*/
/* date: 6/15/2017 ---------------------------------------------*/
/* authors: Ariful Azad, Aydin Buluc  --------------------------*/
/****************************************************************/
/*
 Copyright (c) 2010-2017, The Regents of the University of California

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
 */


#ifndef _PRE_ALLOCATED_SPA_H
#define _PRE_ALLOCATED_SPA_H

#include "BitMap.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>

namespace combblas {

/**
 * Pre-allocated sparse accumulator (SPA) used to speed up BFS iterations:
 * the accumulator buffers are allocated once and then reused across
 * iterations instead of being reallocated for every multiplication.
 */

template <class OVT>  // output value type
class PreAllocatedSPA
{
public:
    PreAllocatedSPA():initialized(false) {};   // default constructor leaves the SPA uninitialized; call Init() before use

    template <class LMAT>
    PreAllocatedSPA(LMAT & A):initialized(true)  // construct and pre-allocate from the nonzero structure of A
    {
        int64_t mA = A.getnrow();
        if( A.getnsplit() > 0)  // multithreaded: the matrix is already split into pieces
        {
            int64_t perpiece = mA / A.getnsplit();
            for(int i=0; i<A.getnsplit(); ++i)
            {
                if(i != A.getnsplit()-1)
                {
                    V_isthere.push_back(BitMap(perpiece));
                    V_localy.push_back(std::vector<OVT>(perpiece));

                    // count the distinct rows of this piece that hold at least one nonzero,
                    // so that V_inds can be sized to the maximum number of SPA entries
                    std::vector<bool> isthere(perpiece, false);
                    for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit)
                    {
                        for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit)
                        {
                            size_t rowid = nzit.rowid();
                            if(!isthere[rowid])     isthere[rowid] = true;
                        }
                    }
                    size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
                    V_inds.push_back(std::vector<uint32_t>(maxvector));
                }
                else    // the last piece also absorbs the remainder rows
                {
                    V_isthere.push_back(BitMap(mA - i*perpiece));
                    V_localy.push_back(std::vector<OVT>(mA - i*perpiece));

                    std::vector<bool> isthere(mA - i*perpiece, false);
                    for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit)
                    {
                        for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit)
                        {
                            size_t rowid = nzit.rowid();
                            if(!isthere[rowid])     isthere[rowid] = true;
                        }
                    }
                    size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
                    V_inds.push_back(std::vector<uint32_t>(maxvector));
                }
            }
        }
        else    // single threaded
        {
            V_isthere.push_back(BitMap(mA));
            V_localy.push_back(std::vector<OVT>(mA));

            // count the rows with at least one nonzero to size the index vector
            std::vector<bool> isthere(mA, false);
            for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
            {
                for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
                {
                    size_t rowid = nzit.rowid();
                    if(!isthere[rowid])     isthere[rowid] = true;
                }
            }
            size_t maxvector = std::count(isthere.begin(), isthere.end(), true);
            V_inds.push_back(std::vector<uint32_t>(maxvector));
        }
    };
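
    /*
     * Usage sketch (illustrative only, not part of the library API): assuming
     * a local matrix type such as SpDCCols<int64_t,double> that exposes the
     * getnrow()/getnsplit()/begcol()/begnz() interface used above,
     *
     *     SpDCCols<int64_t, double> Alocal = ...;   // local sparse matrix
     *     PreAllocatedSPA<double> SPA(Alocal);      // buffers sized once, up front
     *
     * SPA.V_localy, SPA.V_isthere and SPA.V_inds can then be reused by every
     * sparse matrix-vector multiply of a BFS run without reallocation.
     */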

    // for manual splitting; currently a workaround that needs a proper fix

    template <class LMAT>
    PreAllocatedSPA(LMAT & A, int splits):initialized(true)
    {
        buckets = splits;
        int64_t mA = A.getnrow();
        V_isthere.push_back(BitMap(mA));
        V_localy.push_back(std::vector<OVT>(mA));
        V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads

        std::vector<int32_t> nnzSplitA(buckets,0);
        int32_t rowPerSplit = mA / splits;

        // one bool vector per split/thread, because concurrent writes to a shared vector<bool> are not thread safe
        for(int i=0; i<splits-1; i++)
            V_isthereBool.push_back(std::vector<bool>(rowPerSplit));
        V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit));

        // count the nonzeros that fall into each row split (the last split absorbs the remainder rows)
        for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
        {
            for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
            {
                size_t rowid = nzit.rowid();
                size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit;
                nnzSplitA[splitId]++;
            }
        }

        // prefix sum: disp[i] is the starting offset of split i inside indSplitA/numSplitA
        disp.resize(splits+1);
        disp[0] = 0;
        for(int i=0; i<splits; i++)
        {
            disp[i+1] = disp[i] + nnzSplitA[i];
        }

        indSplitA.resize(disp[splits]);
        numSplitA.resize(disp[splits]);
    };
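
    /*
     * Worked example of the layout built above (illustrative numbers): with
     * splits = 3 and per-split nonzero counts nnzSplitA = {4, 7, 2}, the
     * prefix sum yields disp = {0, 4, 11, 13}; split i then owns the
     * half-open range [disp[i], disp[i+1]) of indSplitA/numSplitA, and
     * disp[splits] = 13 is the total number of nonzeros.
     */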

    // initialize an SPA that was created with the default constructor
    template <class LMAT>
    void Init(LMAT & A, int splits) // not implemented for DCSC matrices that are already split via A.getnsplit()
    {
        if(!initialized)
        {
            initialized = true;
            buckets = splits;
            int64_t mA = A.getnrow();
            V_isthere.push_back(BitMap(mA));
            V_localy.push_back(std::vector<OVT>(mA));
            V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads

            std::vector<int32_t> nnzSplitA(buckets,0);
            int32_t rowPerSplit = mA / splits;

            // one bool vector per split/thread, because concurrent writes to a shared vector<bool> are not thread safe
            for(int i=0; i<splits-1; i++)
                V_isthereBool.push_back(std::vector<bool>(rowPerSplit));
            V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit));

            // count the nonzeros that fall into each row split (the last split absorbs the remainder rows)
            for(auto colit = A.begcol(); colit != A.endcol(); ++colit)
            {
                for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit)
                {
                    size_t rowid = nzit.rowid();
                    size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit;
                    nnzSplitA[splitId]++;
                }
            }

            // prefix sum: disp[i] is the starting offset of split i inside indSplitA/numSplitA
            disp.resize(splits+1);
            disp[0] = 0;
            for(int i=0; i<splits; i++)
            {
                disp[i+1] = disp[i] + nnzSplitA[i];
            }

            indSplitA.resize(disp[splits]);
            numSplitA.resize(disp[splits]);
        }
    };
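
    /*
     * Deferred-initialization sketch (illustrative only; assumes a local
     * matrix type exposing the same getnrow()/begcol()/begnz() interface):
     *
     *     PreAllocatedSPA<double> SPA;        // default constructed, uninitialized
     *     // ... later, once the local matrix and thread count are known:
     *     SPA.Init(Alocal, nthreads);         // nthreads: e.g. the OpenMP thread count
     *
     * A second call to Init is a no-op because `initialized` is already true.
     */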

    int buckets; // number of buckets (splits)
    std::vector< std::vector<uint32_t> > V_inds;  // nonzero indices per split (ABAB: is this big enough?)
    std::vector< BitMap > V_isthere;              // bitmap marking which rows currently hold a value
    std::vector< std::vector<bool> > V_isthereBool; // one bool vector per split, for thread safe access
    std::vector< std::vector<OVT> > V_localy;     // dense value buffers of the SPA, one per split
    bool initialized;                             // whether the buffers above have been allocated
    std::vector<int32_t> indSplitA;               // per-split index buffer, sized to A's nonzero count per row split
    std::vector<OVT> numSplitA;                   // per-split value buffer, sized like indSplitA
    std::vector<uint32_t> disp;                   // prefix sums: split i owns [disp[i], disp[i+1]) of indSplitA/numSplitA
};

}   // namespace combblas

#endif
