1 /****************************************************************/ 2 /* Parallel Combinatorial BLAS Library (for Graph Computations) */ 3 /* version 1.6 -------------------------------------------------*/ 4 /* date: 6/15/2017 ---------------------------------------------*/ 5 /* authors: Ariful Azad, Aydin Buluc --------------------------*/ 6 /****************************************************************/ 7 /* 8 Copyright (c) 2010-2017, The Regents of the University of California 9 10 Permission is hereby granted, free of charge, to any person obtaining a copy 11 of this software and associated documentation files (the "Software"), to deal 12 in the Software without restriction, including without limitation the rights 13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 copies of the Software, and to permit persons to whom the Software is 15 furnished to do so, subject to the following conditions: 16 17 The above copyright notice and this permission notice shall be included in 18 all copies or substantial portions of the Software. 19 20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 26 THE SOFTWARE. 27 */ 28 29 30 #ifndef _PRE_ALLOCATED_SPA_H 31 #define _PRE_ALLOCATED_SPA_H 32 #include "BitMap.h" 33 34 namespace combblas { 35 36 /** 37 * This special data structure is used for optimizing BFS iterations 38 * by providing a pre-allocated SPA data structure 39 */ 40 41 template <class OVT > // output value type 42 class PreAllocatedSPA 43 { 44 public: PreAllocatedSPA()45 PreAllocatedSPA():initialized(false) {}; // hide default constructor 46 47 template <class LMAT> PreAllocatedSPA(LMAT & A)48 PreAllocatedSPA(LMAT & A):initialized(true) // the one and only constructor 49 { 50 int64_t mA = A.getnrow(); 51 if( A.getnsplit() > 0) // multithreaded 52 { 53 int64_t perpiece = mA / A.getnsplit(); 54 for(int i=0; i<A.getnsplit(); ++i) 55 { 56 if(i != A.getnsplit()-1) 57 { 58 V_isthere.push_back(BitMap(perpiece)); 59 V_localy.push_back(std::vector<OVT>(perpiece)); 60 61 std::vector<bool> isthere(perpiece, false); 62 for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit) 63 { 64 for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit) 65 { 66 size_t rowid = nzit.rowid(); 67 if(!isthere[rowid]) isthere[rowid] = true; 68 } 69 } 70 size_t maxvector = std::count(isthere.begin(), isthere.end(), true); 71 V_inds.push_back(std::vector<uint32_t>(maxvector)); 72 } 73 else 74 { 75 V_isthere.push_back(BitMap(mA - i*perpiece)); 76 V_localy.push_back(std::vector<OVT>(mA - i*perpiece)); 77 78 std::vector<bool> isthere(mA - i*perpiece, false); 79 for(auto colit = A.begcol(i); colit != A.endcol(i); ++colit) 80 { 81 for(auto nzit = A.begnz(colit,i); nzit != A.endnz(colit,i); ++nzit) 82 { 83 size_t rowid = nzit.rowid(); 84 if(!isthere[rowid]) isthere[rowid] = true; 85 } 86 } 87 size_t maxvector = std::count(isthere.begin(), isthere.end(), true); 88 V_inds.push_back(std::vector<uint32_t>(maxvector)); 89 } 90 } 91 } 92 else // single threaded 93 { 94 V_isthere.push_back(BitMap(mA)); 95 V_localy.push_back(std::vector<OVT>(mA)); 96 97 std::vector<bool> isthere(mA, false); 98 for(auto colit = A.begcol(); colit != A.endcol(); ++colit) 99 { 100 for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit) 101 { 102 size_t rowid = nzit.rowid(); 103 if(!isthere[rowid]) isthere[rowid] = true; 104 } 105 } 106 size_t maxvector = std::count(isthere.begin(), isthere.end(), true); 107 V_inds.push_back(std::vector<uint32_t>(maxvector)); 108 109 } 110 }; 111 112 // for manual splitting. just a hack. need to be fixed 113 114 template <class LMAT> PreAllocatedSPA(LMAT & A,int splits)115 PreAllocatedSPA(LMAT & A, int splits):initialized(true) 116 { 117 buckets = splits; 118 int64_t mA = A.getnrow(); 119 V_isthere.push_back(BitMap(mA)); 120 V_localy.push_back(std::vector<OVT>(mA)); 121 V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads 122 123 124 125 126 127 std::vector<int32_t> nnzSplitA(buckets,0); 128 int32_t rowPerSplit = mA / splits; 129 130 131 //per thread because writing vector<bool> is not thread safe 132 for(int i=0; i<splits-1; i++) 133 V_isthereBool.push_back(std::vector<bool>(rowPerSplit)); 134 V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit)); 135 136 137 //vector<bool> isthere(mA, false); 138 for(auto colit = A.begcol(); colit != A.endcol(); ++colit) 139 { 140 for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit) 141 { 142 size_t rowid = nzit.rowid(); 143 //if(!isthere[rowid]) isthere[rowid] = true; 144 size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit; 145 nnzSplitA[splitId]++; 146 } 147 } 148 149 150 // prefix sum 151 disp.resize(splits+1); 152 disp[0] = 0; 153 for(int i=0; i<splits; i++) 154 { 155 disp[i+1] = disp[i] + nnzSplitA[i]; 156 } 157 158 indSplitA.resize(disp[splits]); 159 numSplitA.resize(disp[splits]); 160 161 162 }; 163 164 // initialize an uninitialized SPA 165 template <class LMAT> Init(LMAT & A,int splits)166 void Init(LMAT & A, int splits) // not done for DCSC matrices with A.getnsplit() 167 { 168 if(!initialized) 169 { 170 initialized = true; 171 buckets = splits; 172 int64_t mA = A.getnrow(); 173 V_isthere.push_back(BitMap(mA)); 174 V_localy.push_back(std::vector<OVT>(mA)); 175 V_inds.push_back(std::vector<uint32_t>(mA)); // for better indexing among threads 176 177 std::vector<int32_t> nnzSplitA(buckets,0); 178 int32_t rowPerSplit = mA / splits; 179 180 for(int i=0; i<splits-1; i++) 181 V_isthereBool.push_back(std::vector<bool>(rowPerSplit)); 182 V_isthereBool.push_back(std::vector<bool>(mA - (splits-1)*rowPerSplit)); 183 184 //vector<bool> isthere(mA, false); 185 for(auto colit = A.begcol(); colit != A.endcol(); ++colit) 186 { 187 for(auto nzit = A.begnz(colit); nzit != A.endnz(colit); ++nzit) 188 { 189 size_t rowid = nzit.rowid(); 190 //if(!isthere[rowid]) isthere[rowid] = true; 191 size_t splitId = (rowid/rowPerSplit > splits-1) ? splits-1 : rowid/rowPerSplit; 192 nnzSplitA[splitId]++; 193 } 194 } 195 196 197 // prefix sum 198 disp.resize(splits+1); 199 disp[0] = 0; 200 for(int i=0; i<splits; i++) 201 { 202 disp[i+1] = disp[i] + nnzSplitA[i]; 203 } 204 205 indSplitA.resize(disp[splits]); 206 numSplitA.resize(disp[splits]); 207 } 208 }; 209 210 int buckets; // number of buckets 211 std::vector< std::vector<uint32_t> > V_inds; // ABAB: is this big enough? 212 std::vector< BitMap > V_isthere; 213 std::vector< std::vector<bool> > V_isthereBool; // for thread safe access 214 std::vector< std::vector<OVT> > V_localy; 215 bool initialized; 216 std::vector<int32_t> indSplitA; 217 std::vector<OVT> numSplitA; 218 std::vector<uint32_t> disp; 219 }; 220 221 } 222 223 #endif 224 225