1 // ============================================================================= 2 // === GPUQREngine/Include/GPUQREngine_BucketList.hpp ========================== 3 // ============================================================================= 4 // 5 // The BucketList is a principal class in the GPUQREngine. 6 // 7 // The BucketList manages a set of LLBundle structures in a doubly-linked list. 8 // During factorization, the BucketList logically manipulates the LLBundles, 9 // and depending on the configuration of each, generates GPU tasks to be added 10 // to the GPU work queue. 11 // 12 // ============================================================================= 13 14 #ifndef GPUQRENGINE_BUCKETLIST_HPP 15 #define GPUQRENGINE_BUCKETLIST_HPP 16 17 #include "GPUQREngine_Common.hpp" 18 #include "GPUQREngine_TaskDescriptor.hpp" 19 #include "GPUQREngine_LLBundle.hpp" 20 #include "GPUQREngine_Front.hpp" 21 22 struct TaskDescriptor; 23 class LLBundle; 24 25 class BucketList 26 { 27 public: 28 bool useFlag; // A flag indicating whether to use this 29 bool memory_ok; // A flag indicating whether the object 30 // was constructed properly 31 32 double *gpuF; // The gpu front pointer 33 34 Int *head; // The head idle tile index in the bucket 35 Int *next; // The next idle tile index in the bucket 36 Int *prev; // The prev idle tile index in the bucket 37 bool *triu; // Flag indicating whether the tile index 38 // is upper triangular 39 40 Int *bundleCount; // The # of bundles native to bucket index 41 Int *idleTileCount; // The # of idle tiles in bucket index 42 43 Front *front; 44 Int numRowTiles; // # row tiles of F 45 Int numColTiles; // # col tiles of F 46 Int numBuckets; // min(numRowTiles, numColTiles) 47 Int numIdleTiles; // Total # of idle tiles stored in buckets 48 Int PanelSize; // Max # of rowtiles that can fit in one bundle 49 Int TileSize; // Dimensions of tiles 50 Int Wavefront; // Index of first non-completed colBucket 51 Int LastBucket; // Index of last colBucket with idleTiles 52 // or bundles 53 54 Int ApplyGranularity; // The desired granularity (in col tiles) 55 // for applies 56 57 LLBundle *Bundles; // The bundles maintained by this scheduler 58 Int numBundles; // Total # of bundles 59 60 Workspace *wsMongoVT; // The VT blocks this bucket list scheduler owns 61 double **gpuVT; // Array of available VT slots within the VT struct 62 int VThead; // Index of the first available entry in VTlist 63 64 // Constructors operator new(long unsigned int,BucketList * p)65 void *operator new(long unsigned int, BucketList* p) 66 { 67 return p; 68 } 69 BucketList(Front *f, Int minApplyGranularity); 70 ~BucketList(); 71 72 // Bundle management functions 73 void Insert(Int tile, Int bucket, bool upperTriangular = false); 74 void Remove(Int tile, Int bucket); 75 #ifdef GPUQRENGINE_PIPELINING 76 Int RemoveHead(Int bucket); 77 #endif 78 79 // VT management functions 80 double *allocateVT(); 81 double *freeVT(double *gpuVT); 82 IsDone()83 bool IsDone() 84 { 85 // We're done if we have no bundles left with tasks. 86 return (numBundles == 0); 87 } 88 89 // // IsRReadyEarly experimental feature : not available in production use. 90 // bool IsRReadyEarly() 91 // { 92 // // If we're doing a dense factorization, we're never done early. 93 // if(front->isDense()) return false; 94 // 95 // // We can't pull the R factor early if we also need the CBlock. 96 // if(front->isStaged()) return false; 97 // 98 // // If we're doing a sparse factorization, we're done early if we're 99 // // past the pivot row. 100 // return (TILESIZE * (Wavefront-1) > front->sparseMeta.fp); 101 // } 102 103 // Initialize takes the BucketList and adds rowtiles in positions 104 // appropriate for the staircase of the problem. 105 void Initialize 106 ( 107 void 108 ); 109 110 // AdvanceBundles advances existing bundles, leaving the First tile behind 111 // and keeping a Shadow copy to support subsequent Apply tasks. 112 void AdvanceBundles 113 ( 114 void 115 ); 116 117 #ifdef GPUQRENGINE_PIPELINING 118 // GrowBundles looks for row tiles (or bundles) involved in a factorization 119 // and attempts to add those bundles or row tiles to a task currently set 120 // for a series of Apply tasks. This is also known as Pipelining. 121 void GrowBundles 122 ( 123 void 124 ); 125 #endif 126 127 // CreateBundles selects rowtiles up to PANELSIZE and creates a new bundle 128 // ready for factorization. 129 void CreateBundles 130 ( 131 void 132 ); 133 134 // PostProcess handles any cleanup operations following a kernel invocation 135 // including merging delta tiles with the main bundle and other fixups. 136 void PostProcess 137 ( 138 void 139 ); 140 141 // SkipBundleCreation determines whether we should skip creating a new 142 // bundle for the specified tile in the specified column bucket. 143 bool SkipBundleCreation 144 ( 145 Int tile, 146 Int colBucket 147 ); 148 149 // IsInternal determines whether a tile is completely within the bounds 150 // of the front because if it isn't then we will need to use the special 151 // edge case kernels. 152 bool IsInternal 153 ( 154 LLBundle& bundle, 155 int jLast 156 ); 157 158 // FillWorkQueue is responsible for filling the work queue with items and 159 // resolving generic TaskType entries on the bundles into concrete tasks 160 // to be performed by the GPU. 161 Int FillWorkQueue 162 ( 163 TaskDescriptor *queue, // The list of work items for the GPU 164 Int *queueIndex // The current index into the queue 165 ); 166 }; 167 168 #endif 169