1 // =============================================================================
2 // === GPUQREngine/Include/GPUQREngine_BucketList.hpp ==========================
3 // =============================================================================
4 //
5 // The BucketList is a principal class in the GPUQREngine.
6 //
7 // The BucketList manages a set of LLBundle structures in a doubly-linked list.
8 // During factorization, the BucketList logically manipulates the LLBundles,
9 // and depending on the configuration of each, generates GPU tasks to be added
10 // to the GPU work queue.
11 //
12 // =============================================================================
13 
14 #ifndef GPUQRENGINE_BUCKETLIST_HPP
15 #define GPUQRENGINE_BUCKETLIST_HPP
16 
17 #include "GPUQREngine_Common.hpp"
18 #include "GPUQREngine_TaskDescriptor.hpp"
19 #include "GPUQREngine_LLBundle.hpp"
20 #include "GPUQREngine_Front.hpp"
21 
22 struct TaskDescriptor;
23 class LLBundle;
24 
25 class BucketList
26 {
27 public:
28     bool useFlag;            // A flag indicating whether to use this
29     bool memory_ok;          // A flag indicating whether the object
30                              // was constructed properly
31 
32     double *gpuF;            // The gpu front pointer
33 
34     Int *head;               // The head idle tile index in the bucket
35     Int *next;               // The next idle tile index in the bucket
36     Int *prev;               // The prev idle tile index in the bucket
37     bool *triu;              // Flag indicating whether the tile index
38                              // is upper triangular
39 
40     Int *bundleCount;        // The # of bundles native to bucket index
41     Int *idleTileCount;      // The # of idle tiles in bucket index
42 
43     Front *front;
44     Int numRowTiles;         // # row tiles of F
45     Int numColTiles;         // # col tiles of F
46     Int numBuckets;          // min(numRowTiles, numColTiles)
47     Int numIdleTiles;        // Total # of idle tiles stored in buckets
48     Int PanelSize;           // Max # of rowtiles that can fit in one bundle
49     Int TileSize;            // Dimensions of tiles
50     Int Wavefront;           // Index of first non-completed colBucket
51     Int LastBucket;          // Index of last colBucket with idleTiles
52                              // or bundles
53 
54     Int ApplyGranularity;    // The desired granularity (in col tiles)
55                              // for applies
56 
57     LLBundle *Bundles;       // The bundles maintained by this scheduler
58     Int numBundles;          // Total # of bundles
59 
60     Workspace *wsMongoVT;    // The VT blocks this bucket list scheduler owns
61     double **gpuVT;          // Array of available VT slots within the VT struct
62     int VThead;              // Index of the first available entry in VTlist
63 
64     // Constructors
operator new(long unsigned int,BucketList * p)65     void *operator new(long unsigned int, BucketList* p)
66     {
67         return p;
68     }
69     BucketList(Front *f, Int minApplyGranularity);
70     ~BucketList();
71 
72     // Bundle management functions
73     void Insert(Int tile, Int bucket, bool upperTriangular = false);
74     void Remove(Int tile, Int bucket);
75     #ifdef GPUQRENGINE_PIPELINING
76     Int RemoveHead(Int bucket);
77     #endif
78 
79     // VT management functions
80     double *allocateVT();
81     double *freeVT(double *gpuVT);
82 
IsDone()83     bool IsDone()
84     {
85         // We're done if we have no bundles left with tasks.
86         return (numBundles == 0);
87     }
88 
89 //  // IsRReadyEarly experimental feature : not available in production use.
90 //  bool IsRReadyEarly()
91 //  {
92 //      // If we're doing a dense factorization, we're never done early.
93 //      if(front->isDense()) return false;
94 //
95 //      // We can't pull the R factor early if we also need the CBlock.
96 //      if(front->isStaged()) return false;
97 //
98 //      // If we're doing a sparse factorization, we're done early if we're
99 //      // past the pivot row.
100 //      return (TILESIZE * (Wavefront-1) > front->sparseMeta.fp);
101 //  }
102 
103     // Initialize takes the BucketList and adds rowtiles in positions
104     // appropriate for the staircase of the problem.
105     void Initialize
106     (
107         void
108     );
109 
110     // AdvanceBundles advances existing bundles, leaving the First tile behind
111     // and keeping a Shadow copy to support subsequent Apply tasks.
112     void AdvanceBundles
113     (
114         void
115     );
116 
117     #ifdef GPUQRENGINE_PIPELINING
118     // GrowBundles looks for row tiles (or bundles) involved in a factorization
119     // and attempts to add those bundles or row tiles to a task currently set
120     // for a series of Apply tasks. This is also known as Pipelining.
121     void GrowBundles
122     (
123         void
124     );
125     #endif
126 
127     // CreateBundles selects rowtiles up to PANELSIZE and creates a new bundle
128     // ready for factorization.
129     void CreateBundles
130     (
131         void
132     );
133 
134     // PostProcess handles any cleanup operations following a kernel invocation
135     // including merging delta tiles with the main bundle and other fixups.
136     void PostProcess
137     (
138         void
139     );
140 
141     // SkipBundleCreation determines whether we should skip creating a new
142     // bundle for the specified tile in the specified column bucket.
143     bool SkipBundleCreation
144     (
145         Int tile,
146         Int colBucket
147     );
148 
149     // IsInternal determines whether a tile is completely within the bounds
150     // of the front because if it isn't then we will need to use the special
151     // edge case kernels.
152     bool IsInternal
153     (
154         LLBundle& bundle,
155         int jLast
156     );
157 
158     // FillWorkQueue is responsible for filling the work queue with items and
159     // resolving generic TaskType entries on the bundles into concrete tasks
160     // to be performed by the GPU.
161     Int FillWorkQueue
162     (
163         TaskDescriptor *queue,  // The list of work items for the GPU
164         Int *queueIndex         // The current index into the queue
165     );
166 };
167 
168 #endif
169