// =============================================================================
// === GPUQREngine/Include/GPUQREngine_Scheduler.hpp ===========================
// =============================================================================
//
// The Scheduler is a principal class in the GPUQREngine.
//
// This class manages the input set of Fronts, creates BucketLists when
// necessary for factorization, and contains all logic required to coordinate
// the factorization and assembly tasks with the GPU.
//
// =============================================================================
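//
// Typical use (an illustrative sketch only; the loop ordering is an assumption
// inferred from the declarations in this header, and the actual driver logic
// resides in the GPUQREngine sources):
//
//      Scheduler *scheduler = (Scheduler *) malloc (sizeof (Scheduler)) ;
//      new (scheduler) Scheduler (fronts, numFronts, gpuMemorySize) ;
//      if (scheduler->memory_ok && scheduler->cuda_ok)
//      {
//          while (scheduler->numFrontsCompleted < scheduler->numFronts)
//          {
//              scheduler->fillWorkQueue ( ) ;   // build the GPU task list
//              scheduler->launchKernel ( ) ;    // run tasks on the device
//              scheduler->transferData ( ) ;    // async H2D / D2H transfers
//              scheduler->postProcess ( ) ;     // advance front states
//              scheduler->toggleQueue ( ) ;     // swap double-buffered queues
//          }
//      }
//      scheduler->~Scheduler ( ) ;
//      free (scheduler) ;
//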

#ifndef GPUQRENGINE_SCHEDULER_HPP
#define GPUQRENGINE_SCHEDULER_HPP

#include "GPUQREngine_Common.hpp"
#include "GPUQREngine_FrontState.hpp"
#include "GPUQREngine_TaskDescriptor.hpp"
#include "GPUQREngine_BucketList.hpp"
#include "GPUQREngine_LLBundle.hpp"
#include "GPUQREngine_Front.hpp"

#define SSGPU_MINAPPLYGRANULARITY 16

size_t ssgpu_maxQueueSize       // return size of scheduler queue
(
    size_t gpuMemorySize        // size of GPU memory, in bytes
) ;
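
// For example (an illustrative call, not taken from the engine's sources), the
// code that constructs a Scheduler can size its work queues from the available
// device memory:
//
//      size_t maxQueueSize = ssgpu_maxQueueSize (gpuMemorySize) ;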

class Scheduler
{
private:
    /* Scheduler.cpp */
    bool initialize(size_t gpuMemorySize);

    /* Scheduler_Front.cpp */
    bool pullFrontData(Int f);

    /* Scheduler_FillWorkQueue.cpp */
    void fillTasks
    (
        Int f,                          // INPUT: Current front
        TaskDescriptor *queue,          // INPUT: CPU task entries
        Int *queueIndex                 // IN/OUT: The index of the current entry
    );

public:
    bool memory_ok;                     // Flag for the creating function to
                                        // determine whether we had enough
                                        // memory to initialize the Scheduler.
    bool cuda_ok;                       // Flag for the creating function to
                                        // determine whether we could
                                        // successfully invoke the CUDA
                                        // initialization calls.

    Front *frontList;
    Int numFronts;
    Int numFrontsCompleted;

    int activeSet;

    BucketList *bucketLists;

    Int *afPerm;                        // Permutation of "active" fronts
    Int *afPinv;                        // Inverse permutation of "active" fronts
    Int numActiveFronts;

    Int maxQueueSize;
    Workspace *workQueues[2];
    Int numTasks[2];
    Int minApplyGranularity;            // The minimum number of tiles for which
                                        // we will group apply tasks

    bool *FrontDataPulled;              // A set of flags indicating whether R has
                                        // been pulled off the GPU.
    cudaEvent_t *eventFrontDataReady;   // A list of cudaEvents that are used to
                                        // coordinate when the R factor is ready
                                        // to be pulled from the GPU.
    cudaEvent_t *eventFrontDataPulled;  // A list of cudaEvents that are used to
                                        // coordinate when the R factor has
                                        // finished transferring off the GPU.

    // Use multiple CUDA streams to coordinate kernel launches and asynchronous
    // memory transfers between the host and the device:
    //   kernelStreams : Launch kernels on alternating streams
    //   H2D           : Asynchronous memory transfer stream (Host-to-Device)
    //   D2H           : Asynchronous memory transfer stream (Device-to-Host)
    cudaStream_t kernelStreams[2];
    cudaStream_t memoryStreamH2D;
    cudaStream_t memoryStreamD2H;
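
    // Illustrative sketch of how these streams and the per-front events could
    // coordinate pulling an R factor off the device (an assumption drawn from
    // the member names above, not the engine's exact implementation; see
    // Scheduler_LaunchKernel.cpp and Scheduler_TransferData.cpp). Here cpuR,
    // gpuR, and rSize are hypothetical placeholders for the host buffer,
    // device buffer, and byte count of front f's R factor:
    //
    //      // after the kernel that finishes front f on the active stream:
    //      cudaEventRecord (eventFrontDataReady[f], kernelStreams[activeSet]) ;
    //
    //      // the D2H stream waits for that event, then copies R to the host:
    //      cudaStreamWaitEvent (memoryStreamD2H, eventFrontDataReady[f], 0) ;
    //      cudaMemcpyAsync (cpuR, gpuR, rSize, cudaMemcpyDeviceToHost,
    //          memoryStreamD2H) ;
    //      cudaEventRecord (eventFrontDataPulled[f], memoryStreamD2H) ;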

    /* Scheduler.cpp */
    // Placement operator new: constructs the Scheduler in caller-provided memory.
    void *operator new(long unsigned int, Scheduler* p){ return p; }
    Scheduler(Front *fronts, Int numFronts, size_t gpuMemorySize);
    ~Scheduler();

    /* Scheduler_Front.cpp */
    void activateFront
    (
        Int f                   // The index of the front to operate on
    );

    bool finishFront
    (
        Int f                   // The index of the front to operate on
    );

    void initializeBucketList
    (
        Int f                   // The index of the front to operate on
    )
    {
        // NOTE: tested by SPQR/Tcov, but not flagged as such in cov results
        BucketList *dlbl = (&bucketLists[f]);
        if(dlbl->useFlag) dlbl->Initialize();
    }

    /* Scheduler_TransferData.cpp */
    void transferData
    (
        void
    );

    /* Scheduler_FillWorkQueue.cpp */
    void fillWorkQueue
    (
        void
    );

    /* Scheduler_LaunchKernel.cpp */
    void launchKernel
    (
        void
    );

    /* Scheduler_PostProcess.cpp */
    bool postProcess
    (
        void
    );

    // Flip activeSet between 0 and 1, switching to the other buffered work
    // queue (workQueues[activeSet], numTasks[activeSet]).
    void toggleQueue
    (
        void
    )
    {
        activeSet ^= 1;
    }

    /* Stats */
    float kernelTime;
    Int numKernelLaunches;
    Int gpuFlops;

#ifdef GPUQRENGINE_RENDER
    /* Debug stuff */
    const char *TaskNames[21];
    const char *StateNames[9];
    int renderCount;
    void render();
#endif

#if 1
    void debugDumpFront(Front *front);
#endif
};

#endif