1 // ============================================================================= 2 // === GPUQREngine/Include/GPUQREngine_Scheduler.hpp =========================== 3 // ============================================================================= 4 // 5 // The Scheduler is a principal class in the GPUQREngine. 6 // 7 // This class manages the input set of Fronts, creates BucketLists when 8 // necessary for factorization, and contains all logic required to coordinate 9 // the factorization and assembly tasks with the GPU. 10 // 11 // ============================================================================= 12 13 #ifndef GPUQRENGINE_SCHEDULER_HPP 14 #define GPUQRENGINE_SCHEDULER_HPP 15 16 #include "GPUQREngine_Common.hpp" 17 #include "GPUQREngine_FrontState.hpp" 18 #include "GPUQREngine_TaskDescriptor.hpp" 19 #include "GPUQREngine_BucketList.hpp" 20 #include "GPUQREngine_LLBundle.hpp" 21 #include "GPUQREngine_Front.hpp" 22 23 #define SSGPU_MINAPPLYGRANULARITY 16 24 25 size_t ssgpu_maxQueueSize // return size of scheduler queue 26 ( 27 size_t gpuMemorySize // size of GPU memory, in bytes 28 ) ; 29 30 class Scheduler 31 { 32 private: 33 /* Scheduler.cpp */ 34 bool initialize(size_t gpuMemorySize); 35 36 /* Scheduler_Front.cpp */ 37 bool pullFrontData(Int f); 38 39 /* Scheduler_FillWorkQueue.cpp */ 40 void fillTasks 41 ( 42 Int f, // INPUT: Current front 43 TaskDescriptor *queue, // INPUT: CPU Task entries 44 Int *queueIndex // IN/OUT: The index of the current entry 45 ); 46 47 public: 48 bool memory_ok; // Flag for the creating function to 49 // determine whether we had enough 50 // memory to initialize the Scheduler. 51 bool cuda_ok; // Flag for the creating function to 52 // determine whether we could 53 // successfully invoke the cuda 54 // initialization calls. 55 56 Front *frontList; 57 Int numFronts; 58 Int numFrontsCompleted; 59 60 int activeSet; 61 62 BucketList *bucketLists; 63 64 Int *afPerm; // Permutation of "active" fronts 65 Int *afPinv; // Inverse permutation of "active" fronts 66 Int numActiveFronts; 67 68 Int maxQueueSize; 69 Workspace *workQueues[2]; 70 Int numTasks[2]; 71 Int minApplyGranularity; // The minimum number of tiles for which 72 // we will group apply tasks 73 74 bool *FrontDataPulled; // A set of flags indicating whether R has 75 // been pulled off the GPU. 76 cudaEvent_t *eventFrontDataReady; // A list of cudaEvents that are used to 77 // coordinate when the R factor is ready 78 // to be pulled from the GPU. 79 cudaEvent_t *eventFrontDataPulled; // A list of cudaEvents that are used to 80 // coordinate when the R factor is finally 81 // finished transfering off the GPU. 82 83 // Use multiple CUDA streams to coordinate kernel launches and asynchronous 84 // memory transfers between the host and the device: 85 // kernelStreams : Launch kernels on alternating streams 86 // H2D : Asynchronous memory transfer stream (Host-to-Device) 87 // D2H : Asynchronous memory transfer stream (Device-to-Host) 88 cudaStream_t kernelStreams[2]; 89 cudaStream_t memoryStreamH2D; 90 cudaStream_t memoryStreamD2H; 91 92 /* Scheduler.cpp */ operator new(long unsigned int,Scheduler * p)93 void *operator new(long unsigned int, Scheduler* p){ return p; } 94 Scheduler(Front *fronts, Int numFronts, size_t gpuMemorySize); 95 ~Scheduler(); 96 97 /* Scheduler_Front.cpp */ 98 void activateFront 99 ( 100 Int f // The index of the front to operate on 101 ); 102 103 bool finishFront 104 ( 105 Int f // The index of the front to operate on 106 ); 107 initializeBucketList(Int f)108 void initializeBucketList 109 ( 110 Int f // The index of the front to operate on 111 ) 112 { 113 // NOTE: tested by SPQR/Tcov, but not flagged as such in cov results 114 BucketList *dlbl = (&bucketLists[f]); 115 if(dlbl->useFlag) dlbl->Initialize(); 116 } 117 118 /* Scheduler_TransferData.cpp */ 119 void transferData 120 ( 121 void 122 ); 123 124 /* Scheduler_FillWorkQueue.cpp */ 125 void fillWorkQueue 126 ( 127 void 128 ); 129 130 /* Scheduler_LaunchKernel.cpp */ 131 void launchKernel 132 ( 133 void 134 ); 135 136 /* Scheduler_PostProcess.cpp */ 137 bool postProcess 138 ( 139 void 140 ); 141 toggleQueue(void)142 void toggleQueue 143 ( 144 void 145 ) 146 { 147 activeSet ^= 1; 148 } 149 150 /* Stats */ 151 float kernelTime; 152 Int numKernelLaunches; 153 Int gpuFlops; 154 155 #ifdef GPUQRENGINE_RENDER 156 /* Debug stuff */ 157 const char *TaskNames[21]; 158 const char *StateNames[9]; 159 int renderCount; 160 void render(); 161 #endif 162 163 #if 1 164 void debugDumpFront(Front *front); 165 #endif 166 }; 167 168 #endif 169