1 // ============================================================================= 2 // === GPUQREngine/Include/GPUQREngine_TaskDescriptor.hpp ====================== 3 // ============================================================================= 4 // 5 // The TaskType enum is used by the GPU UberKernel to determine which subkernel 6 // functionality to invoke. 7 // 8 // The TaskDescriptor struct wraps all metadata necessary to describe to the 9 // GPU how to perform one logical task. 10 // 11 // ============================================================================= 12 13 #ifndef GPUQRENGINE_TASKDESCRIPTOR_HPP 14 #define GPUQRENGINE_TASKDESCRIPTOR_HPP 15 16 enum TaskType 17 { 18 // Dummy Method (1 total) 19 TASKTYPE_Dummy, // Used only for initializing a task 20 21 // Factorize Methods (8 total) 22 TASKTYPE_GenericFactorize, // An uncategorized Factorize. 23 TASKTYPE_FactorizeVT_3x1, // Factorize tasks are resolved 24 TASKTYPE_FactorizeVT_2x1, // when the work queue is filled 25 TASKTYPE_FactorizeVT_1x1, // based on the problem geometry, 26 TASKTYPE_FactorizeVT_3x1e, // factorization state, and whether 27 TASKTYPE_FactorizeVT_2x1e, // the factorization is at an 28 TASKTYPE_FactorizeVT_1x1e, // "edge case." 29 TASKTYPE_FactorizeVT_3x1w, 30 31 // Apply Methods (4 total) 32 TASKTYPE_GenericApply, // An uncategorized Apply. 33 TASKTYPE_Apply3, // These tasks are likewise resolved 34 TASKTYPE_Apply2, // into their concrete types as the 35 TASKTYPE_Apply1, // work queue is filled. 36 37 #ifdef GPUQRENGINE_PIPELINING 38 // ApplyFactorize Methods (6 total) 39 TASKTYPE_GenericApplyFactorize, // An uncategorized Apply-Factorize. 40 TASKTYPE_Apply3_Factorize3, // These tasks are likewise resolved 41 TASKTYPE_Apply3_Factorize2, // into their concrete types as the 42 TASKTYPE_Apply2_Factorize3, // work queue is filled. 43 TASKTYPE_Apply2_Factorize2, 44 TASKTYPE_Apply2_Factorize1, 45 #endif 46 47 // Assembly Methods (2 total) 48 TASKTYPE_SAssembly, // Input matrix assembly 49 TASKTYPE_PackAssembly // Push assembly (child to parent) 50 }; 51 52 class Scheduler; 53 54 struct TaskDescriptor 55 { 56 /* Put pointers up front to guarantee word-alignment. */ 57 double *F; // Pointer to the frontal matrix 58 double *AuxAddress[4]; // Usage Notes 59 // SAssembly: 60 // AuxAddress[0] is SEntry* 61 // PackAssembly: 62 // AuxAddress[0] is *C 63 // AuxAddress[1] is *P 64 // AuxAddress[2] is *Rjmap 65 // AuxAddress[3] is *Rimap 66 // Apply, Factorize: 67 // AuxAddress[0] is VT 68 // ApplyFactorize: 69 // AuxAddress[0:1] are VT 70 71 TaskType Type; // The TaskType enum described above 72 int fm; // # Rows in the front 73 int fn; // # Cols in the front 74 75 int extra[10]; // Usage Notes 76 // SAssembly: 77 // extra[0] is Scount (unused) 78 // extra[1] is pstart 79 // extra[2] is pend 80 // PackAssembly: 81 // extra[0] is pn 82 // extra[1] is cm (unused) 83 // extra[2] is cn (unused) 84 // extra[3] is cTileSize 85 // extra[4] is cistart 86 // extra[5] is ciend 87 // extra[6] is cjstart 88 // extra[7] is cjend 89 // Apply: 90 // extra[0:2] are rowTiles 91 // extra[4:7] are colTiles 92 // Factorize: 93 // extra[0:2] are rowTiles 94 // extra[4] is colTiles 95 // ApplyFactorize: 96 // extra[0:3] are rowTiles 97 // extra[4:7] are colTiles 98 // extra[8] is delta 99 // extra[9] is secondMin 100 // 101 }; 102 103 104 // These two methods are implemented in TaskDescriptor_flops.cpp. 105 // They are used to rearrange tasks in the WorkQueue to promote a 106 // uniform distribution of work items in the queue. 107 Int getFlops 108 ( 109 TaskDescriptor *task // Task for which to compute the flops 110 ); 111 112 Int getWeightedFlops 113 ( 114 TaskDescriptor *task // Task for which to compute the flops 115 ); 116 117 #endif 118