1 // =============================================================================
2 // === GPUQREngine/Include/GPUQREngine_TaskDescriptor.hpp ======================
3 // =============================================================================
4 //
5 // The TaskType enum is used by the GPU UberKernel to determine which subkernel
6 // functionality to invoke.
7 //
8 // The TaskDescriptor struct wraps all metadata necessary to describe to the
9 // GPU how to perform one logical task.
10 //
11 // =============================================================================
12 
13 #ifndef GPUQRENGINE_TASKDESCRIPTOR_HPP
14 #define GPUQRENGINE_TASKDESCRIPTOR_HPP
15 
// TaskType tells the GPU UberKernel which subkernel functionality to invoke
// for a task.  The enumerators are grouped by the kind of work they select:
// Dummy, Factorize, Apply, (optionally) ApplyFactorize, and Assembly.
//
// No enumerator is given an explicit value, so each ordinal is determined by
// declaration order.  The ApplyFactorize group is compiled in only when
// GPUQRENGINE_PIPELINING is defined, which means the ordinals of the Assembly
// enumerators that follow it differ between the two configurations.
// NOTE(review): any code that exchanges TaskType values presumably has to be
// built with the same GPUQRENGINE_PIPELINING setting -- confirm.
16 enum TaskType
17 {
18     // Dummy Method (1 total)
19     TASKTYPE_Dummy,                     // Used only for initializing a task
20 
21     // Factorize Methods (8 total)
22     TASKTYPE_GenericFactorize,          // An uncategorized Factorize.
23     TASKTYPE_FactorizeVT_3x1,           //   Factorize tasks are resolved
24     TASKTYPE_FactorizeVT_2x1,           //   when the work queue is filled
25     TASKTYPE_FactorizeVT_1x1,           //   based on the problem geometry,
26     TASKTYPE_FactorizeVT_3x1e,          //   factorization state, and whether
27     TASKTYPE_FactorizeVT_2x1e,          //   the factorization is at an
28     TASKTYPE_FactorizeVT_1x1e,          //   "edge case."
29     TASKTYPE_FactorizeVT_3x1w,          // NOTE(review): the 'w' variant is not described in this header -- confirm its meaning
30 
31     // Apply Methods (4 total)
32     TASKTYPE_GenericApply,              // An uncategorized Apply.
33     TASKTYPE_Apply3,                    //   These tasks are likewise resolved
34     TASKTYPE_Apply2,                    //   into their concrete types as the
35     TASKTYPE_Apply1,                    //   work queue is filled.
36 
    // The following group exists only in builds that define
    // GPUQRENGINE_PIPELINING; without it the Assembly enumerators directly
    // follow TASKTYPE_Apply1.
37     #ifdef GPUQRENGINE_PIPELINING
38     // ApplyFactorize Methods (6 total)
39     TASKTYPE_GenericApplyFactorize,     // An uncategorized Apply-Factorize.
40     TASKTYPE_Apply3_Factorize3,         //   These tasks are likewise resolved
41     TASKTYPE_Apply3_Factorize2,         //   into their concrete types as the
42     TASKTYPE_Apply2_Factorize3,         //   work queue is filled.
43     TASKTYPE_Apply2_Factorize2,
44     TASKTYPE_Apply2_Factorize1,
45     #endif
46 
47     // Assembly Methods (2 total)
48     TASKTYPE_SAssembly,                 // Input matrix assembly
49     TASKTYPE_PackAssembly               // Push assembly (child to parent)
50 };
51 
// Forward declaration of Scheduler.  Nothing else in the visible part of this
// header refers to it.
// NOTE(review): presumably kept for files that include this header -- confirm
// before removing.
52 class Scheduler;
53 
// TaskDescriptor wraps the metadata needed to describe to the GPU how to
// perform one logical task.  Type selects the kind of task; the meaning of
// the AuxAddress[] and extra[] slots depends on that kind and is listed in
// the usage notes beside each array.
//
// In the usage notes, an index range written a:b appears to be inclusive at
// both ends (for ApplyFactorize, extra[0:3], extra[4:7], extra[8] and
// extra[9] together account for all ten slots).
//
// NOTE(review): the layout is presumably read by device code as well as host
// code -- confirm before reordering, resizing, or adding members.
54 struct TaskDescriptor
55 {
56     /* Put pointers up front to guarantee word-alignment. */
57     double *F;                          // Pointer to the frontal matrix
    // The slots below are declared double*, but per the usage notes some of
    // them carry pointers to other objects (e.g. SEntry*).
    // NOTE(review): presumably cast at the point of use -- confirm.
58     double *AuxAddress[4];              // Usage Notes
59                                         //   SAssembly:
60                                         //     AuxAddress[0]    is SEntry*
61                                         //   PackAssembly:
62                                         //     AuxAddress[0]    is *C
63                                         //     AuxAddress[1]    is *P
64                                         //     AuxAddress[2]    is *Rjmap
65                                         //     AuxAddress[3]    is *Rimap
66                                         //   Apply, Factorize:
67                                         //     AuxAddress[0]    is VT
68                                         //   ApplyFactorize:
69                                         //     AuxAddress[0:1] are VT
70 
71     TaskType Type;                      // The TaskType enum value selecting the kind of task
72     int fm;                             // # Rows in the front
73     int fn;                             // # Cols in the front
74 
    // Per-task integer parameters; slots not listed for a given task kind
    // are not described by this header.
75     int extra[10];                      // Usage Notes
76                                         //   SAssembly:
77                                         //     extra[0]    is Scount    (unused)
78                                         //     extra[1]    is pstart
79                                         //     extra[2]    is pend
80                                         //   PackAssembly:
81                                         //     extra[0]    is pn
82                                         //     extra[1]    is cm        (unused)
83                                         //     extra[2]    is cn        (unused)
84                                         //     extra[3]    is cTileSize
85                                         //     extra[4]    is cistart
86                                         //     extra[5]    is ciend
87                                         //     extra[6]    is cjstart
88                                         //     extra[7]    is cjend
89                                         //   Apply:
90                                         //     extra[0:2] are rowTiles
91                                         //     extra[4:7] are colTiles
92                                         //   Factorize:
93                                         //     extra[0:2] are rowTiles
94                                         //     extra[4]    is colTiles
95                                         //   ApplyFactorize:
96                                         //     extra[0:3] are rowTiles
97                                         //     extra[4:7] are colTiles
98                                         //     extra[8]    is delta
99                                         //     extra[9]    is secondMin
100                                         //   NOTE(review): extra[3] is not listed for Apply or Factorize -- confirm it is unused there
101 };
102 
103 
104 // These two functions are implemented in TaskDescriptor_flops.cpp.
105 // They are used to rearrange tasks in the WorkQueue to promote a
106 // uniform distribution of work items in the queue.
// getFlops: declaration only; the definition is in TaskDescriptor_flops.cpp.
//   task    -- the task for which to compute the flops
//   returns -- an Int; presumably the flop count for that task (confirm
//              against the definition)
// Int is not declared in the visible part of this header, so it must already
// be in scope wherever this header is included.
// NOTE(review): the parameter is a non-const pointer; presumably the task is
// only read -- confirm.
107 Int getFlops
108 (
109     TaskDescriptor *task                // Task for which to compute the flops
110 );
111 
// getWeightedFlops: declaration only; the definition is in
// TaskDescriptor_flops.cpp.  Takes a task pointer and returns an Int, with
// the same signature shape as getFlops.
//   task    -- the task for which to compute the flops
//   returns -- an Int
// NOTE(review): how the result is weighted is not visible in this header --
// confirm against the definition before relying on its scale or units.
112 Int getWeightedFlops
113 (
114     TaskDescriptor *task                // Task for which to compute the flops
115 );
116 
117 #endif
118