1 // =============================================================================
2 // === GPUQREngine/Include/Kernel/sharedMemory.cu ==============================
3 // =============================================================================
4 
5 #ifndef GPUQRENGINE_SHAREDMEMORY_HPP
6 #define GPUQRENGINE_SHAREDMEMORY_HPP
7 
// -----------------------------------------------------------------------------
// SharedMemory: one union holding the scratch layout of each kernel family.
// Because the three views below are union members, they overlay the same
// storage; only the view belonging to the currently running kernel is
// meaningful at any given time.
// -----------------------------------------------------------------------------
union sharedMemory
{
    //--------------------------------------------------------------------------
    // factorize: scratch for the factorize kernel (tile case)
    //--------------------------------------------------------------------------

    // MAX_MCHUNK is only needed to size Z; it is undefined again right after
    // the struct so it does not leak into the rest of the translation unit.
    #define MAX_MCHUNK 12
    struct
    {
        // A is declared with exactly the same dimensions as apply.V below;
        // the two declarations must be kept in sync.
        // NOTE(review): the "+ PADDING" on the inner dimension presumably
        // avoids shared-memory bank conflicts -- confirm where PADDING is
        // defined.
        double A  [PANELSIZE * TILESIZE + 1][TILESIZE + PADDING] ;
        double T  [TILESIZE + 1][TILESIZE + PADDING] ;
        double Z  [MAX_MCHUNK][TILESIZE+1] ;
        double A1 [TILESIZE] ;
        double V1 [TILESIZE] ;
        double tau ;
    } factorize ;
    #undef MAX_MCHUNK

    //--------------------------------------------------------------------------
    // apply: scratch for the block_apply kernels
    //--------------------------------------------------------------------------

    // MAX_COL_TILES is only needed to size C; undefined again after the struct.
    #define MAX_COL_TILES 2
    struct
    {
        // V is declared with exactly the same dimensions as factorize.A above;
        // the two declarations must be kept in sync.
        double V [PANELSIZE * TILESIZE + 1][TILESIZE + PADDING] ;
        double C [TILESIZE][MAX_COL_TILES * TILESIZE + PADDING] ;
    } apply ;
    #undef MAX_COL_TILES

    //--------------------------------------------------------------------------
    // packassemble: two integer maps of PACKASSEMBLY_SHMEM_MAPINTS entries each
    //--------------------------------------------------------------------------

    struct
    {
        int Rimap [PACKASSEMBLY_SHMEM_MAPINTS] ;
        int Rjmap [PACKASSEMBLY_SHMEM_MAPINTS] ;
    } packassemble ;
} ;

// Alias under which the union type is used below.
typedef union sharedMemory SharedMemory ;
45 
/* Shared-memory storage used by all kernels.  It is declared once, here at
   file scope, rather than inside each kernel:
     shMemory         - the SharedMemory union of per-kernel scratch layouts
     myTask           - a TaskDescriptor object
     IsApplyFactorize - an int; NOTE(review): presumably a flag marking a
                        combined apply/factorize task -- confirm in the kernels
*/
__shared__ SharedMemory   shMemory ;
__shared__ TaskDescriptor myTask ;
__shared__ int            IsApplyFactorize ;
50 
51 #endif
52