Home
last modified time | relevance | path

Searched refs:warp_ReduceSum (Results 1 – 25 of 102) sorted by relevance

12345

/dports/math/suitesparse-klu/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-btf/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-amd/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-ldl/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-mongoose/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-rbio/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-colamd/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()
/dports/math/suitesparse-config/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-ccolamd/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-cholmod/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-spqr/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-slip_lu/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-camd/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-umfpack/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-csparse/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-cxsparse/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
/dports/math/suitesparse-graphblas/SuiteSparse-5.10.1/GraphBLAS/CUDA/templates/
H A DGB_jit_AxB_dot3_phase3_dndn.cu46 __inline__ __device__ T warp_ReduceSum(thread_block_tile<warp_sz> g, T val) in warp_ReduceSum() function
68 val = warp_ReduceSum< T, warpSize>(tile, val); in block_ReduceSum()
79 if (wid==0) val = warp_ReduceSum< T, warpSize>(tile,val); //Final reduce within first warp in block_ReduceSum()
163 cij = warp_ReduceSum<T_Z, 32> ( tile, cij); in AxB_dot3_phase3_dndn()
H A DreduceWarp.cu25 T warp_ReduceSum( thread_block_tile<tile_sz> g, T val) in warp_ReduceSum() function
48 val = warp_ReduceSum<T, warpSize>( tile, val); in block_ReduceSum()
64 if (wid==0) val = warp_ReduceSum<T, warpSize>( tile, val); //Final reduce within first warp in block_ReduceSum()

12345