1 #include <stdio.h>
2
3 // __device__ function
/*
 * Carves two typed views out of the `array` buffer: a `short` view at its
 * start and a `float` view that begins after the first 128 shorts.
 *
 * `array` is declared outside this block -- presumably the
 * `extern __shared__` buffer; confirm against its declaration. The offset
 * arithmetic below assumes that buffer itself is at least 4-byte aligned.
 *
 * Neither view is read or written here; the function only forms the pointers.
 */
__device__ void func()
{
    short* array0 = (short*)array;

    // A pointer must be aligned to the type it points to. Index 127 would put
    // the float view at byte offset 127 * sizeof(short) = 254, which is not a
    // multiple of sizeof(float). Index 128 gives byte offset 256, which is.
    float* array1 = (float*)&array0[128];
}
9
10 /* __global__ function */
/*
 * Kernel skeleton for a block-level reduction.
 *
 * As written it touches none of its arguments and never reads or writes the
 * shared buffer: thread 0 of each block issues a memory fence, and then every
 * thread walks a halving loop that performs one block-wide barrier per step.
 *
 * input  - read-only source data (unused in this body)
 * output - destination for results (unused in this body)
 * timer  - clock samples (unused in this body)
 *
 * Launch notes: `shared` is dynamically sized, so its byte count comes from
 * the third launch-configuration parameter. The sketch this replaces sized it
 * as 2 * blockDim.x floats.
 */
__global__ static void reduction(const float* __restrict__ input, float *output, clock_t *timer)
{
    extern __shared__ float shared[];

    // Block and thread coordinates along x; bid is not used further down.
    const int bid = blockIdx.x;
    const int tid = threadIdx.x;

    // Only the first thread of the block issues the fence.
    if (tid == 0)
        __threadfence();

    // Halve the stride from blockDim.x down to 1. Each step is only a
    // barrier here; every thread runs the same number of iterations, so all
    // threads of the block reach every __syncthreads().
    int stride = blockDim.x;
    while (stride > 0) {
        __syncthreads();
        stride /= 2;
    }
}
29
/*
 * Entry point: launches the timedReduction kernel once, waits for it to
 * finish, and resets the device.
 *
 * Returns 0 on success and 1 if the launch, the kernel execution, or the
 * device reset reports a CUDA error (the error string is printed to stderr).
 *
 * NOTE(review): timedReduction, dinput, doutput and dtimer are not declared
 * in this block; the kernel visible nearby is named `reduction`. Confirm the
 * intended kernel name and where the device buffers are allocated.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    // Used as the first launch parameter, so this is the grid shape
    // (8 x 8 blocks), not the block shape.
    dim3 grid(8, 8, 1);

    // 256 threads per block, 256 bytes of dynamic shared memory, stream 0.
    // NOTE(review): 256 bytes holds 64 floats -- confirm this matches what
    // the kernel expects for its shared buffer.
    timedReduction<<<grid, 256, 256, 0>>>(dinput, doutput, dtimer);

    // A launch returns no status of its own; configuration errors are
    // reported through cudaGetLastError().
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(status));
        cudaDeviceReset();
        return 1;
    }

    // Faults raised while the kernel runs only surface at a synchronizing
    // call, so wait for completion before tearing the device down.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(status));
        cudaDeviceReset();
        return 1;
    }

    status = cudaDeviceReset();
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    return 0;
}
37