1 // Copyright (c) 2018, ETH Zurich and UNC Chapel Hill.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 //       notice, this list of conditions and the following disclaimer.
9 //
10 //     * Redistributions in binary form must reproduce the above copyright
11 //       notice, this list of conditions and the following disclaimer in the
12 //       documentation and/or other materials provided with the distribution.
13 //
14 //     * Neither the name of ETH Zurich and UNC Chapel Hill nor the names of
15 //       its contributors may be used to endorse or promote products derived
16 //       from this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
22 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 // POSSIBILITY OF SUCH DAMAGE.
29 //
30 // Author: Johannes L. Schoenberger (jsch-at-demuc-dot-de)
31 
32 #ifndef COLMAP_SRC_MVS_CUDA_FLIP_H_
33 #define COLMAP_SRC_MVS_CUDA_FLIP_H_
34 
35 #include <cuda_runtime.h>
36 
37 namespace colmap {
38 namespace mvs {
39 
// Mirror a pitched 2D array left-to-right: the element in column x of each
// input row is stored in column (width - 1 - x) of the same output row.
//
// `width` and `height` are counted in elements of type T, whereas
// `pitch_input` and `pitch_output` are the distances between the starts of
// consecutive rows measured in bytes.
template <typename T>
void CudaFlipHorizontal(const T* input,
                        T* output,
                        const int width,
                        const int height,
                        const int pitch_input,
                        const int pitch_output);
45 
46 ////////////////////////////////////////////////////////////////////////////////
47 // Implementation
48 ////////////////////////////////////////////////////////////////////////////////
49 
50 #ifdef __CUDACC__
51 
// TILE_DIM_FLIP must be divisible by BLOCK_ROWS_FLIP. Do not change these values.
53 #define TILE_DIM_FLIP 32
54 #define BLOCK_ROWS_FLIP 8
55 
56 namespace internal {
57 
// Mirrors a pitched 2D array about its vertical axis: the element at column x
// of a row is written to column (width - 1 - x) of the same row in the output.
//
// Expected launch layout: thread blocks of TILE_DIM_FLIP x BLOCK_ROWS_FLIP
// threads, one block per TILE_DIM_FLIP x TILE_DIM_FLIP tile of the image, so
// that every thread handles TILE_DIM_FLIP / BLOCK_ROWS_FLIP rows of its tile.
// Uses a statically sized shared-memory tile of
// TILE_DIM_FLIP x (TILE_DIM_FLIP + 1) elements of T.
//
// @param output_data   Destination array (written).
// @param input_data    Source array (read only).
// @param width         Number of elements per row.
// @param height        Number of rows.
// @param input_pitch   Distance in bytes between consecutive source rows.
// @param output_pitch  Distance in bytes between consecutive destination rows.
template <typename T>
__global__ void CudaFlipHorizontalKernel(T* output_data, const T* input_data,
                                         const int width, const int height,
                                         const int input_pitch,
                                         const int output_pitch) {
  // An empty image has no valid coordinate to clamp to. The condition only
  // depends on kernel arguments, so all threads of a block leave together and
  // nobody is left waiting at the barrier below.
  if (width <= 0 || height <= 0) {
    return;
  }

  // Source column and first source row handled by this thread.
  const int x_index = blockIdx.x * TILE_DIM_FLIP + threadIdx.x;
  const int y_index = blockIdx.y * TILE_DIM_FLIP + threadIdx.y;

  __shared__ T tile[TILE_DIM_FLIP][TILE_DIM_FLIP + 1];

  // Threads that fall outside the image are redirected to the tile slot of
  // the last valid column/row of this block.
  const int tile_x = min(threadIdx.x, width - 1 - blockIdx.x * TILE_DIM_FLIP);
  const int tile_y = min(threadIdx.y, height - 1 - blockIdx.y * TILE_DIM_FLIP);

  // Load phase: source coordinates are clamped to the image, so every thread
  // performs an in-bounds read and reaches the barrier.
  const int x = min(x_index, width - 1);
  for (int i = 0; i < TILE_DIM_FLIP; i += BLOCK_ROWS_FLIP) {
    const int y = min(y_index + i, height - 1);
    // Row offsets are computed in size_t so that y * pitch cannot overflow a
    // 32-bit int for very large images.
    const T* input_row = reinterpret_cast<const T*>(
        reinterpret_cast<const char*>(input_data) +
        static_cast<size_t>(y) * input_pitch);
    tile[tile_y + i][tile_x] = input_row[x];
  }

  __syncthreads();

  // Store phase: only threads whose source column lies inside the image may
  // write. The test is made on the un-mirrored column on purpose: for a
  // thread past the right edge the mirrored column (width - 1 - x_index) is
  // negative, which a "mirrored < width" test would wrongly accept and turn
  // into a write in front of the row.
  if (x_index < width) {
    const int flipped_x = width - 1 - x_index;
    for (int i = 0; i < TILE_DIM_FLIP; i += BLOCK_ROWS_FLIP) {
      const int y = y_index + i;
      if (y < height) {
        T* output_row = reinterpret_cast<T*>(
            reinterpret_cast<char*>(output_data) +
            static_cast<size_t>(y) * output_pitch);
        output_row[flipped_x] = tile[threadIdx.y + i][threadIdx.x];
      }
    }
  }
}
89 
90 }  // namespace internal
91 
// Flips a pitched 2D array horizontally by launching
// internal::CudaFlipHorizontalKernel with one TILE_DIM_FLIP x BLOCK_ROWS_FLIP
// thread block per TILE_DIM_FLIP x TILE_DIM_FLIP tile of the image.
//
// The kernel is launched without a stream argument and the launch status is
// not checked here. Both pointers are dereferenced by the kernel, so they
// must be accessible from device code.
// NOTE(review): blocks read and write different columns without any ordering
// between blocks, so `output` presumably must not overlap `input` - confirm
// against callers.
//
// @param input         Source array.
// @param output        Destination array.
// @param width         Number of elements per row.
// @param height        Number of rows.
// @param pitch_input   Distance in bytes between consecutive source rows.
// @param pitch_output  Distance in bytes between consecutive destination rows.
template <typename T>
void CudaFlipHorizontal(const T* input, T* output, const int width,
                        const int height, const int pitch_input,
                        const int pitch_output) {
  // Nothing to do for an empty image. Without this check a zero dimension
  // would still launch one block (whose clamped loads would address index
  // -1), and sufficiently negative values would request a zero-sized grid.
  if (width <= 0 || height <= 0) {
    return;
  }

  const dim3 block_dim(TILE_DIM_FLIP, BLOCK_ROWS_FLIP, 1);
  // Ceiling division: enough tiles to cover the whole image.
  const dim3 grid_dim((width - 1) / TILE_DIM_FLIP + 1,
                      (height - 1) / TILE_DIM_FLIP + 1, 1);

  internal::CudaFlipHorizontalKernel<<<grid_dim, block_dim>>>(
      output, input, width, height, pitch_input, pitch_output);
}
104 
105 #undef TILE_DIM_FLIP
106 #undef BLOCK_ROWS_FLIP
107 
108 #endif  // __CUDACC__
109 
110 }  // namespace mvs
111 }  // namespace colmap
112 
113 #endif  // COLMAP_SRC_MVS_CUDA_FLIP_H_
114