1 // =============================================================================
2 // PROJECT CHRONO - http://projectchrono.org
3 //
4 // Copyright (c) 2019 projectchrono.org
5 // All rights reserved.
6 //
7 // Use of this source code is governed by a BSD-style license that can be found
8 // in the LICENSE file at the top level of the distribution and at
9 // http://projectchrono.org/license-chrono.txt.
10 //
11 // =============================================================================
12 // Authors: Eric Brandt, Asher Elmquist
13 // =============================================================================
14 //
15 // =============================================================================
16 
17 #include <cuda.h>
18 #include "grayscale.cuh"
19 #include <iostream>
20 
21 namespace chrono {
22 namespace sensor {
23 
24 // Converts 32bpp ARGB imgIn pixels to 8bpp Grayscale imgOut pixels
mean_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)25 __global__ void mean_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
26     int out_index = (blockDim.x * blockIdx.x + threadIdx.x);  // index into output buffer
27 
28     int out_hIndex = out_index % w;
29     int out_vIndex = out_index / w;
30 
31     int d = r * 2 - 1;
32 
33     if (out_index < w * h) {
34         // reset buffer to zeros
35         bufOut[2 * out_index] = 0;
36         bufOut[2 * out_index + 1] = 0;
37 
38         float sum_range = 0.f;
39         float sum_intensity = 0.f;
40         int n_contributing = 0;
41         // gather up all of our values, take mean and push to output buffer
42         for (int i = 0; i < d; i++) {
43             for (int j = 0; j < d; j++) {
44                 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
45                 sum_intensity += bufIn[2 * in_index + 1];
46                 if (bufIn[2 * in_index + 1] > 1e-6) {
47                     sum_range += bufIn[2 * in_index];
48                     n_contributing++;
49                 }
50             }
51         }
52         if (n_contributing > 0) {
53             bufOut[2 * out_index] = sum_range / (n_contributing);
54             bufOut[2 * out_index + 1] = sum_intensity / (d * d);
55         }
56     }
57 }
58 
59 // Converts 32bpp ARGB imgIn pixels to 8bpp Grayscale imgOut pixels
strong_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)60 __global__ void strong_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
61     int out_index = (blockDim.x * blockIdx.x + threadIdx.x);  // index into output buffer
62 
63     int out_hIndex = out_index % w;
64     int out_vIndex = out_index / w;
65 
66     int d = r * 2 - 1;
67 
68     // float* raw_range = new float[d * d];
69     // float* raw_intensity = new float[d * d];
70     // int raw_id = 0;
71 
72     // extract the values we will use in our return distribution
73     if (out_index < w * h) {
74         float strongest = 0;
75         float intensity_at_strongest = 0;
76 
77         // perform kernel operation to find max intensity
78         float kernel_radius = .05;  // 10 cm total kernel width
79 
80         for (int i = 0; i < d; i++) {
81             for (int j = 0; j < d; j++) {
82                 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
83                 // float range = bufIn[2 * in_index];
84                 // float intensity = bufIn[2 * in_index + 1];
85 
86                 float local_range = bufIn[2 * in_index];
87                 float local_intensity = bufIn[2 * in_index + 1];
88 
89                 for (int k = 0; k < d; k++) {
90                     for (int l = 0; l < d; l++) {
91                         int inner_in_index = (d * out_vIndex + k) * d * w + (d * out_hIndex + l);
92                         float range = bufIn[2 * inner_in_index];
93                         float intensity = bufIn[2 * inner_in_index + 1];
94 
95                         if (inner_in_index != in_index && abs(range - local_range) < kernel_radius) {
96                             float weight = (kernel_radius - abs(range - local_range)) / kernel_radius;
97                             local_intensity += weight * intensity;
98                             // norm_val += weight;
99                         }
100                     }
101                 }
102 
103                 local_intensity = local_intensity / (d * d);  // calculating portion of beam here
104                 if (local_intensity > intensity_at_strongest) {
105                     intensity_at_strongest = local_intensity;
106                     strongest = local_range;
107                 }
108 
109                 // raw_range[raw_id] = bufIn[2 * in_index];
110                 // raw_intensity[raw_id] = bufIn[2 * in_index + 1];
111 
112                 // if (raw_id > d * d)
113                 //     printf("OH NO!\n");
114                 // raw_id++;
115             }
116         }
117         //        printf("%s %f %f\n", "stron", strongest, intensity_at_strongest);
118         bufOut[2 * out_index] = strongest;
119         bufOut[2 * out_index + 1] = intensity_at_strongest;
120     }
121 
122     // // essentially performing a linear blur to find range of max intensity
123     // for (int i = 0; i < d * d; i++) {
124     //     float norm_val = 1;
125     //     float local_intensity = raw_intensity[i];
126     //     for (int j = 0; j < d * d; j++) {
127     //         if (j != i && abs(raw_range[j] - raw_range[i]) < kernel_radius) {
128     //             float weight = (kernel_radius - abs(raw_range[j] - raw_range[i])) / kernel_radius;
129     //             local_intensity += weight * raw_intensity[j];
130     //             norm_val += weight;
131     //         }
132     //     }
133     //     local_intensity = local_intensity / (d * d);  // calculating portion of beam here
134     //     if (local_intensity > intensity_at_strongest) {
135     //         intensity_at_strongest = local_intensity;
136     //         strongest = raw_range[i];
137     //     }
138     // }
139     //
140     // // push strongest return
141     // bufOut[2 * out_index] = strongest;
142     // bufOut[2 * out_index + 1] = intensity_at_strongest;
143     // //
144     // delete[] raw_range;
145     // delete[] raw_intensity;
146 }
147 
first_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)148 __global__ void first_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
149     int out_index = (blockDim.x * blockIdx.x + threadIdx.x);  // index into output buffer
150 
151     int out_hIndex = out_index % w;
152     int out_vIndex = out_index / w;
153 
154     int d = r * 2 - 1;
155 
156     if (out_index < w * h) {
157         float shortest = 1e10;
158         float intensity_at_shortest = 0;
159 
160         // perform kernel operation to find max intensity
161         float kernel_radius = .05;  // 10 cm total kernel width
162 
163         for (int i = 0; i < d; i++) {
164             for (int j = 0; j < d; j++) {
165                 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
166                 // float range = bufIn[2 * in_index];
167                 // float intensity = bufIn[2 * in_index + 1];
168 
169                 float local_range = bufIn[2 * in_index];
170                 float local_intensity = bufIn[2 * in_index + 1];
171                 float ray_intensity = local_intensity;
172 
173                 for (int k = 0; k < d; k++) {
174                     for (int l = 0; l < d; l++) {
175                         int inner_in_index = (d * out_vIndex + k) * d * w + (d * out_hIndex + l);
176                         float range = bufIn[2 * inner_in_index];
177                         float intensity = bufIn[2 * inner_in_index + 1];
178 
179                         if (inner_in_index != in_index && abs(range - local_range) < kernel_radius) {
180                             float weight = (kernel_radius - abs(range - local_range)) / kernel_radius;
181                             local_intensity += weight * intensity;
182                         }
183                     }
184                 }
185 
186                 local_intensity = local_intensity / (d * d);  // calculating portion of beam here
187                 if (shortest > local_range && ray_intensity > 0) {
188                     intensity_at_shortest = local_intensity;
189                     shortest = local_range;
190                 }
191             }
192         }
193         //        printf("%s %f %f\n", "first", shortest, intensity_at_shortest);
194         bufOut[2 * out_index] = shortest;
195         bufOut[2 * out_index + 1] = intensity_at_shortest;
196     }
197 }
198 
dual_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)199 __global__ void dual_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
200     int out_index = (blockDim.x * blockIdx.x + threadIdx.x);  // index into output buffer
201 
202     int out_hIndex = out_index % w;
203     int out_vIndex = out_index / w;
204 
205     int d = r * 2 - 1;
206 
207     if (out_index < w * h) {
208         float shortest = 1e10;  // very very far
209         float intensity_at_shortest = 0;
210         float strongest = 0;
211         float intensity_at_strongest = 0;
212 
213         // perform kernel operation to find max intensity
214         float kernel_radius = .05;  // 10 cm total kernel width
215 
216         for (int i = 0; i < d; i++) {
217             for (int j = 0; j < d; j++) {
218                 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
219                 // float range = bufIn[2 * in_index];
220                 // float intensity = bufIn[2 * in_index + 1];
221 
222                 float local_range = bufIn[2 * in_index];
223                 float local_intensity = bufIn[2 * in_index + 1];
224                 float ray_intensity = local_intensity;
225 
226                 for (int k = 0; k < d; k++) {
227                     for (int l = 0; l < d; l++) {
228                         int inner_in_index = (d * out_vIndex + k) * d * w + (d * out_hIndex + l);
229                         float range = bufIn[2 * inner_in_index];
230                         float intensity = bufIn[2 * inner_in_index + 1];
231 
232                         if (inner_in_index != in_index && abs(range - local_range) < kernel_radius) {
233                             float weight = (kernel_radius - abs(range - local_range)) / kernel_radius;
234                             local_intensity += weight * intensity;
235                         }
236                     }
237                 }
238 
239                 local_intensity = local_intensity / (d * d);  // calculating portion of beam here
240                 if (shortest > local_range && ray_intensity > 0) {
241                     intensity_at_shortest = local_intensity;
242                     shortest = local_range;
243                 }
244                 if (local_intensity > intensity_at_strongest) {
245                     intensity_at_strongest = local_intensity;
246                     strongest = local_range;
247                 }
248             }
249         }
250         //        printf("%s %f %f\n", "dualS", strongest, intensity_at_strongest);
251         //        printf("%s %f %f\n", "dualF", shortest, intensity_at_shortest);
252         bufOut[4 * out_index] = strongest;
253         bufOut[4 * out_index + 1] = intensity_at_strongest;
254         bufOut[4 * out_index + 2] = shortest;
255         bufOut[4 * out_index + 3] = intensity_at_shortest;
256     }
257 }
258 
cuda_lidar_mean_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)259 void cuda_lidar_mean_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
260     int w = width / (radius * 2 - 1);
261     int h = height / (radius * 2 - 1);
262     int numPixels = w * h;
263     const int nThreads = 512;
264     int nBlocks = (numPixels + nThreads - 1) / nThreads;
265     mean_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
266 }
267 
cuda_lidar_strong_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)268 void cuda_lidar_strong_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
269     int w = width / (radius * 2 - 1);
270     int h = height / (radius * 2 - 1);
271     int numPixels = w * h;
272     const int nThreads = 512;
273     int nBlocks = (numPixels + nThreads - 1) / nThreads;
274     strong_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
275 }
276 
cuda_lidar_first_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)277 void cuda_lidar_first_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
278     int w = width / (radius * 2 - 1);
279     int h = height / (radius * 2 - 1);
280     int numPixels = w * h;
281     const int nThreads = 512;
282     int nBlocks = (numPixels + nThreads - 1) / nThreads;
283     first_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
284 }
285 
cuda_lidar_dual_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)286 void cuda_lidar_dual_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
287     int w = width / (radius * 2 - 1);
288     int h = height / (radius * 2 - 1);
289     int numPixels = w * h;
290     const int nThreads = 512;
291     int nBlocks = (numPixels + nThreads - 1) / nThreads;
292     dual_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
293 }
294 
295 }  // namespace sensor
296 }  // namespace chrono
297