1 // =============================================================================
2 // PROJECT CHRONO - http://projectchrono.org
3 //
4 // Copyright (c) 2019 projectchrono.org
5 // All rights reserved.
6 //
7 // Use of this source code is governed by a BSD-style license that can be found
8 // in the LICENSE file at the top level of the distribution and at
9 // http://projectchrono.org/license-chrono.txt.
10 //
11 // =============================================================================
12 // Authors: Eric Brandt, Asher Elmquist
13 // =============================================================================
14 //
15 // =============================================================================
16
17 #include <cuda.h>
18 #include "grayscale.cuh"
19 #include <iostream>
20
21 namespace chrono {
22 namespace sensor {
23
24 // Converts 32bpp ARGB imgIn pixels to 8bpp Grayscale imgOut pixels
mean_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)25 __global__ void mean_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
26 int out_index = (blockDim.x * blockIdx.x + threadIdx.x); // index into output buffer
27
28 int out_hIndex = out_index % w;
29 int out_vIndex = out_index / w;
30
31 int d = r * 2 - 1;
32
33 if (out_index < w * h) {
34 // reset buffer to zeros
35 bufOut[2 * out_index] = 0;
36 bufOut[2 * out_index + 1] = 0;
37
38 float sum_range = 0.f;
39 float sum_intensity = 0.f;
40 int n_contributing = 0;
41 // gather up all of our values, take mean and push to output buffer
42 for (int i = 0; i < d; i++) {
43 for (int j = 0; j < d; j++) {
44 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
45 sum_intensity += bufIn[2 * in_index + 1];
46 if (bufIn[2 * in_index + 1] > 1e-6) {
47 sum_range += bufIn[2 * in_index];
48 n_contributing++;
49 }
50 }
51 }
52 if (n_contributing > 0) {
53 bufOut[2 * out_index] = sum_range / (n_contributing);
54 bufOut[2 * out_index + 1] = sum_intensity / (d * d);
55 }
56 }
57 }
58
59 // Converts 32bpp ARGB imgIn pixels to 8bpp Grayscale imgOut pixels
strong_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)60 __global__ void strong_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
61 int out_index = (blockDim.x * blockIdx.x + threadIdx.x); // index into output buffer
62
63 int out_hIndex = out_index % w;
64 int out_vIndex = out_index / w;
65
66 int d = r * 2 - 1;
67
68 // float* raw_range = new float[d * d];
69 // float* raw_intensity = new float[d * d];
70 // int raw_id = 0;
71
72 // extract the values we will use in our return distribution
73 if (out_index < w * h) {
74 float strongest = 0;
75 float intensity_at_strongest = 0;
76
77 // perform kernel operation to find max intensity
78 float kernel_radius = .05; // 10 cm total kernel width
79
80 for (int i = 0; i < d; i++) {
81 for (int j = 0; j < d; j++) {
82 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
83 // float range = bufIn[2 * in_index];
84 // float intensity = bufIn[2 * in_index + 1];
85
86 float local_range = bufIn[2 * in_index];
87 float local_intensity = bufIn[2 * in_index + 1];
88
89 for (int k = 0; k < d; k++) {
90 for (int l = 0; l < d; l++) {
91 int inner_in_index = (d * out_vIndex + k) * d * w + (d * out_hIndex + l);
92 float range = bufIn[2 * inner_in_index];
93 float intensity = bufIn[2 * inner_in_index + 1];
94
95 if (inner_in_index != in_index && abs(range - local_range) < kernel_radius) {
96 float weight = (kernel_radius - abs(range - local_range)) / kernel_radius;
97 local_intensity += weight * intensity;
98 // norm_val += weight;
99 }
100 }
101 }
102
103 local_intensity = local_intensity / (d * d); // calculating portion of beam here
104 if (local_intensity > intensity_at_strongest) {
105 intensity_at_strongest = local_intensity;
106 strongest = local_range;
107 }
108
109 // raw_range[raw_id] = bufIn[2 * in_index];
110 // raw_intensity[raw_id] = bufIn[2 * in_index + 1];
111
112 // if (raw_id > d * d)
113 // printf("OH NO!\n");
114 // raw_id++;
115 }
116 }
117 // printf("%s %f %f\n", "stron", strongest, intensity_at_strongest);
118 bufOut[2 * out_index] = strongest;
119 bufOut[2 * out_index + 1] = intensity_at_strongest;
120 }
121
122 // // essentially performing a linear blur to find range of max intensity
123 // for (int i = 0; i < d * d; i++) {
124 // float norm_val = 1;
125 // float local_intensity = raw_intensity[i];
126 // for (int j = 0; j < d * d; j++) {
127 // if (j != i && abs(raw_range[j] - raw_range[i]) < kernel_radius) {
128 // float weight = (kernel_radius - abs(raw_range[j] - raw_range[i])) / kernel_radius;
129 // local_intensity += weight * raw_intensity[j];
130 // norm_val += weight;
131 // }
132 // }
133 // local_intensity = local_intensity / (d * d); // calculating portion of beam here
134 // if (local_intensity > intensity_at_strongest) {
135 // intensity_at_strongest = local_intensity;
136 // strongest = raw_range[i];
137 // }
138 // }
139 //
140 // // push strongest return
141 // bufOut[2 * out_index] = strongest;
142 // bufOut[2 * out_index + 1] = intensity_at_strongest;
143 // //
144 // delete[] raw_range;
145 // delete[] raw_intensity;
146 }
147
first_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)148 __global__ void first_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
149 int out_index = (blockDim.x * blockIdx.x + threadIdx.x); // index into output buffer
150
151 int out_hIndex = out_index % w;
152 int out_vIndex = out_index / w;
153
154 int d = r * 2 - 1;
155
156 if (out_index < w * h) {
157 float shortest = 1e10;
158 float intensity_at_shortest = 0;
159
160 // perform kernel operation to find max intensity
161 float kernel_radius = .05; // 10 cm total kernel width
162
163 for (int i = 0; i < d; i++) {
164 for (int j = 0; j < d; j++) {
165 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
166 // float range = bufIn[2 * in_index];
167 // float intensity = bufIn[2 * in_index + 1];
168
169 float local_range = bufIn[2 * in_index];
170 float local_intensity = bufIn[2 * in_index + 1];
171 float ray_intensity = local_intensity;
172
173 for (int k = 0; k < d; k++) {
174 for (int l = 0; l < d; l++) {
175 int inner_in_index = (d * out_vIndex + k) * d * w + (d * out_hIndex + l);
176 float range = bufIn[2 * inner_in_index];
177 float intensity = bufIn[2 * inner_in_index + 1];
178
179 if (inner_in_index != in_index && abs(range - local_range) < kernel_radius) {
180 float weight = (kernel_radius - abs(range - local_range)) / kernel_radius;
181 local_intensity += weight * intensity;
182 }
183 }
184 }
185
186 local_intensity = local_intensity / (d * d); // calculating portion of beam here
187 if (shortest > local_range && ray_intensity > 0) {
188 intensity_at_shortest = local_intensity;
189 shortest = local_range;
190 }
191 }
192 }
193 // printf("%s %f %f\n", "first", shortest, intensity_at_shortest);
194 bufOut[2 * out_index] = shortest;
195 bufOut[2 * out_index + 1] = intensity_at_shortest;
196 }
197 }
198
dual_reduce_kernel(float * bufIn,float * bufOut,int w,int h,int r)199 __global__ void dual_reduce_kernel(float* bufIn, float* bufOut, int w, int h, int r) {
200 int out_index = (blockDim.x * blockIdx.x + threadIdx.x); // index into output buffer
201
202 int out_hIndex = out_index % w;
203 int out_vIndex = out_index / w;
204
205 int d = r * 2 - 1;
206
207 if (out_index < w * h) {
208 float shortest = 1e10; // very very far
209 float intensity_at_shortest = 0;
210 float strongest = 0;
211 float intensity_at_strongest = 0;
212
213 // perform kernel operation to find max intensity
214 float kernel_radius = .05; // 10 cm total kernel width
215
216 for (int i = 0; i < d; i++) {
217 for (int j = 0; j < d; j++) {
218 int in_index = (d * out_vIndex + i) * d * w + (d * out_hIndex + j);
219 // float range = bufIn[2 * in_index];
220 // float intensity = bufIn[2 * in_index + 1];
221
222 float local_range = bufIn[2 * in_index];
223 float local_intensity = bufIn[2 * in_index + 1];
224 float ray_intensity = local_intensity;
225
226 for (int k = 0; k < d; k++) {
227 for (int l = 0; l < d; l++) {
228 int inner_in_index = (d * out_vIndex + k) * d * w + (d * out_hIndex + l);
229 float range = bufIn[2 * inner_in_index];
230 float intensity = bufIn[2 * inner_in_index + 1];
231
232 if (inner_in_index != in_index && abs(range - local_range) < kernel_radius) {
233 float weight = (kernel_radius - abs(range - local_range)) / kernel_radius;
234 local_intensity += weight * intensity;
235 }
236 }
237 }
238
239 local_intensity = local_intensity / (d * d); // calculating portion of beam here
240 if (shortest > local_range && ray_intensity > 0) {
241 intensity_at_shortest = local_intensity;
242 shortest = local_range;
243 }
244 if (local_intensity > intensity_at_strongest) {
245 intensity_at_strongest = local_intensity;
246 strongest = local_range;
247 }
248 }
249 }
250 // printf("%s %f %f\n", "dualS", strongest, intensity_at_strongest);
251 // printf("%s %f %f\n", "dualF", shortest, intensity_at_shortest);
252 bufOut[4 * out_index] = strongest;
253 bufOut[4 * out_index + 1] = intensity_at_strongest;
254 bufOut[4 * out_index + 2] = shortest;
255 bufOut[4 * out_index + 3] = intensity_at_shortest;
256 }
257 }
258
cuda_lidar_mean_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)259 void cuda_lidar_mean_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
260 int w = width / (radius * 2 - 1);
261 int h = height / (radius * 2 - 1);
262 int numPixels = w * h;
263 const int nThreads = 512;
264 int nBlocks = (numPixels + nThreads - 1) / nThreads;
265 mean_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
266 }
267
cuda_lidar_strong_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)268 void cuda_lidar_strong_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
269 int w = width / (radius * 2 - 1);
270 int h = height / (radius * 2 - 1);
271 int numPixels = w * h;
272 const int nThreads = 512;
273 int nBlocks = (numPixels + nThreads - 1) / nThreads;
274 strong_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
275 }
276
cuda_lidar_first_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)277 void cuda_lidar_first_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
278 int w = width / (radius * 2 - 1);
279 int h = height / (radius * 2 - 1);
280 int numPixels = w * h;
281 const int nThreads = 512;
282 int nBlocks = (numPixels + nThreads - 1) / nThreads;
283 first_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
284 }
285
cuda_lidar_dual_reduce(void * bufIn,void * bufOut,int width,int height,int radius,CUstream & stream)286 void cuda_lidar_dual_reduce(void* bufIn, void* bufOut, int width, int height, int radius, CUstream& stream) {
287 int w = width / (radius * 2 - 1);
288 int h = height / (radius * 2 - 1);
289 int numPixels = w * h;
290 const int nThreads = 512;
291 int nBlocks = (numPixels + nThreads - 1) / nThreads;
292 dual_reduce_kernel<<<nBlocks, nThreads, 0, stream>>>((float*)bufIn, (float*)bufOut, w, h, radius);
293 }
294
295 } // namespace sensor
296 } // namespace chrono
297