1 /******************************************************************************
2 * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 * * Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * * Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * * Neither the name of the NVIDIA CORPORATION nor the
12 * names of its contributors may be used to endorse or promote products
13 * derived from this software without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *
26 ******************************************************************************/
27
28 #include <stdio.h>
29 #include <map>
30 #include <vector>
31 #include <algorithm>
32 #include <cstdio>
33 #include <fstream>
34
35 #include "histogram/histogram_gmem_atomics.h"
36 #include "histogram/histogram_smem_atomics.h"
37 #include "histogram/histogram_cub.h"
38
39 #include <cub/util_allocator.cuh>
40 #include <test/test_util.h>
41
42 using namespace cub;
43
44 //---------------------------------------------------------------------
45 // Globals, constants, and type declarations
46 //---------------------------------------------------------------------
47
48 // Ensure printing of CUDA runtime errors to console
49 #define CUB_STDERR
50
51 bool g_verbose = false; // Whether to display input/output to console
52 bool g_report = false; // Whether to display a full report in CSV format
53 CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory
54
55 struct less_than_value
56 {
operator ()less_than_value57 inline bool operator()(
58 const std::pair<std::string, double> &a,
59 const std::pair<std::string, double> &b)
60 {
61 return a.second < b.second;
62 }
63 };
64
65
66 //---------------------------------------------------------------------
67 // Targa (.tga) image file parsing
68 //---------------------------------------------------------------------
69
70 /**
71 * TGA image header info
72 */
73 struct TgaHeader
74 {
75 char idlength;
76 char colormaptype;
77 char datatypecode;
78 short colormaporigin;
79 short colormaplength;
80 char colormapdepth;
81 short x_origin;
82 short y_origin;
83 short width;
84 short height;
85 char bitsperpixel;
86 char imagedescriptor;
87
ParseTgaHeader88 void Parse (FILE *fptr)
89 {
90 idlength = fgetc(fptr);
91 colormaptype = fgetc(fptr);
92 datatypecode = fgetc(fptr);
93 fread(&colormaporigin, 2, 1, fptr);
94 fread(&colormaplength, 2, 1, fptr);
95 colormapdepth = fgetc(fptr);
96 fread(&x_origin, 2, 1, fptr);
97 fread(&y_origin, 2, 1, fptr);
98 fread(&width, 2, 1, fptr);
99 fread(&height, 2, 1, fptr);
100 bitsperpixel = fgetc(fptr);
101 imagedescriptor = fgetc(fptr);
102 }
103
DisplayTgaHeader104 void Display (FILE *fptr)
105 {
106 fprintf(fptr, "ID length: %d\n", idlength);
107 fprintf(fptr, "Color map type: %d\n", colormaptype);
108 fprintf(fptr, "Image type: %d\n", datatypecode);
109 fprintf(fptr, "Color map offset: %d\n", colormaporigin);
110 fprintf(fptr, "Color map length: %d\n", colormaplength);
111 fprintf(fptr, "Color map depth: %d\n", colormapdepth);
112 fprintf(fptr, "X origin: %d\n", x_origin);
113 fprintf(fptr, "Y origin: %d\n", y_origin);
114 fprintf(fptr, "Width: %d\n", width);
115 fprintf(fptr, "Height: %d\n", height);
116 fprintf(fptr, "Bits per pixel: %d\n", bitsperpixel);
117 fprintf(fptr, "Descriptor: %d\n", imagedescriptor);
118 }
119 };
120
121
122 /**
123 * Decode image byte data into pixel
124 */
ParseTgaPixel(uchar4 & pixel,unsigned char * tga_pixel,int bytes)125 void ParseTgaPixel(uchar4 &pixel, unsigned char *tga_pixel, int bytes)
126 {
127 if (bytes == 4)
128 {
129 pixel.x = tga_pixel[2];
130 pixel.y = tga_pixel[1];
131 pixel.z = tga_pixel[0];
132 pixel.w = tga_pixel[3];
133 }
134 else if (bytes == 3)
135 {
136 pixel.x = tga_pixel[2];
137 pixel.y = tga_pixel[1];
138 pixel.z = tga_pixel[0];
139 pixel.w = 0;
140 }
141 else if (bytes == 2)
142 {
143 pixel.x = (tga_pixel[1] & 0x7c) << 1;
144 pixel.y = ((tga_pixel[1] & 0x03) << 6) | ((tga_pixel[0] & 0xe0) >> 2);
145 pixel.z = (tga_pixel[0] & 0x1f) << 3;
146 pixel.w = (tga_pixel[1] & 0x80);
147 }
148 }
149
150
151 /**
152 * Reads a .tga image file
153 */
ReadTga(uchar4 * & pixels,int & width,int & height,const char * filename)154 void ReadTga(uchar4* &pixels, int &width, int &height, const char *filename)
155 {
156 // Open the file
157 FILE *fptr;
158 if ((fptr = fopen(filename, "rb")) == NULL)
159 {
160 fprintf(stderr, "File open failed\n");
161 exit(-1);
162 }
163
164 // Parse header
165 TgaHeader header;
166 header.Parse(fptr);
167 // header.Display(stdout);
168 width = header.width;
169 height = header.height;
170
171 // Verify compatibility
172 if (header.datatypecode != 2 && header.datatypecode != 10)
173 {
174 fprintf(stderr, "Can only handle image type 2 and 10\n");
175 exit(-1);
176 }
177 if (header.bitsperpixel != 16 && header.bitsperpixel != 24 && header.bitsperpixel != 32)
178 {
179 fprintf(stderr, "Can only handle pixel depths of 16, 24, and 32\n");
180 exit(-1);
181 }
182 if (header.colormaptype != 0 && header.colormaptype != 1)
183 {
184 fprintf(stderr, "Can only handle color map types of 0 and 1\n");
185 exit(-1);
186 }
187
188 // Skip unnecessary header info
189 int skip_bytes = header.idlength + (header.colormaptype * header.colormaplength);
190 fseek(fptr, skip_bytes, SEEK_CUR);
191
192 // Read the image
193 int pixel_bytes = header.bitsperpixel / 8;
194
195 // Allocate and initialize pixel data
196 size_t image_bytes = width * height * sizeof(uchar4);
197 if ((pixels == NULL) && ((pixels = (uchar4*) malloc(image_bytes)) == NULL))
198 {
199 fprintf(stderr, "malloc of image failed\n");
200 exit(-1);
201 }
202 memset(pixels, 0, image_bytes);
203
204 // Parse pixels
205 unsigned char tga_pixel[5];
206 int current_pixel = 0;
207 while (current_pixel < header.width * header.height)
208 {
209 if (header.datatypecode == 2)
210 {
211 // Uncompressed
212 if (fread(tga_pixel, 1, pixel_bytes, fptr) != pixel_bytes)
213 {
214 fprintf(stderr, "Unexpected end of file at pixel %d (uncompressed)\n", current_pixel);
215 exit(-1);
216 }
217 ParseTgaPixel(pixels[current_pixel], tga_pixel, pixel_bytes);
218 current_pixel++;
219 }
220 else if (header.datatypecode == 10)
221 {
222 // Compressed
223 if (fread(tga_pixel, 1, pixel_bytes + 1, fptr) != pixel_bytes + 1)
224 {
225 fprintf(stderr, "Unexpected end of file at pixel %d (compressed)\n", current_pixel);
226 exit(-1);
227 }
228 int run_length = tga_pixel[0] & 0x7f;
229 ParseTgaPixel(pixels[current_pixel], &(tga_pixel[1]), pixel_bytes);
230 current_pixel++;
231
232 if (tga_pixel[0] & 0x80)
233 {
234 // RLE chunk
235 for (int i = 0; i < run_length; i++)
236 {
237 ParseTgaPixel(pixels[current_pixel], &(tga_pixel[1]), pixel_bytes);
238 current_pixel++;
239 }
240 }
241 else
242 {
243 // Normal chunk
244 for (int i = 0; i < run_length; i++)
245 {
246 if (fread(tga_pixel, 1, pixel_bytes, fptr) != pixel_bytes)
247 {
248 fprintf(stderr, "Unexpected end of file at pixel %d (normal)\n", current_pixel);
249 exit(-1);
250 }
251 ParseTgaPixel(pixels[current_pixel], tga_pixel, pixel_bytes);
252 current_pixel++;
253 }
254 }
255 }
256 }
257
258 // Close file
259 fclose(fptr);
260 }
261
262
263
264 //---------------------------------------------------------------------
265 // Random image generation
266 //---------------------------------------------------------------------
267
268 /**
269 * Generate a random image with specified entropy
270 */
GenerateRandomImage(uchar4 * & pixels,int width,int height,int entropy_reduction)271 void GenerateRandomImage(uchar4* &pixels, int width, int height, int entropy_reduction)
272 {
273 int num_pixels = width * height;
274 size_t image_bytes = num_pixels * sizeof(uchar4);
275 if ((pixels == NULL) && ((pixels = (uchar4*) malloc(image_bytes)) == NULL))
276 {
277 fprintf(stderr, "malloc of image failed\n");
278 exit(-1);
279 }
280
281 for (int i = 0; i < num_pixels; ++i)
282 {
283 RandomBits(pixels[i].x, entropy_reduction);
284 RandomBits(pixels[i].y, entropy_reduction);
285 RandomBits(pixels[i].z, entropy_reduction);
286 RandomBits(pixels[i].w, entropy_reduction);
287 }
288 }
289
290
291
292 //---------------------------------------------------------------------
293 // Histogram verification
294 //---------------------------------------------------------------------
295
296 // Decode float4 pixel into bins
297 template <int NUM_BINS, int ACTIVE_CHANNELS>
DecodePixelGold(float4 pixel,unsigned int (& bins)[ACTIVE_CHANNELS])298 void DecodePixelGold(float4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS])
299 {
300 float* samples = reinterpret_cast<float*>(&pixel);
301
302 for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL)
303 bins[CHANNEL] = (unsigned int) (samples[CHANNEL] * float(NUM_BINS));
304 }
305
306 // Decode uchar4 pixel into bins
307 template <int NUM_BINS, int ACTIVE_CHANNELS>
DecodePixelGold(uchar4 pixel,unsigned int (& bins)[ACTIVE_CHANNELS])308 void DecodePixelGold(uchar4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS])
309 {
310 unsigned char* samples = reinterpret_cast<unsigned char*>(&pixel);
311
312 for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL)
313 bins[CHANNEL] = (unsigned int) (samples[CHANNEL]);
314 }
315
316 // Decode uchar1 pixel into bins
317 template <int NUM_BINS, int ACTIVE_CHANNELS>
DecodePixelGold(uchar1 pixel,unsigned int (& bins)[ACTIVE_CHANNELS])318 void DecodePixelGold(uchar1 pixel, unsigned int (&bins)[ACTIVE_CHANNELS])
319 {
320 bins[0] = (unsigned int) pixel.x;
321 }
322
323
324 // Compute reference histogram. Specialized for uchar4
325 template <
326 int ACTIVE_CHANNELS,
327 int NUM_BINS,
328 typename PixelType>
HistogramGold(PixelType * image,int width,int height,unsigned int * hist)329 void HistogramGold(PixelType *image, int width, int height, unsigned int* hist)
330 {
331 memset(hist, 0, ACTIVE_CHANNELS * NUM_BINS * sizeof(unsigned int));
332
333 for (int i = 0; i < width; i++)
334 {
335 for (int j = 0; j < height; j++)
336 {
337 PixelType pixel = image[i + j * width];
338
339 unsigned int bins[ACTIVE_CHANNELS];
340 DecodePixelGold<NUM_BINS>(pixel, bins);
341
342 for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL)
343 {
344 hist[(NUM_BINS * CHANNEL) + bins[CHANNEL]]++;
345 }
346 }
347 }
348 }
349
350
351 //---------------------------------------------------------------------
352 // Test execution
353 //---------------------------------------------------------------------
354
355 /**
356 * Run a specific histogram implementation
357 */
358 template <
359 int ACTIVE_CHANNELS,
360 int NUM_BINS,
361 typename PixelType>
RunTest(std::vector<std::pair<std::string,double>> & timings,PixelType * d_pixels,const int width,const int height,unsigned int * d_hist,unsigned int * h_hist,int timing_iterations,const char * long_name,const char * short_name,double (* f)(PixelType *,int,int,unsigned int *,bool))362 void RunTest(
363 std::vector<std::pair<std::string, double> >& timings,
364 PixelType* d_pixels,
365 const int width,
366 const int height,
367 unsigned int * d_hist,
368 unsigned int * h_hist,
369 int timing_iterations,
370 const char * long_name,
371 const char * short_name,
372 double (*f)(PixelType*, int, int, unsigned int*, bool))
373 {
374 if (!g_report) printf("%s ", long_name); fflush(stdout);
375
376 // Run single test to verify (and code cache)
377 (*f)(d_pixels, width, height, d_hist, !g_report);
378
379 int compare = CompareDeviceResults(h_hist, d_hist, ACTIVE_CHANNELS * NUM_BINS, true, g_verbose);
380 if (!g_report) printf("\t%s\n", compare ? "FAIL" : "PASS"); fflush(stdout);
381
382 double elapsed_ms = 0;
383 for (int i = 0; i < timing_iterations; i++)
384 {
385 elapsed_ms += (*f)(d_pixels, width, height, d_hist, false);
386 }
387 double avg_us = (elapsed_ms / timing_iterations) * 1000; // average in us
388 timings.push_back(std::pair<std::string, double>(short_name, avg_us));
389
390 if (!g_report)
391 {
392 printf("Avg time %.3f us (%d iterations)\n", avg_us, timing_iterations); fflush(stdout);
393 }
394 else
395 {
396 printf("%.3f, ", avg_us); fflush(stdout);
397 }
398
399 AssertEquals(0, compare);
400 }
401
402
403 /**
404 * Evaluate corpus of histogram implementations
405 */
406 template <
407 int NUM_CHANNELS,
408 int ACTIVE_CHANNELS,
409 int NUM_BINS,
410 typename PixelType>
TestMethods(PixelType * h_pixels,int height,int width,int timing_iterations,double bandwidth_GBs)411 void TestMethods(
412 PixelType* h_pixels,
413 int height,
414 int width,
415 int timing_iterations,
416 double bandwidth_GBs)
417 {
418 // Copy data to gpu
419 PixelType* d_pixels;
420 size_t pixel_bytes = width * height * sizeof(PixelType);
421 CubDebugExit(g_allocator.DeviceAllocate((void**) &d_pixels, pixel_bytes));
422 CubDebugExit(cudaMemcpy(d_pixels, h_pixels, pixel_bytes, cudaMemcpyHostToDevice));
423
424 if (g_report) printf("%.3f, ", double(pixel_bytes) / bandwidth_GBs / 1000);
425
426 // Allocate results arrays on cpu/gpu
427 unsigned int *h_hist;
428 unsigned int *d_hist;
429 size_t histogram_bytes = NUM_BINS * ACTIVE_CHANNELS * sizeof(unsigned int);
430 h_hist = (unsigned int *) malloc(histogram_bytes);
431 g_allocator.DeviceAllocate((void **) &d_hist, histogram_bytes);
432
433 // Compute reference cpu histogram
434 HistogramGold<ACTIVE_CHANNELS, NUM_BINS>(h_pixels, width, height, h_hist);
435
436 // Store timings
437 std::vector<std::pair<std::string, double> > timings;
438
439 // Run experiments
440 RunTest<ACTIVE_CHANNELS, NUM_BINS>(timings, d_pixels, width, height, d_hist, h_hist, timing_iterations,
441 "CUB", "CUB", run_cub_histogram<NUM_CHANNELS, ACTIVE_CHANNELS, NUM_BINS, PixelType>);
442 RunTest<ACTIVE_CHANNELS, NUM_BINS>(timings, d_pixels, width, height, d_hist, h_hist, timing_iterations,
443 "Shared memory atomics", "smem atomics", run_smem_atomics<ACTIVE_CHANNELS, NUM_BINS, PixelType>);
444 RunTest<ACTIVE_CHANNELS, NUM_BINS>(timings, d_pixels, width, height, d_hist, h_hist, timing_iterations,
445 "Global memory atomics", "gmem atomics", run_gmem_atomics<ACTIVE_CHANNELS, NUM_BINS, PixelType>);
446
447 // Report timings
448 if (!g_report)
449 {
450 std::sort(timings.begin(), timings.end(), less_than_value());
451 printf("Timings (us):\n");
452 for (int i = 0; i < timings.size(); i++)
453 {
454 double bandwidth = height * width * sizeof(PixelType) / timings[i].second / 1000;
455 printf("\t %.3f %s (%.3f GB/s, %.3f%% peak)\n", timings[i].second, timings[i].first.c_str(), bandwidth, bandwidth / bandwidth_GBs * 100);
456 }
457 printf("\n");
458 }
459
460 // Free data
461 CubDebugExit(g_allocator.DeviceFree(d_pixels));
462 CubDebugExit(g_allocator.DeviceFree(d_hist));
463 free(h_hist);
464 }
465
466
467 /**
468 * Test different problem genres
469 */
TestGenres(uchar4 * uchar4_pixels,int height,int width,int timing_iterations,double bandwidth_GBs)470 void TestGenres(
471 uchar4* uchar4_pixels,
472 int height,
473 int width,
474 int timing_iterations,
475 double bandwidth_GBs)
476 {
477 int num_pixels = width * height;
478
479 {
480 if (!g_report) printf("1 channel uchar1 tests (256-bin):\n\n"); fflush(stdout);
481
482 size_t image_bytes = num_pixels * sizeof(uchar1);
483 uchar1* uchar1_pixels = (uchar1*) malloc(image_bytes);
484
485 // Convert to 1-channel (averaging first 3 channels)
486 for (int i = 0; i < num_pixels; ++i)
487 {
488 uchar1_pixels[i].x = (unsigned char)
489 (((unsigned int) uchar4_pixels[i].x +
490 (unsigned int) uchar4_pixels[i].y +
491 (unsigned int) uchar4_pixels[i].z) / 3);
492 }
493
494 TestMethods<1, 1, 256>(uchar1_pixels, width, height, timing_iterations, bandwidth_GBs);
495 free(uchar1_pixels);
496 if (g_report) printf(", ");
497 }
498
499 {
500 if (!g_report) printf("3/4 channel uchar4 tests (256-bin):\n\n"); fflush(stdout);
501 TestMethods<4, 3, 256>(uchar4_pixels, width, height, timing_iterations, bandwidth_GBs);
502 if (g_report) printf(", ");
503 }
504
505 {
506 if (!g_report) printf("3/4 channel float4 tests (256-bin):\n\n"); fflush(stdout);
507 size_t image_bytes = num_pixels * sizeof(float4);
508 float4* float4_pixels = (float4*) malloc(image_bytes);
509
510 // Convert to float4 with range [0.0, 1.0)
511 for (int i = 0; i < num_pixels; ++i)
512 {
513 float4_pixels[i].x = float(uchar4_pixels[i].x) / 256;
514 float4_pixels[i].y = float(uchar4_pixels[i].y) / 256;
515 float4_pixels[i].z = float(uchar4_pixels[i].z) / 256;
516 float4_pixels[i].w = float(uchar4_pixels[i].w) / 256;
517 }
518 TestMethods<4, 3, 256>(float4_pixels, width, height, timing_iterations, bandwidth_GBs);
519 free(float4_pixels);
520 if (g_report) printf("\n");
521 }
522 }
523
524
525 /**
526 * Main
527 */
main(int argc,char ** argv)528 int main(int argc, char **argv)
529 {
530 // Initialize command line
531 CommandLineArgs args(argc, argv);
532 if (args.CheckCmdLineFlag("help"))
533 {
534 printf(
535 "%s "
536 "[--device=<device-id>] "
537 "[--v] "
538 "[--i=<timing iterations>] "
539 "\n\t"
540 "--file=<.tga filename> "
541 "\n\t"
542 "--entropy=<-1 (0%), 0 (100%), 1 (81%), 2 (54%), 3 (34%), 4 (20%), ..."
543 "[--height=<default: 1080>] "
544 "[--width=<default: 1920>] "
545 "\n", argv[0]);
546 exit(0);
547 }
548
549 std::string filename;
550 int timing_iterations = 100;
551 int entropy_reduction = 0;
552 int height = 1080;
553 int width = 1920;
554
555 g_verbose = args.CheckCmdLineFlag("v");
556 g_report = args.CheckCmdLineFlag("report");
557 args.GetCmdLineArgument("i", timing_iterations);
558 args.GetCmdLineArgument("file", filename);
559 args.GetCmdLineArgument("height", height);
560 args.GetCmdLineArgument("width", width);
561 args.GetCmdLineArgument("entropy", entropy_reduction);
562
563 // Initialize device
564 CubDebugExit(args.DeviceInit());
565
566 // Get GPU device bandwidth (GB/s)
567 int device_ordinal, bus_width, mem_clock_khz;
568 CubDebugExit(cudaGetDevice(&device_ordinal));
569 CubDebugExit(cudaDeviceGetAttribute(&bus_width, cudaDevAttrGlobalMemoryBusWidth, device_ordinal));
570 CubDebugExit(cudaDeviceGetAttribute(&mem_clock_khz, cudaDevAttrMemoryClockRate, device_ordinal));
571 double bandwidth_GBs = double(bus_width) * mem_clock_khz * 2 / 8 / 1000 / 1000;
572
573 // Run test(s)
574 uchar4* uchar4_pixels = NULL;
575 if (!g_report)
576 {
577 if (!filename.empty())
578 {
579 // Parse targa file
580 ReadTga(uchar4_pixels, width, height, filename.c_str());
581 printf("File %s: width(%d) height(%d)\n\n", filename.c_str(), width, height); fflush(stdout);
582 }
583 else
584 {
585 // Generate image
586 GenerateRandomImage(uchar4_pixels, width, height, entropy_reduction);
587 printf("Random image: entropy-reduction(%d) width(%d) height(%d)\n\n", entropy_reduction, width, height); fflush(stdout);
588 }
589
590 TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs);
591 }
592 else
593 {
594 // Run test suite
595 printf("Test, MIN, RLE CUB, SMEM, GMEM, , MIN, RLE_CUB, SMEM, GMEM, , MIN, RLE_CUB, SMEM, GMEM\n");
596
597 // Entropy reduction tests
598 for (entropy_reduction = 0; entropy_reduction < 5; ++entropy_reduction)
599 {
600 printf("entropy reduction %d, ", entropy_reduction);
601 GenerateRandomImage(uchar4_pixels, width, height, entropy_reduction);
602 TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs);
603 }
604 printf("entropy reduction -1, ");
605 GenerateRandomImage(uchar4_pixels, width, height, -1);
606 TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs);
607 printf("\n");
608
609 // File image tests
610 std::vector<std::string> file_tests;
611 file_tests.push_back("animals");
612 file_tests.push_back("apples");
613 file_tests.push_back("sunset");
614 file_tests.push_back("cheetah");
615 file_tests.push_back("nature");
616 file_tests.push_back("operahouse");
617 file_tests.push_back("austin");
618 file_tests.push_back("cityscape");
619
620 for (int i = 0; i < file_tests.size(); ++i)
621 {
622 printf("%s, ", file_tests[i].c_str());
623 std::string filename = std::string("histogram/benchmark/") + file_tests[i] + ".tga";
624 ReadTga(uchar4_pixels, width, height, filename.c_str());
625 TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs);
626 }
627 }
628
629 free(uchar4_pixels);
630
631 CubDebugExit(cudaDeviceSynchronize());
632 printf("\n\n");
633
634 return 0;
635 }
636