1 /*
2   Copyright (c) 2011-2014, Intel Corporation
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6   modification, are permitted provided that the following conditions are
7   met:
8 
9     * Redistributions of source code must retain the above copyright
10       notice, this list of conditions and the following disclaimer.
11 
12     * Redistributions in binary form must reproduce the above copyright
13       notice, this list of conditions and the following disclaimer in the
14       documentation and/or other materials provided with the distribution.
15 
16     * Neither the name of Intel Corporation nor the names of its
17       contributors may be used to endorse or promote products derived from
18       this software without specific prior written permission.
19 
20 
21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
22    IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23    TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24    PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33 
34 #ifdef _MSC_VER
35 #define _CRT_SECURE_NO_WARNINGS
36 #define NOMINMAX
37 #pragma warning(disable : 4244)
38 #pragma warning(disable : 4305)
39 #endif
40 
41 #include "../../common/timing.h"
42 #include "volume_ispc.h"
43 #include <algorithm>
44 #include <cstdlib>
45 #include <stdio.h>
46 using namespace ispc;
47 
48 extern void volume_serial(float density[], int nVoxels[3], const float raster2camera[4][4],
49                           const float camera2world[4][4], int width, int height, float image[]);
50 
51 /* Write a PPM image file with the image */
writePPM(float * buf,int width,int height,const char * fn)52 static void writePPM(float *buf, int width, int height, const char *fn) {
53     FILE *fp = fopen(fn, "wb");
54     fprintf(fp, "P6\n");
55     fprintf(fp, "%d %d\n", width, height);
56     fprintf(fp, "255\n");
57     for (int i = 0; i < width * height; ++i) {
58         float v = buf[i] * 255.f;
59         if (v < 0.f)
60             v = 0.f;
61         else if (v > 255.f)
62             v = 255.f;
63         unsigned char c = (unsigned char)v;
64         for (int j = 0; j < 3; ++j)
65             fputc(c, fp);
66     }
67     fclose(fp);
68     printf("Wrote image file %s\n", fn);
69 }
70 
71 /* Load image and viewing parameters from a camera data file.
72    FIXME: we should add support to be able to specify viewing parameters
73    in the program here directly. */
loadCamera(const char * fn,int * width,int * height,float raster2camera[4][4],float camera2world[4][4])74 static void loadCamera(const char *fn, int *width, int *height, float raster2camera[4][4], float camera2world[4][4]) {
75     FILE *f = fopen(fn, "r");
76     if (!f) {
77         perror(fn);
78         exit(1);
79     }
80     if (fscanf(f, "%d %d", width, height) != 2) {
81         fprintf(stderr, "Unexpected end of file in camera file\n");
82         exit(1);
83     }
84 
85     for (int i = 0; i < 4; ++i) {
86         for (int j = 0; j < 4; ++j) {
87             if (fscanf(f, "%f", &raster2camera[i][j]) != 1) {
88                 fprintf(stderr, "Unexpected end of file in camera file\n");
89                 exit(1);
90             }
91         }
92     }
93     for (int i = 0; i < 4; ++i) {
94         for (int j = 0; j < 4; ++j) {
95             if (fscanf(f, "%f", &camera2world[i][j]) != 1) {
96                 fprintf(stderr, "Unexpected end of file in camera file\n");
97                 exit(1);
98             }
99         }
100     }
101     fclose(f);
102 }
103 
104 /* Load a volume density file.  Expects the number of x, y, and z samples
105    as the first three values (as integer strings), then x*y*z
106    floating-point values (also as strings) to give the densities.  */
loadVolume(const char * fn,int n[3])107 static float *loadVolume(const char *fn, int n[3]) {
108     FILE *f = fopen(fn, "r");
109     if (!f) {
110         perror(fn);
111         exit(1);
112     }
113 
114     if (fscanf(f, "%d %d %d", &n[0], &n[1], &n[2]) != 3) {
115         fprintf(stderr, "Couldn't find resolution at start of density file\n");
116         exit(1);
117     }
118 
119     int count = n[0] * n[1] * n[2];
120     float *v = new float[count];
121     for (int i = 0; i < count; ++i) {
122         if (fscanf(f, "%f", &v[i]) != 1) {
123             fprintf(stderr, "Unexpected end of file at %d'th density value\n", i);
124             exit(1);
125         }
126     }
127 
128     return v;
129 }
130 
main(int argc,char * argv[])131 int main(int argc, char *argv[]) {
132     static unsigned int test_iterations[] = {3, 7, 1};
133     if (argc < 3) {
134         fprintf(stderr, "usage: volume <camera.dat> <volume_density.vol> [ispc iterations] [tasks iterations] [serial "
135                         "iterations]\n");
136         return 1;
137     }
138     if (argc == 6) {
139         for (int i = 0; i < 3; i++) {
140             test_iterations[i] = atoi(argv[3 + i]);
141         }
142     }
143 
144     //
145     // Load viewing data and the volume density data
146     //
147     int width, height;
148     float raster2camera[4][4], camera2world[4][4];
149     loadCamera(argv[1], &width, &height, raster2camera, camera2world);
150     float *image = new float[width * height];
151 
152     int n[3];
153     float *density = loadVolume(argv[2], n);
154 
155     //
156     // Compute the image using the ispc implementation; report the minimum
157     // time of three runs.
158     //
159     double minISPC = 1e30;
160     for (unsigned int i = 0; i < test_iterations[0]; ++i) {
161         reset_and_start_timer();
162         volume_ispc(density, n, raster2camera, camera2world, width, height, image);
163         double dt = get_elapsed_mcycles();
164         printf("@time of ISPC run:\t\t\t[%.3f] million cycles\n", dt);
165         minISPC = std::min(minISPC, dt);
166     }
167 
168     printf("[volume ispc 1 core]:\t\t[%.3f] million cycles\n", minISPC);
169     writePPM(image, width, height, "volume-ispc-1core.ppm");
170 
171     // Clear out the buffer
172     for (int i = 0; i < width * height; ++i)
173         image[i] = 0.;
174 
175     //
176     // Compute the image using the ispc implementation that also uses
177     // tasks; report the minimum time of three runs.
178     //
179     double minISPCtasks = 1e30;
180     for (unsigned int i = 0; i < test_iterations[1]; ++i) {
181         reset_and_start_timer();
182         volume_ispc_tasks(density, n, raster2camera, camera2world, width, height, image);
183         double dt = get_elapsed_mcycles();
184         printf("@time of ISPC + TASKS run:\t\t\t[%.3f] million cycles\n", dt);
185         minISPCtasks = std::min(minISPCtasks, dt);
186     }
187 
188     printf("[volume ispc + tasks]:\t\t[%.3f] million cycles\n", minISPCtasks);
189     writePPM(image, width, height, "volume-ispc-tasks.ppm");
190 
191     // Clear out the buffer
192     for (int i = 0; i < width * height; ++i)
193         image[i] = 0.;
194 
195     //
196     // And run the serial implementation 3 times, again reporting the
197     // minimum time.
198     //
199     double minSerial = 1e30;
200     for (unsigned int i = 0; i < test_iterations[2]; ++i) {
201         reset_and_start_timer();
202         volume_serial(density, n, raster2camera, camera2world, width, height, image);
203         double dt = get_elapsed_mcycles();
204         printf("@time of serial run:\t\t\t[%.3f] million cycles\n", dt);
205         minSerial = std::min(minSerial, dt);
206     }
207 
208     printf("[volume serial]:\t\t[%.3f] million cycles\n", minSerial);
209     writePPM(image, width, height, "volume-serial.ppm");
210 
211     printf("\t\t\t\t(%.2fx speedup from ISPC, %.2fx speedup from ISPC + tasks)\n", minSerial / minISPC,
212            minSerial / minISPCtasks);
213 
214     return 0;
215 }
216