1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /*
18 The original source for this example is
19 Copyright (c) 1994-2008 John E. Stone
20 All rights reserved.
21
22 Redistribution and use in source and binary forms, with or without
23 modification, are permitted provided that the following conditions
24 are met:
25 1. Redistributions of source code must retain the above copyright
26 notice, this list of conditions and the following disclaimer.
27 2. Redistributions in binary form must reproduce the above copyright
28 notice, this list of conditions and the following disclaimer in the
29 documentation and/or other materials provided with the distribution.
30 3. The name of the author may not be used to endorse or promote products
31 derived from this software without specific prior written permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43 SUCH DAMAGE.
44 */
45
46 #include "machine.hpp"
47 #include "types.hpp"
48 #include "macros.hpp"
49 #include "vector.hpp"
50 #include "tgafile.hpp"
51 #include "trace.hpp"
52 #include "light.hpp"
53 #include "shade.hpp"
54 #include "camera.hpp"
55 #include "util.hpp"
56 #include "intersect.hpp"
57 #include "global.hpp"
58 #include "ui.hpp"
59 #include "tachyon_video.hpp"
60
// shared but read-only so could be private too
// (everything down to nthreads is assigned in thread_trace() before any worker thread is created)
static thr_parms *all_parms; // parameter block passed to thread_trace()
static scenedef scene; // copy of parms->scene used for every ray
static int startx; // bounds of the region to render, copied from parms
static int stopx;
static int starty;
static int stopy;
static flt jitterscale; // divisor for the random antialiasing jitter: 40.0 * (hres + vres)
static int totaly; // scene.vres; used to place each patch's drawing area
static int nthreads; // number of worker threads thread_trace() creates

static int grain_size = 50; // a patch whose width or height reaches this is subdivided; THR_GRAINSIZE overrides it
const int DIVFACTOR = 2; // divisor used to size the sub-patches when subdividing

// Smaller of a and b. NOTE: function-like macro, each argument may be evaluated twice.
#define MIN(a, b) ((a) < (b) ? (a) : (b))
76
77 #ifdef _WIN32
78 #include <windows.h>
79 #include "pthread_w.hpp"
80 #else
81 #include <pthread.h>
82 #endif
83
// MyMutex serializes the rand() calls in render_one_pixel(); MyMutex3 guards the work queue in
// schedule_thread_work(). MyMutex2 is initialized in thread_trace() but has no other use in the
// code visible in this file.
static pthread_mutex_t MyMutex, MyMutex2, MyMutex3;
85
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)86 static color_t render_one_pixel(int x,
87 int y,
88 unsigned int *local_mbox,
89 unsigned int &serial,
90 int startx,
91 int stopx,
92 int starty,
93 int stopy) {
94 /* private vars moved inside loop */
95 ray primary, sample;
96 color col, avcol;
97 int R, G, B;
98 intersectstruct local_intersections;
99 int alias;
100 /* end private */
101
102 primary = camray(&scene, x, y);
103 primary.intstruct = &local_intersections;
104 primary.flags = RT_RAY_REGULAR;
105
106 serial++;
107 primary.serial = serial;
108 primary.mbox = local_mbox;
109 primary.maxdist = FHUGE;
110 primary.scene = &scene;
111 col = trace(&primary);
112
113 serial = primary.serial;
114
115 /* perform antialiasing if enabled.. */
116 if (scene.antialiasing > 0) {
117 for (alias = 0; alias < scene.antialiasing; alias++) {
118 serial++; /* increment serial number */
119 sample = primary; /* copy the regular primary ray to start with */
120 sample.serial = serial;
121
122 {
123 pthread_mutex_lock(&MyMutex);
124 sample.d.x += ((rand() % 100) - 50) / jitterscale;
125 sample.d.y += ((rand() % 100) - 50) / jitterscale;
126 sample.d.z += ((rand() % 100) - 50) / jitterscale;
127 pthread_mutex_unlock(&MyMutex);
128 }
129
130 avcol = trace(&sample);
131
132 serial = sample.serial; /* update our overall serial # */
133
134 col.r += avcol.r;
135 col.g += avcol.g;
136 col.b += avcol.b;
137 }
138
139 col.r /= (scene.antialiasing + 1.0);
140 col.g /= (scene.antialiasing + 1.0);
141 col.b /= (scene.antialiasing + 1.0);
142 }
143
144 /* Handle overexposure and underexposure here... */
145 R = (int)(col.r * 255);
146 if (R > 255)
147 R = 255;
148 else if (R < 0)
149 R = 0;
150
151 G = (int)(col.g * 255);
152 if (G > 255)
153 G = 255;
154 else if (G < 0)
155 G = 0;
156
157 B = (int)(col.b * 255);
158 if (B > 255)
159 B = 255;
160 else if (B < 0)
161 B = 0;
162
163 return video->get_color(R, G, B);
164 }
165
// Work queue that lets threads self-schedule: schedule_thread_work() returns true (and the bounds of the work) while there is more work to do
167
168 typedef struct work_queue_entry_s {
169 patch pch;
170 struct work_queue_entry_s *next;
171 } work_queue_entry_t;
172 static work_queue_entry_t *work_queue_head = nullptr;
173 static work_queue_entry_t *work_queue_tail = nullptr;
174
generate_work(patch * pchin)175 static void generate_work(patch *pchin) {
176 int startx, stopx, starty, stopy;
177 int xs, ys;
178
179 startx = pchin->startx;
180 stopx = pchin->stopx;
181 starty = pchin->starty;
182 stopy = pchin->stopy;
183
184 if (((stopx - startx) >= grain_size) || ((stopy - starty) >= grain_size)) {
185 int xpatchsize = (stopx - startx) / DIVFACTOR + 1;
186 int ypatchsize = (stopy - starty) / DIVFACTOR + 1;
187 for (ys = starty; ys <= stopy; ys += ypatchsize)
188 for (xs = startx; xs <= stopx; xs += xpatchsize) {
189 patch pch;
190 pch.startx = xs;
191 pch.starty = ys;
192 pch.stopx = MIN(xs + xpatchsize, stopx);
193 pch.stopy = MIN(ys + ypatchsize, stopy);
194
195 generate_work(&pch);
196 }
197 }
198 else {
199 /* just trace this patch */
200 work_queue_entry_t *q = (work_queue_entry_t *)malloc(sizeof(work_queue_entry_t));
201 q->pch.starty = starty;
202 q->pch.stopy = stopy;
203 q->pch.startx = startx;
204 q->pch.stopx = stopx;
205 q->next = nullptr;
206 if (work_queue_head == nullptr) {
207 work_queue_head = q;
208 }
209 else {
210 work_queue_tail->next = q;
211 }
212 work_queue_tail = q;
213 }
214 }
215
generate_worklist(void)216 static void generate_worklist(void) {
217 patch pch;
218 pch.startx = startx;
219 pch.stopx = stopx;
220 pch.starty = starty;
221 pch.stopy = stopy;
222 generate_work(&pch);
223 }
224
schedule_thread_work(patch & pch)225 static bool schedule_thread_work(patch &pch) {
226 pthread_mutex_lock(&MyMutex3);
227 work_queue_entry_t *q = work_queue_head;
228 if (q != nullptr) {
229 pch = q->pch;
230 work_queue_head = work_queue_head->next;
231 }
232 pthread_mutex_unlock(&MyMutex3);
233 return (q != nullptr);
234 }
235
parallel_thread(void * arg)236 static void parallel_thread(void *arg) {
237 // thread-local storage
238 unsigned int serial = 1;
239 unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
240 unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
241 memset(local_mbox, 0, mboxsize);
242
243 // int thread_no = (int) arg;
244 patch pch;
245 while (schedule_thread_work(pch)) {
246 {
247 drawing_area drawing(
248 pch.startx, totaly - pch.stopy, pch.stopx - pch.startx, pch.stopy - pch.starty);
249 for (int i = 1, y = pch.starty; y < pch.stopy; ++y, i++) {
250 drawing.set_pos(0, drawing.size_y - i);
251 for (int x = pch.startx; x < pch.stopx; x++) {
252 color_t c =
253 render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
254 drawing.put_pixel(c);
255 }
256 }
257 }
258 if (!video->next_frame())
259 pthread_exit(arg);
260 }
261 pthread_exit(arg);
262 }
263
264 // need this (for each platform) so we can create the right number of threads, to work efficiently
265
266 #if defined(_WIN32)
267
get_num_cpus(void)268 static int get_num_cpus(void) {
269 SYSTEM_INFO si;
270 GetNativeSystemInfo(&si);
271 return (int)si.dwNumberOfProcessors;
272 }
273
274 #elif defined(__APPLE__)
275
276 #include "sys/types.hpp"
277 #include "sys/sysctl.hpp"
get_num_cpus(void)278 static int get_num_cpus(void) {
279 int name[2] = { CTL_HW, HW_NCPU };
280 int ncpu;
281 std::size_t size = sizeof(ncpu);
282 sysctl(name, 2, &ncpu, &size, nullptr, 0);
283 return ncpu;
284 }
285
286 #else /* Linux */
287
288 #include <sys/sysinfo.h>
// Linux: processor count as reported by get_nprocs().
static int get_num_cpus(void) {
    const int available_cpus = get_nprocs();
    return available_cpus;
}
292
293 #endif
294
thread_trace(thr_parms * parms)295 void *thread_trace(thr_parms *parms) {
296 // shared but read-only so could be private too
297 all_parms = parms;
298 scene = parms->scene;
299 startx = parms->startx;
300 stopx = parms->stopx;
301 starty = parms->starty;
302 stopy = parms->stopy;
303 jitterscale = 40.0 * (scene.hres + scene.vres);
304 totaly = parms->scene.vres;
305
306 int n;
307 nthreads = get_num_cpus();
308 char *nthreads_str = getenv("THR_NUM_THREADS");
309 if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
310 nthreads = n;
311 char *grain_str = getenv("THR_GRAINSIZE");
312 if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
313 grain_size = n;
314 pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
315 pthread_mutex_init(&MyMutex, nullptr);
316 pthread_mutex_init(&MyMutex2, nullptr);
317 pthread_mutex_init(&MyMutex3, nullptr);
318 generate_worklist(); // initialize schedule_thread_work() self-scheduler
319 for (int i = 0; i < nthreads; i++) {
320 pthread_create(
321 &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
322 }
323 for (int i = 0; i < nthreads; i++) {
324 void *exit_val;
325 pthread_join(threads[i], &exit_val);
326 // expect i = (int) exit_val
327 }
328
329 return (nullptr);
330 }
331