1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 /*
18     The original source for this example is
19     Copyright (c) 1994-2008 John E. Stone
20     All rights reserved.
21 
22     Redistribution and use in source and binary forms, with or without
23     modification, are permitted provided that the following conditions
24     are met:
25     1. Redistributions of source code must retain the above copyright
26        notice, this list of conditions and the following disclaimer.
27     2. Redistributions in binary form must reproduce the above copyright
28        notice, this list of conditions and the following disclaimer in the
29        documentation and/or other materials provided with the distribution.
30     3. The name of the author may not be used to endorse or promote products
31        derived from this software without specific prior written permission.
32 
33     THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34     OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35     WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36     ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38     DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
39     OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
40     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
41     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
42     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
43     SUCH DAMAGE.
44 */
45 
46 #include "machine.hpp"
47 #include "types.hpp"
48 #include "macros.hpp"
49 #include "vector.hpp"
50 #include "tgafile.hpp"
51 #include "trace.hpp"
52 #include "light.hpp"
53 #include "shade.hpp"
54 #include "camera.hpp"
55 #include "util.hpp"
56 #include "intersect.hpp"
57 #include "global.hpp"
58 #include "ui.hpp"
59 #include "tachyon_video.hpp"
60 
61 // shared but read-only so could be private too
62 static thr_parms *all_parms;
63 static scenedef scene;
64 static int startx;
65 static int stopx;
66 static int starty;
67 static int stopy;
68 static flt jitterscale;
69 static int totaly;
70 static int nthreads;
71 
// A patch is subdivided while either of its sides is at least this long;
// thread_trace() overrides the default from THR_GRAINSIZE when that is set.
static int grain_size = 50;

// Each subdivision step cuts a side into roughly this many pieces.
static const int DIVFACTOR = 2;

// Smaller of two values (arguments may be evaluated twice).
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
76 
77 #ifdef _WIN32
78 #include <windows.h>
79 #include "pthread_w.hpp"
80 #else
81 #include <pthread.h>
82 #endif
83 
// All three mutexes are initialised in thread_trace().
static pthread_mutex_t MyMutex; // held around the rand() calls in render_one_pixel()
static pthread_mutex_t MyMutex2; // initialised only; not locked anywhere in this file
static pthread_mutex_t MyMutex3; // held while schedule_thread_work() pops the work queue
85 
render_one_pixel(int x,int y,unsigned int * local_mbox,unsigned int & serial,int startx,int stopx,int starty,int stopy)86 static color_t render_one_pixel(int x,
87                                 int y,
88                                 unsigned int *local_mbox,
89                                 unsigned int &serial,
90                                 int startx,
91                                 int stopx,
92                                 int starty,
93                                 int stopy) {
94     /* private vars moved inside loop */
95     ray primary, sample;
96     color col, avcol;
97     int R, G, B;
98     intersectstruct local_intersections;
99     int alias;
100     /* end private */
101 
102     primary = camray(&scene, x, y);
103     primary.intstruct = &local_intersections;
104     primary.flags = RT_RAY_REGULAR;
105 
106     serial++;
107     primary.serial = serial;
108     primary.mbox = local_mbox;
109     primary.maxdist = FHUGE;
110     primary.scene = &scene;
111     col = trace(&primary);
112 
113     serial = primary.serial;
114 
115     /* perform antialiasing if enabled.. */
116     if (scene.antialiasing > 0) {
117         for (alias = 0; alias < scene.antialiasing; alias++) {
118             serial++; /* increment serial number */
119             sample = primary; /* copy the regular primary ray to start with */
120             sample.serial = serial;
121 
122             {
123                 pthread_mutex_lock(&MyMutex);
124                 sample.d.x += ((rand() % 100) - 50) / jitterscale;
125                 sample.d.y += ((rand() % 100) - 50) / jitterscale;
126                 sample.d.z += ((rand() % 100) - 50) / jitterscale;
127                 pthread_mutex_unlock(&MyMutex);
128             }
129 
130             avcol = trace(&sample);
131 
132             serial = sample.serial; /* update our overall serial # */
133 
134             col.r += avcol.r;
135             col.g += avcol.g;
136             col.b += avcol.b;
137         }
138 
139         col.r /= (scene.antialiasing + 1.0);
140         col.g /= (scene.antialiasing + 1.0);
141         col.b /= (scene.antialiasing + 1.0);
142     }
143 
144     /* Handle overexposure and underexposure here... */
145     R = (int)(col.r * 255);
146     if (R > 255)
147         R = 255;
148     else if (R < 0)
149         R = 0;
150 
151     G = (int)(col.g * 255);
152     if (G > 255)
153         G = 255;
154     else if (G < 0)
155         G = 0;
156 
157     B = (int)(col.b * 255);
158     if (B > 255)
159         B = 255;
160     else if (B < 0)
161         B = 0;
162 
163     return video->get_color(R, G, B);
164 }
165 
// Work queue that lets threads self-schedule: schedule_thread_work() returns true (and the bounds of a patch) while more work remains
167 
168 typedef struct work_queue_entry_s {
169     patch pch;
170     struct work_queue_entry_s *next;
171 } work_queue_entry_t;
172 static work_queue_entry_t *work_queue_head = nullptr;
173 static work_queue_entry_t *work_queue_tail = nullptr;
174 
generate_work(patch * pchin)175 static void generate_work(patch *pchin) {
176     int startx, stopx, starty, stopy;
177     int xs, ys;
178 
179     startx = pchin->startx;
180     stopx = pchin->stopx;
181     starty = pchin->starty;
182     stopy = pchin->stopy;
183 
184     if (((stopx - startx) >= grain_size) || ((stopy - starty) >= grain_size)) {
185         int xpatchsize = (stopx - startx) / DIVFACTOR + 1;
186         int ypatchsize = (stopy - starty) / DIVFACTOR + 1;
187         for (ys = starty; ys <= stopy; ys += ypatchsize)
188             for (xs = startx; xs <= stopx; xs += xpatchsize) {
189                 patch pch;
190                 pch.startx = xs;
191                 pch.starty = ys;
192                 pch.stopx = MIN(xs + xpatchsize, stopx);
193                 pch.stopy = MIN(ys + ypatchsize, stopy);
194 
195                 generate_work(&pch);
196             }
197     }
198     else {
199         /* just trace this patch */
200         work_queue_entry_t *q = (work_queue_entry_t *)malloc(sizeof(work_queue_entry_t));
201         q->pch.starty = starty;
202         q->pch.stopy = stopy;
203         q->pch.startx = startx;
204         q->pch.stopx = stopx;
205         q->next = nullptr;
206         if (work_queue_head == nullptr) {
207             work_queue_head = q;
208         }
209         else {
210             work_queue_tail->next = q;
211         }
212         work_queue_tail = q;
213     }
214 }
215 
generate_worklist(void)216 static void generate_worklist(void) {
217     patch pch;
218     pch.startx = startx;
219     pch.stopx = stopx;
220     pch.starty = starty;
221     pch.stopy = stopy;
222     generate_work(&pch);
223 }
224 
schedule_thread_work(patch & pch)225 static bool schedule_thread_work(patch &pch) {
226     pthread_mutex_lock(&MyMutex3);
227     work_queue_entry_t *q = work_queue_head;
228     if (q != nullptr) {
229         pch = q->pch;
230         work_queue_head = work_queue_head->next;
231     }
232     pthread_mutex_unlock(&MyMutex3);
233     return (q != nullptr);
234 }
235 
parallel_thread(void * arg)236 static void parallel_thread(void *arg) {
237     // thread-local storage
238     unsigned int serial = 1;
239     unsigned int mboxsize = sizeof(unsigned int) * (max_objectid() + 20);
240     unsigned int *local_mbox = (unsigned int *)alloca(mboxsize);
241     memset(local_mbox, 0, mboxsize);
242 
243     // int thread_no = (int) arg;
244     patch pch;
245     while (schedule_thread_work(pch)) {
246         {
247             drawing_area drawing(
248                 pch.startx, totaly - pch.stopy, pch.stopx - pch.startx, pch.stopy - pch.starty);
249             for (int i = 1, y = pch.starty; y < pch.stopy; ++y, i++) {
250                 drawing.set_pos(0, drawing.size_y - i);
251                 for (int x = pch.startx; x < pch.stopx; x++) {
252                     color_t c =
253                         render_one_pixel(x, y, local_mbox, serial, startx, stopx, starty, stopy);
254                     drawing.put_pixel(c);
255                 }
256             }
257         }
258         if (!video->next_frame())
259             pthread_exit(arg);
260     }
261     pthread_exit(arg);
262 }
263 
264 // need this (for each platform) so we can create the right number of threads, to work efficiently
265 
266 #if defined(_WIN32)
267 
get_num_cpus(void)268 static int get_num_cpus(void) {
269     SYSTEM_INFO si;
270     GetNativeSystemInfo(&si);
271     return (int)si.dwNumberOfProcessors;
272 }
273 
274 #elif defined(__APPLE__)
275 
276 #include "sys/types.hpp"
277 #include "sys/sysctl.hpp"
get_num_cpus(void)278 static int get_num_cpus(void) {
279     int name[2] = { CTL_HW, HW_NCPU };
280     int ncpu;
281     std::size_t size = sizeof(ncpu);
282     sysctl(name, 2, &ncpu, &size, nullptr, 0);
283     return ncpu;
284 }
285 
286 #else /*  Linux  */
287 
288 #include <sys/sysinfo.h>
/* Linux: processor count as reported by get_nprocs(). */
static int get_num_cpus(void) {
    const int processor_count = get_nprocs();
    return processor_count;
}
292 
293 #endif
294 
thread_trace(thr_parms * parms)295 void *thread_trace(thr_parms *parms) {
296     // shared but read-only so could be private too
297     all_parms = parms;
298     scene = parms->scene;
299     startx = parms->startx;
300     stopx = parms->stopx;
301     starty = parms->starty;
302     stopy = parms->stopy;
303     jitterscale = 40.0 * (scene.hres + scene.vres);
304     totaly = parms->scene.vres;
305 
306     int n;
307     nthreads = get_num_cpus();
308     char *nthreads_str = getenv("THR_NUM_THREADS");
309     if (nthreads_str && (sscanf(nthreads_str, "%d", &n) > 0) && (n > 0))
310         nthreads = n;
311     char *grain_str = getenv("THR_GRAINSIZE");
312     if (grain_str && (sscanf(grain_str, "%d", &n) > 0) && (n > 0))
313         grain_size = n;
314     pthread_t *threads = (pthread_t *)alloca(nthreads * sizeof(pthread_t));
315     pthread_mutex_init(&MyMutex, nullptr);
316     pthread_mutex_init(&MyMutex2, nullptr);
317     pthread_mutex_init(&MyMutex3, nullptr);
318     generate_worklist(); // initialize schedule_thread_work() self-scheduler
319     for (int i = 0; i < nthreads; i++) {
320         pthread_create(
321             &threads[i], nullptr, (void *(*)(void *))parallel_thread, (void *)((std::size_t)i));
322     }
323     for (int i = 0; i < nthreads; i++) {
324         void *exit_val;
325         pthread_join(threads[i], &exit_val);
326         // expect i = (int) exit_val
327     }
328 
329     return (nullptr);
330 }
331