1 // Copyright 2009-2021 Intel Corporation
2 // SPDX-License-Identifier: Apache-2.0
3 
4 #pragma once
5 
6 #include "../../common/sys/platform.h"
7 #include "../../common/sys/sysinfo.h"
8 #include "../../common/sys/thread.h"
9 #include "../../common/sys/alloc.h"
10 #include "../../common/sys/ref.h"
11 #include "../../common/sys/intrinsics.h"
12 #include "../../common/sys/atomic.h"
13 #include "../../common/sys/mutex.h"
14 #include "../../common/sys/vector.h"
15 #include "../../common/sys/array.h"
16 #include "../../common/sys/string.h"
17 #include "../../common/sys/regression.h"
18 #include "../../common/sys/vector.h"
19 
20 #include "../../common/math/math.h"
21 #include "../../common/math/transcendental.h"
22 #include "../../common/simd/simd.h"
23 #include "../../common/math/vec2.h"
24 #include "../../common/math/vec3.h"
25 #include "../../common/math/vec4.h"
26 #include "../../common/math/vec2fa.h"
27 #include "../../common/math/vec3fa.h"
28 #include "../../common/math/interval.h"
29 #include "../../common/math/bbox.h"
30 #include "../../common/math/obbox.h"
31 #include "../../common/math/lbbox.h"
32 #include "../../common/math/linearspace2.h"
33 #include "../../common/math/linearspace3.h"
34 #include "../../common/math/affinespace.h"
35 #include "../../common/math/range.h"
36 #include "../../common/lexers/tokenstream.h"
37 
38 #include "../../common/tasking/taskscheduler.h"
39 
40 #define COMMA ,
41 
42 #include "../config.h"
43 #include "isa.h"
44 #include "stat.h"
45 #include "profile.h"
46 #include "rtcore.h"
47 #include "vector.h"
48 #include "state.h"
49 #include "instance_stack.h"
50 
51 #include <vector>
52 #include <map>
53 #include <algorithm>
54 #include <functional>
55 #include <utility>
56 #include <sstream>
57 
58 namespace embree
59 {
60   ////////////////////////////////////////////////////////////////////////////////
61   /// Vec2 shortcuts
62   ////////////////////////////////////////////////////////////////////////////////
63 
64   template<int N> using Vec2vf  = Vec2<vfloat<N>>;
65   template<int N> using Vec2vd  = Vec2<vdouble<N>>;
66   template<int N> using Vec2vr  = Vec2<vreal<N>>;
67   template<int N> using Vec2vi  = Vec2<vint<N>>;
68   template<int N> using Vec2vl  = Vec2<vllong<N>>;
69   template<int N> using Vec2vb  = Vec2<vbool<N>>;
70   template<int N> using Vec2vbf = Vec2<vboolf<N>>;
71   template<int N> using Vec2vbd = Vec2<vboold<N>>;
72 
73   typedef Vec2<vfloat4>  Vec2vf4;
74   typedef Vec2<vdouble4> Vec2vd4;
75   typedef Vec2<vreal4>   Vec2vr4;
76   typedef Vec2<vint4>    Vec2vi4;
77   typedef Vec2<vllong4>  Vec2vl4;
78   typedef Vec2<vbool4>   Vec2vb4;
79   typedef Vec2<vboolf4>  Vec2vbf4;
80   typedef Vec2<vboold4>  Vec2vbd4;
81 
82   typedef Vec2<vfloat8>  Vec2vf8;
83   typedef Vec2<vdouble8> Vec2vd8;
84   typedef Vec2<vreal8>   Vec2vr8;
85   typedef Vec2<vint8>    Vec2vi8;
86   typedef Vec2<vllong8>  Vec2vl8;
87   typedef Vec2<vbool8>   Vec2vb8;
88   typedef Vec2<vboolf8>  Vec2vbf8;
89   typedef Vec2<vboold8>  Vec2vbd8;
90 
91   typedef Vec2<vfloat16>  Vec2vf16;
92   typedef Vec2<vdouble16> Vec2vd16;
93   typedef Vec2<vreal16>   Vec2vr16;
94   typedef Vec2<vint16>    Vec2vi16;
95   typedef Vec2<vllong16>  Vec2vl16;
96   typedef Vec2<vbool16>   Vec2vb16;
97   typedef Vec2<vboolf16>  Vec2vbf16;
98   typedef Vec2<vboold16>  Vec2vbd16;
99 
100   typedef Vec2<vfloatx>  Vec2vfx;
101   typedef Vec2<vdoublex> Vec2vdx;
102   typedef Vec2<vrealx>   Vec2vrx;
103   typedef Vec2<vintx>    Vec2vix;
104   typedef Vec2<vllongx>  Vec2vlx;
105   typedef Vec2<vboolx>   Vec2vbx;
106   typedef Vec2<vboolfx>  Vec2vbfx;
107   typedef Vec2<vbooldx>  Vec2vbdx;
108 
109   ////////////////////////////////////////////////////////////////////////////////
110   /// Vec3 shortcuts
111   ////////////////////////////////////////////////////////////////////////////////
112 
113   template<int N> using Vec3vf  = Vec3<vfloat<N>>;
114   template<int N> using Vec3vd  = Vec3<vdouble<N>>;
115   template<int N> using Vec3vr  = Vec3<vreal<N>>;
116   template<int N> using Vec3vi  = Vec3<vint<N>>;
117   template<int N> using Vec3vl  = Vec3<vllong<N>>;
118   template<int N> using Vec3vb  = Vec3<vbool<N>>;
119   template<int N> using Vec3vbf = Vec3<vboolf<N>>;
120   template<int N> using Vec3vbd = Vec3<vboold<N>>;
121 
122   typedef Vec3<vfloat4>  Vec3vf4;
123   typedef Vec3<vdouble4> Vec3vd4;
124   typedef Vec3<vreal4>   Vec3vr4;
125   typedef Vec3<vint4>    Vec3vi4;
126   typedef Vec3<vllong4>  Vec3vl4;
127   typedef Vec3<vbool4>   Vec3vb4;
128   typedef Vec3<vboolf4>  Vec3vbf4;
129   typedef Vec3<vboold4>  Vec3vbd4;
130 
131   typedef Vec3<vfloat8>  Vec3vf8;
132   typedef Vec3<vdouble8> Vec3vd8;
133   typedef Vec3<vreal8>   Vec3vr8;
134   typedef Vec3<vint8>    Vec3vi8;
135   typedef Vec3<vllong8>  Vec3vl8;
136   typedef Vec3<vbool8>   Vec3vb8;
137   typedef Vec3<vboolf8>  Vec3vbf8;
138   typedef Vec3<vboold8>  Vec3vbd8;
139 
140   typedef Vec3<vfloat16>  Vec3vf16;
141   typedef Vec3<vdouble16> Vec3vd16;
142   typedef Vec3<vreal16>   Vec3vr16;
143   typedef Vec3<vint16>    Vec3vi16;
144   typedef Vec3<vllong16>  Vec3vl16;
145   typedef Vec3<vbool16>   Vec3vb16;
146   typedef Vec3<vboolf16>  Vec3vbf16;
147   typedef Vec3<vboold16>  Vec3vbd16;
148 
149   typedef Vec3<vfloatx>  Vec3vfx;
150   typedef Vec3<vdoublex> Vec3vdx;
151   typedef Vec3<vrealx>   Vec3vrx;
152   typedef Vec3<vintx>    Vec3vix;
153   typedef Vec3<vllongx>  Vec3vlx;
154   typedef Vec3<vboolx>   Vec3vbx;
155   typedef Vec3<vboolfx>  Vec3vbfx;
156   typedef Vec3<vbooldx>  Vec3vbdx;
157 
158   ////////////////////////////////////////////////////////////////////////////////
159   /// Vec4 shortcuts
160   ////////////////////////////////////////////////////////////////////////////////
161 
162   template<int N> using Vec4vf  = Vec4<vfloat<N>>;
163   template<int N> using Vec4vd  = Vec4<vdouble<N>>;
164   template<int N> using Vec4vr  = Vec4<vreal<N>>;
165   template<int N> using Vec4vi  = Vec4<vint<N>>;
166   template<int N> using Vec4vl  = Vec4<vllong<N>>;
167   template<int N> using Vec4vb  = Vec4<vbool<N>>;
168   template<int N> using Vec4vbf = Vec4<vboolf<N>>;
169   template<int N> using Vec4vbd = Vec4<vboold<N>>;
170 
171   typedef Vec4<vfloat4>  Vec4vf4;
172   typedef Vec4<vdouble4> Vec4vd4;
173   typedef Vec4<vreal4>   Vec4vr4;
174   typedef Vec4<vint4>    Vec4vi4;
175   typedef Vec4<vllong4>  Vec4vl4;
176   typedef Vec4<vbool4>   Vec4vb4;
177   typedef Vec4<vboolf4>  Vec4vbf4;
178   typedef Vec4<vboold4>  Vec4vbd4;
179 
180   typedef Vec4<vfloat8>  Vec4vf8;
181   typedef Vec4<vdouble8> Vec4vd8;
182   typedef Vec4<vreal8>   Vec4vr8;
183   typedef Vec4<vint8>    Vec4vi8;
184   typedef Vec4<vllong8>  Vec4vl8;
185   typedef Vec4<vbool8>   Vec4vb8;
186   typedef Vec4<vboolf8>  Vec4vbf8;
187   typedef Vec4<vboold8>  Vec4vbd8;
188 
189   typedef Vec4<vfloat16>  Vec4vf16;
190   typedef Vec4<vdouble16> Vec4vd16;
191   typedef Vec4<vreal16>   Vec4vr16;
192   typedef Vec4<vint16>    Vec4vi16;
193   typedef Vec4<vllong16>  Vec4vl16;
194   typedef Vec4<vbool16>   Vec4vb16;
195   typedef Vec4<vboolf16>  Vec4vbf16;
196   typedef Vec4<vboold16>  Vec4vbd16;
197 
198   typedef Vec4<vfloatx>  Vec4vfx;
199   typedef Vec4<vdoublex> Vec4vdx;
200   typedef Vec4<vrealx>   Vec4vrx;
201   typedef Vec4<vintx>    Vec4vix;
202   typedef Vec4<vllongx>  Vec4vlx;
203   typedef Vec4<vboolx>   Vec4vbx;
204   typedef Vec4<vboolfx>  Vec4vbfx;
205   typedef Vec4<vbooldx>  Vec4vbdx;
206 
207   ////////////////////////////////////////////////////////////////////////////////
208   /// Other shortcuts
209   ////////////////////////////////////////////////////////////////////////////////
210 
211   template<int N> using BBox3vf = BBox<Vec3vf<N>>;
212   typedef BBox<Vec3vf4>  BBox3vf4;
213   typedef BBox<Vec3vf8>  BBox3vf8;
214   typedef BBox<Vec3vf16> BBox3vf16;
215 
216   /* calculate time segment itime and fractional time ftime */
getTimeSegment(float time,float numTimeSegments,float & ftime)217   __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime)
218   {
219     const float timeScaled = time * numTimeSegments;
220     const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
221     ftime = timeScaled - itimef;
222     return int(itimef);
223   }
224 
getTimeSegment(float time,float start_time,float end_time,float numTimeSegments,float & ftime)225   __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime)
226   {
227     const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
228     const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f);
229     ftime = timeScaled - itimef;
230     return int(itimef);
231   }
232 
233   template<int N>
getTimeSegment(const vfloat<N> & time,const vfloat<N> & numTimeSegments,vfloat<N> & ftime)234   __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
235   {
236     const vfloat<N> timeScaled = time * numTimeSegments;
237     const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
238     ftime = timeScaled - itimef;
239     return vint<N>(itimef);
240   }
241 
242   template<int N>
getTimeSegment(const vfloat<N> & time,const vfloat<N> & start_time,const vfloat<N> & end_time,const vfloat<N> & numTimeSegments,vfloat<N> & ftime)243     __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime)
244   {
245     const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments;
246     const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f);
247     ftime = timeScaled - itimef;
248     return vint<N>(itimef);
249   }
250 
251   /* calculate overlapping time segment range */
getTimeSegmentRange(const BBox1f & time_range,float numTimeSegments)252   __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments)
253   {
254     const float round_up   = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step
255     const float round_down = 1.0f-2.0f*float(ulp);
256     const int itime_lower = (int)max(floor(round_up  *time_range.lower*numTimeSegments), 0.0f);
257     const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments);
258     return make_range(itime_lower, itime_upper);
259   }
260 
261   /* calculate overlapping time segment range */
getTimeSegmentRange(const BBox1f & range,BBox1f time_range,float numTimeSegments)262   __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments)
263   {
264     const float lower = (range.lower-time_range.lower)/time_range.size();
265     const float upper = (range.upper-time_range.lower)/time_range.size();
266     return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments);
267   }
268 }
269