1 // Copyright 2009-2021 Intel Corporation 2 // SPDX-License-Identifier: Apache-2.0 3 4 #pragma once 5 6 #include "../../common/sys/platform.h" 7 #include "../../common/sys/sysinfo.h" 8 #include "../../common/sys/thread.h" 9 #include "../../common/sys/alloc.h" 10 #include "../../common/sys/ref.h" 11 #include "../../common/sys/intrinsics.h" 12 #include "../../common/sys/atomic.h" 13 #include "../../common/sys/mutex.h" 14 #include "../../common/sys/vector.h" 15 #include "../../common/sys/array.h" 16 #include "../../common/sys/string.h" 17 #include "../../common/sys/regression.h" 18 #include "../../common/sys/vector.h" 19 20 #include "../../common/math/math.h" 21 #include "../../common/math/transcendental.h" 22 #include "../../common/simd/simd.h" 23 #include "../../common/math/vec2.h" 24 #include "../../common/math/vec3.h" 25 #include "../../common/math/vec4.h" 26 #include "../../common/math/vec2fa.h" 27 #include "../../common/math/vec3fa.h" 28 #include "../../common/math/interval.h" 29 #include "../../common/math/bbox.h" 30 #include "../../common/math/obbox.h" 31 #include "../../common/math/lbbox.h" 32 #include "../../common/math/linearspace2.h" 33 #include "../../common/math/linearspace3.h" 34 #include "../../common/math/affinespace.h" 35 #include "../../common/math/range.h" 36 #include "../../common/lexers/tokenstream.h" 37 38 #include "../../common/tasking/taskscheduler.h" 39 40 #define COMMA , 41 42 #include "../config.h" 43 #include "isa.h" 44 #include "stat.h" 45 #include "profile.h" 46 #include "rtcore.h" 47 #include "vector.h" 48 #include "state.h" 49 #include "instance_stack.h" 50 51 #include <vector> 52 #include <map> 53 #include <algorithm> 54 #include <functional> 55 #include <utility> 56 #include <sstream> 57 58 namespace embree 59 { 60 //////////////////////////////////////////////////////////////////////////////// 61 /// Vec2 shortcuts 62 //////////////////////////////////////////////////////////////////////////////// 63 64 template<int N> using Vec2vf = Vec2<vfloat<N>>; 65 template<int N> using Vec2vd = Vec2<vdouble<N>>; 66 template<int N> using Vec2vr = Vec2<vreal<N>>; 67 template<int N> using Vec2vi = Vec2<vint<N>>; 68 template<int N> using Vec2vl = Vec2<vllong<N>>; 69 template<int N> using Vec2vb = Vec2<vbool<N>>; 70 template<int N> using Vec2vbf = Vec2<vboolf<N>>; 71 template<int N> using Vec2vbd = Vec2<vboold<N>>; 72 73 typedef Vec2<vfloat4> Vec2vf4; 74 typedef Vec2<vdouble4> Vec2vd4; 75 typedef Vec2<vreal4> Vec2vr4; 76 typedef Vec2<vint4> Vec2vi4; 77 typedef Vec2<vllong4> Vec2vl4; 78 typedef Vec2<vbool4> Vec2vb4; 79 typedef Vec2<vboolf4> Vec2vbf4; 80 typedef Vec2<vboold4> Vec2vbd4; 81 82 typedef Vec2<vfloat8> Vec2vf8; 83 typedef Vec2<vdouble8> Vec2vd8; 84 typedef Vec2<vreal8> Vec2vr8; 85 typedef Vec2<vint8> Vec2vi8; 86 typedef Vec2<vllong8> Vec2vl8; 87 typedef Vec2<vbool8> Vec2vb8; 88 typedef Vec2<vboolf8> Vec2vbf8; 89 typedef Vec2<vboold8> Vec2vbd8; 90 91 typedef Vec2<vfloat16> Vec2vf16; 92 typedef Vec2<vdouble16> Vec2vd16; 93 typedef Vec2<vreal16> Vec2vr16; 94 typedef Vec2<vint16> Vec2vi16; 95 typedef Vec2<vllong16> Vec2vl16; 96 typedef Vec2<vbool16> Vec2vb16; 97 typedef Vec2<vboolf16> Vec2vbf16; 98 typedef Vec2<vboold16> Vec2vbd16; 99 100 typedef Vec2<vfloatx> Vec2vfx; 101 typedef Vec2<vdoublex> Vec2vdx; 102 typedef Vec2<vrealx> Vec2vrx; 103 typedef Vec2<vintx> Vec2vix; 104 typedef Vec2<vllongx> Vec2vlx; 105 typedef Vec2<vboolx> Vec2vbx; 106 typedef Vec2<vboolfx> Vec2vbfx; 107 typedef Vec2<vbooldx> Vec2vbdx; 108 109 //////////////////////////////////////////////////////////////////////////////// 110 /// Vec3 shortcuts 111 //////////////////////////////////////////////////////////////////////////////// 112 113 template<int N> using Vec3vf = Vec3<vfloat<N>>; 114 template<int N> using Vec3vd = Vec3<vdouble<N>>; 115 template<int N> using Vec3vr = Vec3<vreal<N>>; 116 template<int N> using Vec3vi = Vec3<vint<N>>; 117 template<int N> using Vec3vl = Vec3<vllong<N>>; 118 template<int N> using Vec3vb = Vec3<vbool<N>>; 119 template<int N> using Vec3vbf = Vec3<vboolf<N>>; 120 template<int N> using Vec3vbd = Vec3<vboold<N>>; 121 122 typedef Vec3<vfloat4> Vec3vf4; 123 typedef Vec3<vdouble4> Vec3vd4; 124 typedef Vec3<vreal4> Vec3vr4; 125 typedef Vec3<vint4> Vec3vi4; 126 typedef Vec3<vllong4> Vec3vl4; 127 typedef Vec3<vbool4> Vec3vb4; 128 typedef Vec3<vboolf4> Vec3vbf4; 129 typedef Vec3<vboold4> Vec3vbd4; 130 131 typedef Vec3<vfloat8> Vec3vf8; 132 typedef Vec3<vdouble8> Vec3vd8; 133 typedef Vec3<vreal8> Vec3vr8; 134 typedef Vec3<vint8> Vec3vi8; 135 typedef Vec3<vllong8> Vec3vl8; 136 typedef Vec3<vbool8> Vec3vb8; 137 typedef Vec3<vboolf8> Vec3vbf8; 138 typedef Vec3<vboold8> Vec3vbd8; 139 140 typedef Vec3<vfloat16> Vec3vf16; 141 typedef Vec3<vdouble16> Vec3vd16; 142 typedef Vec3<vreal16> Vec3vr16; 143 typedef Vec3<vint16> Vec3vi16; 144 typedef Vec3<vllong16> Vec3vl16; 145 typedef Vec3<vbool16> Vec3vb16; 146 typedef Vec3<vboolf16> Vec3vbf16; 147 typedef Vec3<vboold16> Vec3vbd16; 148 149 typedef Vec3<vfloatx> Vec3vfx; 150 typedef Vec3<vdoublex> Vec3vdx; 151 typedef Vec3<vrealx> Vec3vrx; 152 typedef Vec3<vintx> Vec3vix; 153 typedef Vec3<vllongx> Vec3vlx; 154 typedef Vec3<vboolx> Vec3vbx; 155 typedef Vec3<vboolfx> Vec3vbfx; 156 typedef Vec3<vbooldx> Vec3vbdx; 157 158 //////////////////////////////////////////////////////////////////////////////// 159 /// Vec4 shortcuts 160 //////////////////////////////////////////////////////////////////////////////// 161 162 template<int N> using Vec4vf = Vec4<vfloat<N>>; 163 template<int N> using Vec4vd = Vec4<vdouble<N>>; 164 template<int N> using Vec4vr = Vec4<vreal<N>>; 165 template<int N> using Vec4vi = Vec4<vint<N>>; 166 template<int N> using Vec4vl = Vec4<vllong<N>>; 167 template<int N> using Vec4vb = Vec4<vbool<N>>; 168 template<int N> using Vec4vbf = Vec4<vboolf<N>>; 169 template<int N> using Vec4vbd = Vec4<vboold<N>>; 170 171 typedef Vec4<vfloat4> Vec4vf4; 172 typedef Vec4<vdouble4> Vec4vd4; 173 typedef Vec4<vreal4> Vec4vr4; 174 typedef Vec4<vint4> Vec4vi4; 175 typedef Vec4<vllong4> Vec4vl4; 176 typedef Vec4<vbool4> Vec4vb4; 177 typedef Vec4<vboolf4> Vec4vbf4; 178 typedef Vec4<vboold4> Vec4vbd4; 179 180 typedef Vec4<vfloat8> Vec4vf8; 181 typedef Vec4<vdouble8> Vec4vd8; 182 typedef Vec4<vreal8> Vec4vr8; 183 typedef Vec4<vint8> Vec4vi8; 184 typedef Vec4<vllong8> Vec4vl8; 185 typedef Vec4<vbool8> Vec4vb8; 186 typedef Vec4<vboolf8> Vec4vbf8; 187 typedef Vec4<vboold8> Vec4vbd8; 188 189 typedef Vec4<vfloat16> Vec4vf16; 190 typedef Vec4<vdouble16> Vec4vd16; 191 typedef Vec4<vreal16> Vec4vr16; 192 typedef Vec4<vint16> Vec4vi16; 193 typedef Vec4<vllong16> Vec4vl16; 194 typedef Vec4<vbool16> Vec4vb16; 195 typedef Vec4<vboolf16> Vec4vbf16; 196 typedef Vec4<vboold16> Vec4vbd16; 197 198 typedef Vec4<vfloatx> Vec4vfx; 199 typedef Vec4<vdoublex> Vec4vdx; 200 typedef Vec4<vrealx> Vec4vrx; 201 typedef Vec4<vintx> Vec4vix; 202 typedef Vec4<vllongx> Vec4vlx; 203 typedef Vec4<vboolx> Vec4vbx; 204 typedef Vec4<vboolfx> Vec4vbfx; 205 typedef Vec4<vbooldx> Vec4vbdx; 206 207 //////////////////////////////////////////////////////////////////////////////// 208 /// Other shortcuts 209 //////////////////////////////////////////////////////////////////////////////// 210 211 template<int N> using BBox3vf = BBox<Vec3vf<N>>; 212 typedef BBox<Vec3vf4> BBox3vf4; 213 typedef BBox<Vec3vf8> BBox3vf8; 214 typedef BBox<Vec3vf16> BBox3vf16; 215 216 /* calculate time segment itime and fractional time ftime */ getTimeSegment(float time,float numTimeSegments,float & ftime)217 __forceinline int getTimeSegment(float time, float numTimeSegments, float& ftime) 218 { 219 const float timeScaled = time * numTimeSegments; 220 const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f); 221 ftime = timeScaled - itimef; 222 return int(itimef); 223 } 224 getTimeSegment(float time,float start_time,float end_time,float numTimeSegments,float & ftime)225 __forceinline int getTimeSegment(float time, float start_time, float end_time, float numTimeSegments, float& ftime) 226 { 227 const float timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; 228 const float itimef = clamp(floorf(timeScaled), 0.0f, numTimeSegments-1.0f); 229 ftime = timeScaled - itimef; 230 return int(itimef); 231 } 232 233 template<int N> getTimeSegment(const vfloat<N> & time,const vfloat<N> & numTimeSegments,vfloat<N> & ftime)234 __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) 235 { 236 const vfloat<N> timeScaled = time * numTimeSegments; 237 const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); 238 ftime = timeScaled - itimef; 239 return vint<N>(itimef); 240 } 241 242 template<int N> getTimeSegment(const vfloat<N> & time,const vfloat<N> & start_time,const vfloat<N> & end_time,const vfloat<N> & numTimeSegments,vfloat<N> & ftime)243 __forceinline vint<N> getTimeSegment(const vfloat<N>& time, const vfloat<N>& start_time, const vfloat<N>& end_time, const vfloat<N>& numTimeSegments, vfloat<N>& ftime) 244 { 245 const vfloat<N> timeScaled = (time-start_time)/(end_time-start_time) * numTimeSegments; 246 const vfloat<N> itimef = clamp(floor(timeScaled), vfloat<N>(zero), numTimeSegments-1.0f); 247 ftime = timeScaled - itimef; 248 return vint<N>(itimef); 249 } 250 251 /* calculate overlapping time segment range */ getTimeSegmentRange(const BBox1f & time_range,float numTimeSegments)252 __forceinline range<int> getTimeSegmentRange(const BBox1f& time_range, float numTimeSegments) 253 { 254 const float round_up = 1.0f+2.0f*float(ulp); // corrects inaccuracies to precisely match time step 255 const float round_down = 1.0f-2.0f*float(ulp); 256 const int itime_lower = (int)max(floor(round_up *time_range.lower*numTimeSegments), 0.0f); 257 const int itime_upper = (int)min(ceil (round_down*time_range.upper*numTimeSegments), numTimeSegments); 258 return make_range(itime_lower, itime_upper); 259 } 260 261 /* calculate overlapping time segment range */ getTimeSegmentRange(const BBox1f & range,BBox1f time_range,float numTimeSegments)262 __forceinline range<int> getTimeSegmentRange(const BBox1f& range, BBox1f time_range, float numTimeSegments) 263 { 264 const float lower = (range.lower-time_range.lower)/time_range.size(); 265 const float upper = (range.upper-time_range.lower)/time_range.size(); 266 return getTimeSegmentRange(BBox1f(lower,upper),numTimeSegments); 267 } 268 } 269