1 //-----------------------------------------------------------------------------
2 // Math
3 //-----------------------------------------------------------------------------
4 
5 #include "math.h"
6 #include "types.h"
7 #include <string.h>   // for memset and memcpy
8 
9 // Integer Math
10 // Find the closest power of 2 that is >= N.
NextPowerOfTwo(DWORD N)11 DWORD NextPowerOfTwo(DWORD N)
12 {
13   if (N<=0L   ) return 0L;
14   if (N<=1L   ) return 1L;
15   if (N<=2L   ) return 2L;
16   if (N<=4L   ) return 4L;
17   if (N<=8L   ) return 8L;
18   if (N<=16L      ) return 16L;
19   if (N<=32L      ) return 32L;
20   if (N<=64L      ) return 64L;
21   if (N<=128L     ) return 128L;
22   if (N<=256L     ) return 256L;
23   if (N<=512L     ) return 512L;
24   if (N<=1024L    ) return 1024L;
25   if (N<=2048L    ) return 2048L;
26   if (N<=4096L    ) return 4096L;
27   if (N<=8192L    ) return 8192L;
28   if (N<=16384L   ) return 16384L;
29   if (N<=32768L   ) return 32768L;
30   if (N<=65536L   ) return 65536L;
31   else        return 0;
32 }
33 
Log2(DWORD val)34 DWORD Log2(DWORD val)
35 {
36   DWORD answer = 0;
37   while (val>>=1) ++answer;
38   return answer;
39 }
40 
41 // Floating Point Math
42 #ifdef WIN32
43   #ifdef ASM
FastAbs(float a)44     __declspec(naked) float __fastcall FastAbs(float a)
45     {
46       __asm
47       {
48         fld   DWORD PTR [esp+4]
49         fabs
50         ret 4
51       }
52     }
53   #else
FastAbs(float a)54     float __fastcall FastAbs(float a)
55     {
56       return fabsf(a);
57     }
58   #endif
59 
60 #else
FastAbs(float a)61   float FastAbs(float a)
62   {
63     return fabsf(a);
64   }
65 #endif
66 
67 #if 0
68   double sintable[1024] = {
69   0.000000,0.001534,0.003068,0.004602,0.006136,0.007670,0.009204,0.010738,
70   0.012272,0.013805,0.015339,0.016873,0.018407,0.019940,0.021474,0.023008,
71   0.024541,0.026075,0.027608,0.029142,0.030675,0.032208,0.033741,0.035274,
72   0.036807,0.038340,0.039873,0.041406,0.042938,0.044471,0.046003,0.047535,
73   0.049068,0.050600,0.052132,0.053664,0.055195,0.056727,0.058258,0.059790,
74   0.061321,0.062852,0.064383,0.065913,0.067444,0.068974,0.070505,0.072035,
75   0.073565,0.075094,0.076624,0.078153,0.079682,0.081211,0.082740,0.084269,
76   0.085797,0.087326,0.088854,0.090381,0.091909,0.093436,0.094963,0.096490,
77   0.098017,0.099544,0.101070,0.102596,0.104122,0.105647,0.107172,0.108697,
78   0.110222,0.111747,0.113271,0.114795,0.116319,0.117842,0.119365,0.120888,
79   0.122411,0.123933,0.125455,0.126977,0.128498,0.130019,0.131540,0.133061,
80   0.134581,0.136101,0.137620,0.139139,0.140658,0.142177,0.143695,0.145213,
81   0.146730,0.148248,0.149765,0.151281,0.152797,0.154313,0.155828,0.157343,
82   0.158858,0.160372,0.161886,0.163400,0.164913,0.166426,0.167938,0.169450,
83   0.170962,0.172473,0.173984,0.175494,0.177004,0.178514,0.180023,0.181532,
84   0.183040,0.184548,0.186055,0.187562,0.189069,0.190575,0.192080,0.193586,
85   0.195090,0.196595,0.198098,0.199602,0.201105,0.202607,0.204109,0.205610,
86   0.207111,0.208612,0.210112,0.211611,0.213110,0.214609,0.216107,0.217604,
87   0.219101,0.220598,0.222094,0.223589,0.225084,0.226578,0.228072,0.229565,
88   0.231058,0.232550,0.234042,0.235533,0.237024,0.238514,0.240003,0.241492,
89   0.242980,0.244468,0.245955,0.247442,0.248928,0.250413,0.251898,0.253382,
90   0.254866,0.256349,0.257831,0.259313,0.260794,0.262275,0.263755,0.265234,
91   0.266713,0.268191,0.269668,0.271145,0.272621,0.274097,0.275572,0.277046,
92   0.278520,0.279993,0.281465,0.282937,0.284408,0.285878,0.287347,0.288816,
93   0.290285,0.291752,0.293219,0.294685,0.296151,0.297616,0.299080,0.300543,
94   0.302006,0.303468,0.304929,0.306390,0.307850,0.309309,0.310767,0.312225,
95   0.313682,0.315138,0.316593,0.318048,0.319502,0.320955,0.322408,0.323859,
96   0.325310,0.326760,0.328210,0.329658,0.331106,0.332553,0.334000,0.335445,
97   0.336890,0.338334,0.339777,0.341219,0.342661,0.344101,0.345541,0.346980,
98   0.348419,0.349856,0.351293,0.352729,0.354164,0.355598,0.357031,0.358463,
99   0.359895,0.361326,0.362756,0.364185,0.365613,0.367040,0.368467,0.369892,
100   0.371317,0.372741,0.374164,0.375586,0.377007,0.378428,0.379847,0.381266,
101   0.382683,0.384100,0.385516,0.386931,0.388345,0.389758,0.391170,0.392582,
102   0.393992,0.395401,0.396810,0.398218,0.399624,0.401030,0.402435,0.403838,
103   0.405241,0.406643,0.408044,0.409444,0.410843,0.412241,0.413638,0.415034,
104   0.416430,0.417824,0.419217,0.420609,0.422000,0.423390,0.424780,0.426168,
105   0.427555,0.428941,0.430326,0.431711,0.433094,0.434476,0.435857,0.437237,
106   0.438616,0.439994,0.441371,0.442747,0.444122,0.445496,0.446869,0.448241,
107   0.449611,0.450981,0.452350,0.453717,0.455084,0.456449,0.457813,0.459177,
108   0.460539,0.461900,0.463260,0.464619,0.465976,0.467333,0.468689,0.470043,
109   0.471397,0.472749,0.474100,0.475450,0.476799,0.478147,0.479494,0.480839,
110   0.482184,0.483527,0.484869,0.486210,0.487550,0.488889,0.490226,0.491563,
111   0.492898,0.494232,0.495565,0.496897,0.498228,0.499557,0.500885,0.502212,
112   0.503538,0.504863,0.506187,0.507509,0.508830,0.510150,0.511469,0.512786,
113   0.514103,0.515418,0.516732,0.518045,0.519356,0.520666,0.521975,0.523283,
114   0.524590,0.525895,0.527199,0.528502,0.529804,0.531104,0.532403,0.533701,
115   0.534998,0.536293,0.537587,0.538880,0.540171,0.541462,0.542751,0.544039,
116   0.545325,0.546610,0.547894,0.549177,0.550458,0.551738,0.553017,0.554294,
117   0.555570,0.556845,0.558119,0.559391,0.560662,0.561931,0.563199,0.564466,
118   0.565732,0.566996,0.568259,0.569521,0.570781,0.572040,0.573297,0.574553,
119   0.575808,0.577062,0.578314,0.579565,0.580814,0.582062,0.583309,0.584554,
120   0.585798,0.587040,0.588282,0.589521,0.590760,0.591997,0.593232,0.594466,
121   0.595699,0.596931,0.598161,0.599389,0.600616,0.601842,0.603067,0.604290,
122   0.605511,0.606731,0.607950,0.609167,0.610383,0.611597,0.612810,0.614022,
123   0.615232,0.616440,0.617647,0.618853,0.620057,0.621260,0.622461,0.623661,
124   0.624859,0.626056,0.627252,0.628446,0.629638,0.630829,0.632019,0.633207,
125   0.634393,0.635578,0.636762,0.637944,0.639124,0.640303,0.641481,0.642657,
126   0.643832,0.645005,0.646176,0.647346,0.648514,0.649681,0.650847,0.652011,
127   0.653173,0.654334,0.655493,0.656651,0.657807,0.658961,0.660114,0.661266,
128   0.662416,0.663564,0.664711,0.665856,0.667000,0.668142,0.669283,0.670422,
129   0.671559,0.672695,0.673829,0.674962,0.676093,0.677222,0.678350,0.679476,
130   0.680601,0.681724,0.682846,0.683965,0.685084,0.686200,0.687315,0.688429,
131   0.689541,0.690651,0.691759,0.692866,0.693971,0.695075,0.696177,0.697278,
132   0.698376,0.699473,0.700569,0.701663,0.702755,0.703845,0.704934,0.706021,
133   0.707107,0.708191,0.709273,0.710353,0.711432,0.712509,0.713585,0.714659,
134   0.715731,0.716801,0.717870,0.718937,0.720003,0.721066,0.722128,0.723188,
135   0.724247,0.725304,0.726359,0.727413,0.728464,0.729514,0.730563,0.731609,
136   0.732654,0.733697,0.734739,0.735779,0.736817,0.737853,0.738887,0.739920,
137   0.740951,0.741980,0.743008,0.744034,0.745058,0.746080,0.747101,0.748119,
138   0.749136,0.750152,0.751165,0.752177,0.753187,0.754195,0.755201,0.756206,
139   0.757209,0.758210,0.759209,0.760207,0.761202,0.762196,0.763188,0.764179,
140   0.765167,0.766154,0.767139,0.768122,0.769103,0.770083,0.771061,0.772036,
141   0.773010,0.773983,0.774953,0.775922,0.776888,0.777853,0.778817,0.779778,
142   0.780737,0.781695,0.782651,0.783605,0.784557,0.785507,0.786455,0.787402,
143   0.788346,0.789289,0.790230,0.791169,0.792107,0.793042,0.793975,0.794907,
144   0.795837,0.796765,0.797691,0.798615,0.799537,0.800458,0.801376,0.802293,
145   0.803208,0.804120,0.805031,0.805940,0.806848,0.807753,0.808656,0.809558,
146   0.810457,0.811355,0.812251,0.813144,0.814036,0.814926,0.815814,0.816701,
147   0.817585,0.818467,0.819348,0.820226,0.821103,0.821977,0.822850,0.823721,
148   0.824589,0.825456,0.826321,0.827184,0.828045,0.828904,0.829761,0.830616,
149   0.831470,0.832321,0.833170,0.834018,0.834863,0.835706,0.836548,0.837387,
150   0.838225,0.839060,0.839894,0.840725,0.841555,0.842383,0.843208,0.844032,
151   0.844854,0.845673,0.846491,0.847307,0.848120,0.848932,0.849742,0.850549,
152   0.851355,0.852159,0.852961,0.853760,0.854558,0.855354,0.856147,0.856939,
153   0.857729,0.858516,0.859302,0.860085,0.860867,0.861646,0.862424,0.863199,
154   0.863973,0.864744,0.865514,0.866281,0.867046,0.867809,0.868571,0.869330,
155   0.870087,0.870842,0.871595,0.872346,0.873095,0.873842,0.874587,0.875329,
156   0.876070,0.876809,0.877545,0.878280,0.879012,0.879743,0.880471,0.881197,
157   0.881921,0.882643,0.883363,0.884081,0.884797,0.885511,0.886223,0.886932,
158   0.887640,0.888345,0.889048,0.889750,0.890449,0.891146,0.891841,0.892534,
159   0.893224,0.893913,0.894599,0.895284,0.895966,0.896646,0.897325,0.898001,
160   0.898674,0.899346,0.900016,0.900683,0.901349,0.902012,0.902673,0.903332,
161   0.903989,0.904644,0.905297,0.905947,0.906596,0.907242,0.907886,0.908528,
162   0.909168,0.909806,0.910441,0.911075,0.911706,0.912335,0.912962,0.913587,
163   0.914210,0.914830,0.915449,0.916065,0.916679,0.917291,0.917901,0.918508,
164   0.919114,0.919717,0.920318,0.920917,0.921514,0.922109,0.922701,0.923291,
165   0.923880,0.924465,0.925049,0.925631,0.926210,0.926787,0.927363,0.927935,
166   0.928506,0.929075,0.929641,0.930205,0.930767,0.931327,0.931884,0.932440,
167   0.932993,0.933544,0.934093,0.934639,0.935184,0.935726,0.936266,0.936803,
168   0.937339,0.937872,0.938404,0.938932,0.939459,0.939984,0.940506,0.941026,
169   0.941544,0.942060,0.942573,0.943084,0.943593,0.944100,0.944605,0.945107,
170   0.945607,0.946105,0.946601,0.947094,0.947586,0.948075,0.948561,0.949046,
171   0.949528,0.950008,0.950486,0.950962,0.951435,0.951906,0.952375,0.952842,
172   0.953306,0.953768,0.954228,0.954686,0.955141,0.955594,0.956045,0.956494,
173   0.956940,0.957385,0.957826,0.958266,0.958703,0.959139,0.959572,0.960002,
174   0.960431,0.960857,0.961280,0.961702,0.962121,0.962538,0.962953,0.963366,
175   0.963776,0.964184,0.964590,0.964993,0.965394,0.965793,0.966190,0.966584,
176   0.966976,0.967366,0.967754,0.968139,0.968522,0.968903,0.969281,0.969657,
177   0.970031,0.970403,0.970772,0.971139,0.971504,0.971866,0.972226,0.972584,
178   0.972940,0.973293,0.973644,0.973993,0.974339,0.974684,0.975025,0.975365,
179   0.975702,0.976037,0.976370,0.976700,0.977028,0.977354,0.977677,0.977999,
180   0.978317,0.978634,0.978948,0.979260,0.979570,0.979877,0.980182,0.980485,
181   0.980785,0.981083,0.981379,0.981673,0.981964,0.982253,0.982539,0.982824,
182   0.983105,0.983385,0.983662,0.983937,0.984210,0.984480,0.984749,0.985014,
183   0.985278,0.985539,0.985798,0.986054,0.986308,0.986560,0.986809,0.987057,
184   0.987301,0.987544,0.987784,0.988022,0.988258,0.988491,0.988722,0.988950,
185   0.989177,0.989400,0.989622,0.989841,0.990058,0.990273,0.990485,0.990695,
186   0.990903,0.991108,0.991311,0.991511,0.991710,0.991906,0.992099,0.992291,
187   0.992480,0.992666,0.992850,0.993032,0.993212,0.993389,0.993564,0.993737,
188   0.993907,0.994075,0.994240,0.994404,0.994565,0.994723,0.994879,0.995033,
189   0.995185,0.995334,0.995481,0.995625,0.995767,0.995907,0.996045,0.996180,
190   0.996313,0.996443,0.996571,0.996697,0.996820,0.996941,0.997060,0.997176,
191   0.997290,0.997402,0.997511,0.997618,0.997723,0.997825,0.997925,0.998023,
192   0.998118,0.998211,0.998302,0.998390,0.998476,0.998559,0.998640,0.998719,
193   0.998795,0.998870,0.998941,0.999011,0.999078,0.999142,0.999205,0.999265,
194   0.999322,0.999378,0.999431,0.999481,0.999529,0.999575,0.999619,0.999660,
195   0.999699,0.999735,0.999769,0.999801,0.999831,0.999858,0.999882,0.999905,
196   0.999925,0.999942,0.999958,0.999971,0.999981,0.999989,0.999995,0.999999};
197 
198   float __fastcall FastSin(float a)
199   {
200     int index;
201     int quad;
202 
203     index = 1024 * a / (M_PI * 0.5);
204     quad = ( index >> 10 ) & 3;
205     index &= 1023;
206     switch ( quad )
207     {
208       case 0:
209         return sintable[index];
210       case 1:
211         return sintable[1023-index];
212       case 2:
213         return -sintable[index];
214       case 3:
215         return -sintable[1023-index];
216       default:
217         break;
218     }
219     return 0;
220   }
221 
222   float __fastcall FastCos(float a)
223   {
224     int index;
225     int quad;
226 
227     index = 1024 * a / (M_PI * 0.5);
228     quad = ( index >> 10 ) & 3;
229     index &= 1023;
230     switch ( quad )
231     {
232       case 3:
233         return sintable[index];
234       case 0:
235         return sintable[1023-index];
236       case 1:
237         return -sintable[index];
238       case 2:
239         return -sintable[1023-index];
240       default:
241         break;
242     }
243     return 0;
244   }
245 #else
246   #ifdef WIN32
247     #if ASM
FastSin(float a)248       __declspec(naked) float __fastcall FastSin(float a)
249       {
250         __asm
251         {
252           fld   DWORD PTR [esp+4]
253           fsin
254           ret 4
255         }
256       }
257 
FastCos(float a)258       __declspec(naked) float __fastcall FastCos(float a)
259       {
260         __asm
261         {
262           fld   DWORD PTR [esp+4]
263           fcos
264           ret 4
265         }
266       }
267     #else
FastSin(float a)268       float __fastcall FastSin(float a)
269       {
270         return sinf(a);
271       }
272 
FastCos(float a)273       float __fastcall FastCos(float a)
274       {
275         return cosf(a);
276       }
277     #endif
278   #else
FastSin(float a)279     float FastSin(float a)
280     {
281       return sin(a);
282     }
283 
FastCos(float a)284     float FastCos(float a)
285     {
286       return cos(a);
287     }
288   #endif
289 #endif
290 
291 // Reciprocal square root
rSqrt(float number)292 float rSqrt( float number )
293 {
294   long i;
295   float x2, y;
296   const float threehalfs = 1.5F;
297 
298   x2 = number * 0.5F;
299   y  = number;
300   i  = * ( long * ) &y;           // evil floating point bit level hacking
301   i  = 0x5f3759df - ( i >> 1 );
302   y  = * ( float * ) &i;
303   y  = y * ( threehalfs - ( x2 * y * y ) );   // 1st iteration
304 //  y  = y * ( threehalfs - ( x2 * y * y ) );   // 2nd iteration, this can be removed
305 
306   return y;
307 }
308 
309 #ifdef WIN32
310   #if ASM
311     #if 0
312       __declspec(naked) float __fastcall InverseSqrt(float a)
313       {
314         __asm
315         {
316           mov   eax, 0be6eb508h
317           mov   DWORD PTR [esp-12],03fc00000h       ;  1.5 on the stack
318           sub   eax, DWORD PTR [esp+4]            ; a
319           sub   DWORD PTR [esp+4], 800000h          ; a/2 a=Y0
320           shr   eax, 1                      ; firs approx in eax=R0
321           mov   DWORD PTR [esp-8], eax
322 
323           fld   DWORD PTR [esp-8]               ;r
324           fmul  st, st                          ;r*r
325           fld   DWORD PTR [esp-8]               ;r
326           fxch  st(1)
327           fmul  DWORD PTR [esp+4];a ;r*r*y0
328           fld   DWORD PTR [esp-12];load 1.5
329           fld   st(0)
330           fsub  st,st(2)                  ;r1 = 1.5 - y1
331                                     ;x1 = st(3)
332                                     ;y1 = st(2)
333                                     ;1.5 = st(1)
334                                     ;r1 = st(0)
335 
336           fld   st(1)
337           fxch  st(1)
338           fmul  st(3),st                  ; y2=y1*r1*...
339           fmul  st(3),st                  ; y2=y1*r1*r1
340           fmulp st(4),st                        ; x2=x1*r1
341           fsub  st,st(2)                        ; r2=1.5-y2
342                                     ;x2=st(3)
343                                     ;y2=st(2)
344                                     ;1.5=st(1)
345                                     ;r2 = st(0)
346 
347           fmul  st(2),st                  ;y3=y2*r2*...
348           fmul  st(3),st                  ;x3=x2*r2
349           fmulp st(2),st                  ;y3=y2*r2*r2
350           fxch  st(1)
351           fsubp st(1),st                  ;r3= 1.5 - y3
352                                     ;x3 = st(1)
353                                     ;r3 = st(0)
354           fmulp st(1), st
355           ret 4
356         }
357       }
358     #else
InverseSqrt(float a)359       float __fastcall InverseSqrt(float a)
360       {
361         float ahalf = 0.5f * a;
362         int i = *(int *) &a;        // get bits for floating value
363         i = 0x5f3759df - (i >> 1);      // gives initial guess y0
364         a = *(float *) &i;          // convert bits back to float
365         return a * (1.5f - ahalf * a * a);  // Newton step, repeating increases accuracy
366       }
367     #endif
368   #else
InverseSqrt(float a)369     float __fastcall InverseSqrt(float a)
370     {
371       return 1.f/sqrtf(a);
372     }
373   #endif
374 #else
InverseSqrt(float a)375   float InverseSqrt(float a)
376   {
377     return 1/sqrt(a);
378   }
379 #endif
380 
381 // Aproximations:
382 #ifdef WIN32
FastSqrt(float a)383   float __fastcall FastSqrt(float a)
384   {
385     return a?a*InverseSqrt(a):0;
386   }
387 #else
FastSqrt(float a)388   float FastSqrt(float a)
389   {
390     return sqrt(a);
391   }
392 #endif
393 
394 // optimized dot product
395 #if 0
396 #ifndef DotProduct
397   #if ASM
398     #pragma warning (disable: 4035)
399     //__declspec( naked )
400     float __cdecl DotProduct(const vec3_t v1, const vec3_t v2)
401     {
402       FLOAT dotret;
403       _asm
404       {
405         mov     ecx, v1
406         mov     eax, v2
407 
408         ;optimized dot product; 15 cycles
409         fld dword ptr   [eax+0]     ;starts & ends on cycle 0
410         fmul dword ptr  [ecx+0]     ;starts on cycle 1
411         fld dword ptr   [eax+4]     ;starts & ends on cycle 2
412         fmul dword ptr  [ecx+4]     ;starts on cycle 3
413         fld dword ptr   [eax+8]     ;starts & ends on cycle 4
414         fmul dword ptr  [ecx+8]     ;starts on cycle 5
415         fxch            st(1)       ;no cost
416         faddp           st(2),st(0) ;starts on cycle 6, stalls for cycles 7-8
417         faddp           st(1),st(0) ;starts on cycle 9, stalls for cycles 10-12
418         fstp dword ptr  [dotret]    ;starts on cycle 13, ends on cycle 14
419 
420     //    ret
421       }
422       return dotret;
423     }
424     #pragma warning( default: 4035 )
425   #else
426     float __cdecl DotProduct(const vec3_t v1, const vec3_t v2)
427     {
428       return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
429     }
430   #endif
431 #endif
432 #endif
433 
434 // Matrix initialisation
InitMat3x3(float * A)435 void InitMat3x3(float *A)
436 {
437   memset(A, 0, 9*sizeof(float));
438   A[0] = A[4] = A[8] = 1.f;
439 }
440 
InitMat4x4(float * A)441 void InitMat4x4(float *A)
442 {
443   memset(A, 0, 16*sizeof(float));
444   A[0] = A[5] = A[10] = A[15] = 1.f;
445 }
446 
447 // Matrix multiplication
MultMat3x3(float * A,float * B,float * C)448 void MultMat3x3(float *A, float *B, float *C)
449 {
450   float t[9];
451 
452   t[0] = A[0]*B[0] + A[1]*B[3] + A[2]*B[6];
453   t[1] = A[0]*B[1] + A[1]*B[4] + A[2]*B[7];
454   t[2] = A[0]*B[2] + A[1]*B[5] + A[2]*B[8];
455   t[3] = A[3]*B[0] + A[4]*B[3] + A[5]*B[6];
456   t[4] = A[3]*B[1] + A[4]*B[4] + A[5]*B[7];
457   t[5] = A[3]*B[2] + A[4]*B[5] + A[5]*B[8];
458   t[6] = A[6]*B[0] + A[7]*B[3] + A[8]*B[6];
459   t[7] = A[6]*B[1] + A[7]*B[4] + A[8]*B[7];
460   t[8] = A[6]*B[2] + A[7]*B[5] + A[8]*B[8];
461 
462   memcpy(C, t, 9*sizeof(float));
463 }
464 
465 // Matrix multiplication
MultMat4x4(float * A,float * B,float * C)466 void MultMat4x4(float *A, float *B, float *C)
467 {
468   float t[16];
469 
470   t[0]  = A[0]*B[0]  + A[1]*B[4]  + A[2]*B[8]   + A[3]*B[12];
471   t[1]  = A[0]*B[1]  + A[1]*B[5]  + A[2]*B[9]   + A[3]*B[13];
472   t[2]  = A[0]*B[2]  + A[1]*B[6]  + A[2]*B[10]  + A[3]*B[14];
473   t[3]  = A[0]*B[3]  + A[1]*B[7]  + A[2]*B[11]  + A[3]*B[15];
474   t[4]  = A[4]*B[0]  + A[5]*B[4]  + A[6]*B[8]   + A[7]*B[12];
475   t[5]  = A[4]*B[1]  + A[5]*B[5]  + A[6]*B[9]   + A[7]*B[13];
476   t[6]  = A[4]*B[2]  + A[5]*B[6]  + A[6]*B[10]  + A[7]*B[14];
477   t[7]  = A[4]*B[3]  + A[5]*B[7]  + A[6]*B[11]  + A[7]*B[15];
478   t[8]  = A[8]*B[0]  + A[9]*B[4]  + A[10]*B[8]  + A[11]*B[12];
479   t[9]  = A[8]*B[1]  + A[9]*B[5]  + A[10]*B[9]  + A[11]*B[13];
480   t[10] = A[8]*B[2]  + A[9]*B[6]  + A[10]*B[10] + A[11]*B[14];
481   t[11] = A[8]*B[3]  + A[9]*B[7]  + A[10]*B[11] + A[11]*B[15];
482   t[12] = A[12]*B[0] + A[13]*B[4] + A[14]*B[8]  + A[15]*B[12];
483   t[13] = A[12]*B[1] + A[13]*B[5] + A[14]*B[9]  + A[15]*B[13];
484   t[14] = A[12]*B[2] + A[13]*B[6] + A[14]*B[10] + A[15]*B[14];
485   t[15] = A[12]*B[3] + A[13]*B[7] + A[14]*B[11] + A[15]*B[15];
486 
487   memcpy(C, t, 16*sizeof(float));
488 }
489 
490 // Vector and matrix multiplication
MultVect3x3(float * A,float * v,float * dest)491 void MultVect3x3(float *A, float *v, float *dest)
492 {
493   #if 0
494     dest[0] = A[0]*v[0] + A[3]*v[1] + A[6]*v[2];
495     dest[1] = A[1]*v[0] + A[4]*v[1] + A[6]*v[2];
496     dest[2] = A[2]*v[0] + A[5]*v[1] + A[7]*v[2];
497   #else
498     dest[0] = DotProduct(&A[0], v);
499     dest[1] = DotProduct(&A[3], v);
500     dest[2] = DotProduct(&A[6], v);
501   #endif
502 }
503 
MultVect4x4(float * A,float * v,float * dest)504 void MultVect4x4(float *A, float *v, float *dest)
505 {
506   vec3_t t;
507 
508   #if 0
509     t[0] = A[0]*v[0] + A[4]*v[1] + A[8]*v[2]  + A[12];
510     t[1] = A[1]*v[0] + A[5]*v[1] + A[9]*v[2]  + A[13];
511     t[2] = A[2]*v[0] + A[6]*v[1] + A[10]*v[2] + A[14];
512   #else
513     t[0] = A[0]*v[0] + A[1]*v[1] + A[2]*v[2]  + A[12];
514     t[1] = A[4]*v[0] + A[5]*v[1] + A[6]*v[2]  + A[13];
515     t[2] = A[8]*v[0] + A[9]*v[1] + A[10]*v[2] + A[14];
516   #endif
517 
518   // Recopie le vecteur obtenu dans le vecteur de destination
519   // pour la m�me raison que dans le produit matriciel
520   VectorCopy(t, dest);
521 }
522 
523 // Fast normalization of 3 component vector (does not test if the vector has 0 length)
FastNormVect3(float * v)524 void FastNormVect3(float *v)
525 {
526   float ilength;
527 
528   ilength = rSqrt(DotProduct(v, v));
529 
530   v[0] *= ilength;
531   v[1] *= ilength;
532   v[2] *= ilength;
533 }
534 
535 // Fast normalization of 2 component vector (does not test if the vector has 0 length)
FastNormVect2(float * v)536 void FastNormVect2(float *v)
537 {
538   float ilength;
539 
540   ilength = rSqrt(v[0]*v[0] + v[1]*v[1]);
541 
542   v[0] *= ilength;
543   v[1] *= ilength;
544 }
545 
546 // Slow normalization that returns the norm
VectorNormalize(vec3_t v)547 vec_t VectorNormalize(vec3_t v)
548 {
549   float length, ilength;
550 
551   length = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
552 
553   if (length)
554   {
555     length = sqrtf(length);   // FIXME
556     ilength = 1/length;
557     v[0] *= ilength;
558     v[1] *= ilength;
559     v[2] *= ilength;
560   }
561 
562   return length;
563 }
564 
VectorNormalize2(vec3_t v,vec3_t out)565 vec_t VectorNormalize2(vec3_t v, vec3_t out)
566 {
567   float length, ilength;
568 
569   length = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];
570 
571   if (length)
572   {
573     length = sqrtf(length);   // FIXME
574     ilength = 1/length;
575     out[0] = v[0]*ilength;
576     out[1] = v[1]*ilength;
577     out[2] = v[2]*ilength;
578   }
579   else
580   {
581     VectorClear (out);
582   }
583 
584   return length;
585 }
586 
VectorNormalizeFast(vec3_t v)587 void VectorNormalizeFast(vec3_t v)
588 {
589   float ilength = InverseSqrt(DotProduct(v,v));
590 
591   v[0] *= ilength;
592   v[1] *= ilength;
593   v[2] *= ilength;
594 }
595 
ColorNormalize(vec3_t in,vec3_t out)596 float ColorNormalize(vec3_t in, vec3_t out)
597 {
598   float f = max (max (in[0], in[1]), in[2]);
599 
600   if ( f > 1.0 )
601   {
602     f = 1.f / f;
603     out[0] = in[0] * f;
604     out[1] = in[1] * f;
605     out[2] = in[2] * f;
606   }
607   else
608   {
609     out[0] = in[0];
610     out[1] = in[1];
611     out[2] = in[2];
612   }
613 
614   return f;
615 }
616 
617 // Cross Product
618 #ifndef CrossProduct
CrossProduct(const vec3_t v1,const vec3_t v2,vec3_t cross)619 void CrossProduct(const vec3_t v1, const vec3_t v2, vec3_t cross)
620 {
621   cross[0] = v1[1]*v2[2] - v1[2]*v2[1];
622   cross[1] = v1[2]*v2[0] - v1[0]*v2[2];
623   cross[2] = v1[0]*v2[1] - v1[1]*v2[0];
624 }
625 #endif
626 
627 // Three control point Bezier interpolation
628 // mu ranges from 0 to 1, start to end of the curve
BezierCurve3(vec3_t p1,vec3_t p2,vec3_t p3,float mu,vec3_t dest)629 void BezierCurve3(vec3_t p1, vec3_t p2, vec3_t p3, float mu, vec3_t dest)
630 {
631   float mum1, mum12, mu2;
632 
633   mu2 = mu * mu;
634   mum1 = 1 - mu;
635   mum12 = mum1 * mum1;
636   dest[0] = p1[0] * mum12 + 2 * p2[0] * mum1 * mu + p3[0] * mu2;
637   dest[1] = p1[1] * mum12 + 2 * p2[1] * mum1 * mu + p3[1] * mu2;
638   dest[2] = p1[2] * mum12 + 2 * p2[2] * mum1 * mu + p3[2] * mu2;
639 }
640 
641 // Four control point Bezier interpolation
642 // mu ranges from 0 to 1, start to end of curve
BezierCurve4(vec3_t p1,vec3_t p2,vec3_t p3,vec3_t p4,float mu,vec3_t dest)643 void BezierCurve4(vec3_t p1, vec3_t p2, vec3_t p3, vec3_t p4, float mu, vec3_t dest)
644 {
645   float mum1, mum13, mu3;
646 
647   mum1 = 1 - mu;
648   mum13 = mum1 * mum1 * mum1;
649   mu3 = mu * mu * mu;
650 
651   dest[0] = mum13*p1[0] + 3*mu*mum1*mum1*p2[0] + 3*mu*mu*mum1*p3[0] + mu3*p4[0];
652   dest[1] = mum13*p1[1] + 3*mu*mum1*mum1*p2[1] + 3*mu*mu*mum1*p3[1] + mu3*p4[1];
653   dest[2] = mum13*p1[2] + 3*mu*mum1*mum1*p2[2] + 3*mu*mu*mum1*p3[2] + mu3*p4[2];
654 }
655 
656 // General Bezier curve
657 // Number of control points is n
658 // 0 <= mu < 1    IMPORTANT, the last point is not computed
BezierCurveN(vec3_t * p,int n,float mu,vec3_t dest)659 void BezierCurveN(vec3_t *p, int n, float mu, vec3_t dest)
660 {
661   int kn, nn, nkn;
662   float blend, muk, munk;
663   VectorClear(dest);
664 
665   muk = 1;
666   munk = powf(1-mu, (float) (n-1));
667 
668   for (int k = 0; k < n; ++k)
669   {
670     nn = n-1;
671     kn = k;
672     nkn = nn - k;
673     blend = muk * munk;
674     muk *= mu;
675     munk /= (1-mu);
676     while (nn >= 1)
677     {
678       blend *= nn; --nn;
679       if (kn > 1) { blend /= ((float) kn); --kn; }
680       if (nkn > 1) { blend /= ((float) nkn); --nkn; }
681     }
682     dest[0] += (p[k][0] * blend);
683     dest[1] += (p[k][1] * blend);
684     dest[2] += (p[k][2] * blend);
685   }
686 }
687 
688 //-----------------------------------------------------------------------------
689 // Bounding box related functions
690 //-----------------------------------------------------------------------------
691 
692 // Calculate the bounding box of a mesh surface
CalcFaceBounds(Surface * surf)693 void CalcFaceBounds(Surface *surf)
694 {
695   float minx, miny, minz, maxx, maxy, maxz;
696 
697   vertex_t * v = surf->firstvert[0];
698 
699   minx = maxx = v->v_point[0];
700   miny = maxy = v->v_point[1];
701   minz = maxz = v->v_point[2];
702   ++v;
703 
704   for (int i = 1; i < surf->numverts[0]; ++i, ++v)
705   {
706     minx = min(minx, v->v_point[0]);
707     miny = min(miny, v->v_point[1]);
708     minz = min(minz, v->v_point[2]);
709     maxx = max(maxx, v->v_point[0]);
710     maxy = max(maxy, v->v_point[1]);
711     maxz = max(maxz, v->v_point[2]);
712   }
713   surf->bbox[0] = minx;
714   surf->bbox[1] = miny;
715   surf->bbox[2] = minz;
716   surf->bbox[3] = maxx;
717   surf->bbox[4] = maxy;
718   surf->bbox[5] = maxz;
719 }
720 
ClearBounds(bboxf_t bbox)721 void ClearBounds(bboxf_t bbox)
722 {
723   bbox[0] = bbox[1] = bbox[2] = 99999;
724   bbox[3] = bbox[4] = bbox[5] = -99999;
725 }
726 
ClearBounds(vec3_t mins,vec3_t maxs)727 void ClearBounds(vec3_t mins, vec3_t maxs)
728 {
729   mins[0] = mins[1] = mins[2] = 99999;
730   maxs[0] = maxs[1] = maxs[2] = -99999;
731 }
732 
BoundsIntersect(bboxf_t bbox,vec3_t mins2,vec3_t maxs2)733 bool BoundsIntersect(bboxf_t bbox, vec3_t mins2, vec3_t maxs2)
734 {
735   return (bbox[0] <= maxs2[0] && bbox[1] <= maxs2[1] && bbox[2] <= maxs2[2] &&
736       bbox[3] >= mins2[0] && bbox[4] >= mins2[1] && bbox[5] >= mins2[2]);
737 }
738 
BoundsAndSphereIntersect(bboxf_t bbox,vec3_t centre,float radius)739 bool BoundsAndSphereIntersect(bboxf_t bbox, vec3_t centre, float radius)
740 {
741   return (bbox[0] <= centre[0]+radius && bbox[1] <= centre[1]+radius && bbox[2] <= centre[2]+radius &&
742       bbox[3] >= centre[0]-radius && bbox[4] >= centre[1]-radius && bbox[5] >= centre[2]-radius);
743 }
744 
BoundsIntersect(vec3_t mins1,vec3_t maxs1,vec3_t mins2,vec3_t maxs2)745 bool BoundsIntersect(vec3_t mins1, vec3_t maxs1, vec3_t mins2, vec3_t maxs2)
746 {
747   return (mins1[0] <= maxs2[0] && mins1[1] <= maxs2[1] && mins1[2] <= maxs2[2] &&
748       maxs1[0] >= mins2[0] && maxs1[1] >= mins2[1] && maxs1[2] >= mins2[2]);
749 }
750 
BoundsAndSphereIntersect(vec3_t mins,vec3_t maxs,vec3_t centre,float radius)751 bool BoundsAndSphereIntersect(vec3_t mins, vec3_t maxs, vec3_t centre, float radius)
752 {
753   return (mins[0] <= centre[0]+radius && mins[1] <= centre[1]+radius && mins[2] <= centre[2]+radius &&
754       maxs[0] >= centre[0]-radius && maxs[1] >= centre[1]-radius && maxs[2] >= centre[2]-radius);
755 }
756 
GetIntersection(bboxf_t bbox1,bboxf_t bbox2,bboxf_t dest)757 bool GetIntersection(bboxf_t bbox1, bboxf_t bbox2, bboxf_t dest)
758 {
759   dest[0] = max(bbox1[0], bbox2[0]);
760   dest[1] = max(bbox1[1], bbox2[1]);
761   dest[2] = max(bbox1[2], bbox2[2]);
762   dest[3] = min(bbox1[3], bbox2[3]);
763   dest[4] = min(bbox1[4], bbox2[4]);
764   dest[5] = min(bbox1[5], bbox2[5]);
765 
766   // if volume exists, we have a collision between bounding boxes
767   return (dest[0] <= dest[3] &&
768       dest[1] <= dest[4] &&
769       dest[2] <= dest[5]);
770 }
771 
AddPointToBounds(vec3_t v,bboxf_t bbox)772 void AddPointToBounds(vec3_t v, bboxf_t bbox)
773 {
774   if (v[0] < bbox[0]) bbox[0] = v[0];
775   if (v[0] > bbox[3]) bbox[3] = v[0];
776   if (v[1] < bbox[1]) bbox[1] = v[1];
777   if (v[1] > bbox[4]) bbox[4] = v[1];
778   if (v[2] < bbox[2]) bbox[2] = v[2];
779   if (v[2] > bbox[5]) bbox[5] = v[2];
780 }
781 
AddPointToBounds(vec3_t v,vec3_t mins,vec3_t maxs)782 void AddPointToBounds(vec3_t v, vec3_t mins, vec3_t maxs)
783 {
784   #if 1
785     if (v[0] < mins[0]) mins[0] = v[0];
786     if (v[0] > maxs[0]) maxs[0] = v[0];
787     if (v[1] < mins[1]) mins[1] = v[1];
788     if (v[1] > maxs[1]) maxs[1] = v[1];
789     if (v[2] < mins[2]) mins[2] = v[2];
790     if (v[2] > maxs[2]) maxs[2] = v[2];
791   #else
792     vec_t val;
793     val = v[0];
794     if (val < mins[0]) mins[0] = val;
795     if (val > maxs[0]) maxs[0] = val;
796     val = v[1];
797     if (val < mins[1]) mins[1] = val;
798     if (val > maxs[1]) maxs[1] = val;
799     val = v[2];
800     if (val < mins[2]) mins[2] = val;
801     if (val > maxs[2]) maxs[2] = val;
802   #endif
803 }
804 
PointInBounds(vec3_t point,bboxf_t bbox)805 bool PointInBounds(vec3_t point, bboxf_t bbox)
806 {
807   return (point[0] >= bbox[0] && point[0] <= bbox[3] &&
808           point[1] >= bbox[1] && point[1] <= bbox[4] &&
809           point[2] >= bbox[2] && point[2] <= bbox[5]);
810 }
811 
812 //-----------------------------------------------------------------------------
813 // Plane operations:
814 //-----------------------------------------------------------------------------
815 
816 // Generate a plane given 3 points
817 //
818 // Returns false if the triangle is degenrate.
819 // The normal will point out of the clock for clockwise ordered points
PlaneFromPoints(vec4_t plane,const vec3_t a,const vec3_t b,const vec3_t c)820 bool PlaneFromPoints(vec4_t plane, const vec3_t a, const vec3_t b, const vec3_t c)
821 {
822   vec3_t d1, d2;
823 
824   VectorSub(b, a, d1);
825   VectorSub(c, a, d2);
826   CrossProduct(d2, d1, plane);
827   if (VectorNormalize(plane) == 0) return false;
828   plane[3] = DotProduct(a, plane);
829   return true;
830 }
831 
PlaneFromPoints(vertex_t verts[3],cplane_t * plane)832 void PlaneFromPoints(vertex_t verts[3], cplane_t *plane)
833 {
834   vec3_t v1, v2;
835 
836   VectorSub(verts[1].v_point, verts[0].v_point, v1);
837   VectorSub(verts[2].v_point, verts[0].v_point, v2);
838   CrossProduct(v2, v1, plane->normal);
839   VectorNormalize(plane->normal);
840   plane->dist = DotProduct(verts[0].v_point, plane->normal);
841 }
842 
PlaneFromPoints(vec3_t verts[3],cplane_t * plane)843 void PlaneFromPoints(vec3_t verts[3], cplane_t *plane)
844 {
845   vec3_t v1, v2;
846 
847   VectorSub(verts[1], verts[0], v1);
848   VectorSub(verts[2], verts[0], v2);
849   CrossProduct(v2, v1, plane->normal);
850   VectorNormalize(plane->normal);
851   plane->dist = DotProduct(verts[0], plane->normal);
852 }
853 
CategorizePlane(cplane_t * plane)854 void CategorizePlane(cplane_t *plane)
855 {
856   plane->signbits = 0;
857   plane->type = PLANE_ANYZ;
858   if (plane->normal[0] < 0)     plane->signbits |= 1;
859   if (plane->normal[0] == 1.0f) plane->type = 0;
860   if (plane->normal[1] < 0)     plane->signbits |= 1<<1;
861   if (plane->normal[1] == 1.0f) plane->type = 1;
862   if (plane->normal[2] < 0)     plane->signbits |= 1<<2;
863   if (plane->normal[2] == 1.0f) plane->type = 2;
864 }
865 
PointDistance(vec3_t v,cplane_t * p)866 float PointDistance(vec3_t v, cplane_t* p)
867 {
868   if (p->type < PLANE_NON_AXIAL) return v[p->type]*p->normal[p->type]-p->dist;
869   else return DotProduct(v, p->normal)-p->dist;
870 }
871 
872 // returns the side of the plane in which the box is
873 #if !defined(WIN32) || !defined(ASM)
BoxOnPlaneSide(vec3_t emins,vec3_t emaxs,struct cplane_s * p)874   int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p)
875   {
876     float dist1, dist2;
877     int   sides;
878 
879     // fast axial cases
880     if (p->type < 3)
881     {
882       if (p->dist <= emins[p->type]) return 1;
883       if (p->dist >= emaxs[p->type]) return 2;
884       return 3;
885     }
886 
887     // general case
888     switch (p->signbits)
889     {
890       case 0:
891         // 000 -> PPP
892         dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
893         dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
894         break;
895       case 1:
896         // 001 -> PPN
897         dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
898         dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
899         break;
900       case 2:
901         // 010 -> PNP
902         dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
903         dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
904         break;
905       case 3:
906         // 011 -> PNN
907         dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
908         dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
909         break;
910       case 4:
911         // 100 -> NPP
912         dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
913         dist2 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
914         break;
915       case 5:
916         // 101 -> NPN
917         dist1 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
918         dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
919         break;
920       case 6:
921         // 110 -> NNP
922         dist1 = p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
923         dist2 = p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
924         break;
925       case 7:
926         // 111 -> NNN
927         dist1 = p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
928         dist2 = p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
929         break;
930       default:
931         dist1 = dist2 = 0;    // shut up compiler
932         break;
933     }
934 
935     sides = 0;
936     if (dist1 >= p->dist) sides = 1;
937     if (dist2 < p->dist) sides |= 2;
938 
939     return sides;
940   }
941 #else
942   #pragma warning(disable: 4035)
BoxOnPlaneSide(vec3_t emins,vec3_t emaxs,struct cplane_s * p)943   __declspec(naked) int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p)
944   {
945     static int bops_initialized;
946     static int Ljmptab[8];
947 
948     __asm
949     {
950       push ebx
951 
952       cmp bops_initialized, 1
953       je  initialized
954       mov bops_initialized, 1
955 
956       mov Ljmptab[0*4], offset Lcase0
957       mov Ljmptab[1*4], offset Lcase1
958       mov Ljmptab[2*4], offset Lcase2
959       mov Ljmptab[3*4], offset Lcase3
960       mov Ljmptab[4*4], offset Lcase4
961       mov Ljmptab[5*4], offset Lcase5
962       mov Ljmptab[6*4], offset Lcase6
963       mov Ljmptab[7*4], offset Lcase7
964 
965   initialized:
966 
967       mov edx,dword ptr[4+12+esp]
968       mov ecx,dword ptr[4+4+esp]
969       xor eax,eax
970       mov ebx,dword ptr[4+8+esp]
971       mov al,byte ptr[17+edx]
972       cmp al,8
973       jge Lerror
974       fld dword ptr[0+edx]
975       fld st(0)
976       jmp dword ptr[Ljmptab+eax*4]
977   Lcase0:
978       fmul dword ptr[ebx]
979       fld dword ptr[0+4+edx]
980       fxch st(2)
981       fmul dword ptr[ecx]
982       fxch st(2)
983       fld st(0)
984       fmul dword ptr[4+ebx]
985       fld dword ptr[0+8+edx]
986       fxch st(2)
987       fmul dword ptr[4+ecx]
988       fxch st(2)
989       fld st(0)
990       fmul dword ptr[8+ebx]
991       fxch st(5)
992       faddp st(3),st(0)
993       fmul dword ptr[8+ecx]
994       fxch st(1)
995       faddp st(3),st(0)
996       fxch st(3)
997       faddp st(2),st(0)
998       jmp LSetSides
999   Lcase1:
1000       fmul dword ptr[ecx]
1001       fld dword ptr[0+4+edx]
1002       fxch st(2)
1003       fmul dword ptr[ebx]
1004       fxch st(2)
1005       fld st(0)
1006       fmul dword ptr[4+ebx]
1007       fld dword ptr[0+8+edx]
1008       fxch st(2)
1009       fmul dword ptr[4+ecx]
1010       fxch st(2)
1011       fld st(0)
1012       fmul dword ptr[8+ebx]
1013       fxch st(5)
1014       faddp st(3),st(0)
1015       fmul dword ptr[8+ecx]
1016       fxch st(1)
1017       faddp st(3),st(0)
1018       fxch st(3)
1019       faddp st(2),st(0)
1020       jmp LSetSides
1021   Lcase2:
1022       fmul dword ptr[ebx]
1023       fld dword ptr[0+4+edx]
1024       fxch st(2)
1025       fmul dword ptr[ecx]
1026       fxch st(2)
1027       fld st(0)
1028       fmul dword ptr[4+ecx]
1029       fld dword ptr[0+8+edx]
1030       fxch st(2)
1031       fmul dword ptr[4+ebx]
1032       fxch st(2)
1033       fld st(0)
1034       fmul dword ptr[8+ebx]
1035       fxch st(5)
1036       faddp st(3),st(0)
1037       fmul dword ptr[8+ecx]
1038       fxch st(1)
1039       faddp st(3),st(0)
1040       fxch st(3)
1041       faddp st(2),st(0)
1042       jmp LSetSides
1043   Lcase3:
1044       fmul dword ptr[ecx]
1045       fld dword ptr[0+4+edx]
1046       fxch st(2)
1047       fmul dword ptr[ebx]
1048       fxch st(2)
1049       fld st(0)
1050       fmul dword ptr[4+ecx]
1051       fld dword ptr[0+8+edx]
1052       fxch st(2)
1053       fmul dword ptr[4+ebx]
1054       fxch st(2)
1055       fld st(0)
1056       fmul dword ptr[8+ebx]
1057       fxch st(5)
1058       faddp st(3),st(0)
1059       fmul dword ptr[8+ecx]
1060       fxch st(1)
1061       faddp st(3),st(0)
1062       fxch st(3)
1063       faddp st(2),st(0)
1064       jmp LSetSides
1065   Lcase4:
1066       fmul dword ptr[ebx]
1067       fld dword ptr[0+4+edx]
1068       fxch st(2)
1069       fmul dword ptr[ecx]
1070       fxch st(2)
1071       fld st(0)
1072       fmul dword ptr[4+ebx]
1073       fld dword ptr[0+8+edx]
1074       fxch st(2)
1075       fmul dword ptr[4+ecx]
1076       fxch st(2)
1077       fld st(0)
1078       fmul dword ptr[8+ecx]
1079       fxch st(5)
1080       faddp st(3),st(0)
1081       fmul dword ptr[8+ebx]
1082       fxch st(1)
1083       faddp st(3),st(0)
1084       fxch st(3)
1085       faddp st(2),st(0)
1086       jmp LSetSides
1087   Lcase5:
1088       fmul dword ptr[ecx]
1089       fld dword ptr[0+4+edx]
1090       fxch st(2)
1091       fmul dword ptr[ebx]
1092       fxch st(2)
1093       fld st(0)
1094       fmul dword ptr[4+ebx]
1095       fld dword ptr[0+8+edx]
1096       fxch st(2)
1097       fmul dword ptr[4+ecx]
1098       fxch st(2)
1099       fld st(0)
1100       fmul dword ptr[8+ecx]
1101       fxch st(5)
1102       faddp st(3),st(0)
1103       fmul dword ptr[8+ebx]
1104       fxch st(1)
1105       faddp st(3),st(0)
1106       fxch st(3)
1107       faddp st(2),st(0)
1108       jmp LSetSides
1109   Lcase6:
1110       fmul dword ptr[ebx]
1111       fld dword ptr[0+4+edx]
1112       fxch st(2)
1113       fmul dword ptr[ecx]
1114       fxch st(2)
1115       fld st(0)
1116       fmul dword ptr[4+ecx]
1117       fld dword ptr[0+8+edx]
1118       fxch st(2)
1119       fmul dword ptr[4+ebx]
1120       fxch st(2)
1121       fld st(0)
1122       fmul dword ptr[8+ecx]
1123       fxch st(5)
1124       faddp st(3),st(0)
1125       fmul dword ptr[8+ebx]
1126       fxch st(1)
1127       faddp st(3),st(0)
1128       fxch st(3)
1129       faddp st(2),st(0)
1130       jmp LSetSides
1131   Lcase7:
1132       fmul dword ptr[ecx]
1133       fld dword ptr[0+4+edx]
1134       fxch st(2)
1135       fmul dword ptr[ebx]
1136       fxch st(2)
1137       fld st(0)
1138       fmul dword ptr[4+ecx]
1139       fld dword ptr[0+8+edx]
1140       fxch st(2)
1141       fmul dword ptr[4+ebx]
1142       fxch st(2)
1143       fld st(0)
1144       fmul dword ptr[8+ecx]
1145       fxch st(5)
1146       faddp st(3),st(0)
1147       fmul dword ptr[8+ebx]
1148       fxch st(1)
1149       faddp st(3),st(0)
1150       fxch st(3)
1151       faddp st(2),st(0)
1152   LSetSides:
1153       faddp st(2),st(0)
1154       fcomp dword ptr[12+edx]
1155       xor ecx,ecx
1156       fnstsw ax
1157       fcomp dword ptr[12+edx]
1158       and ah,1
1159       xor ah,1
1160       add cl,ah
1161       fnstsw ax
1162       and ah,1
1163       add ah,ah
1164       add cl,ah
1165       pop ebx
1166       mov eax,ecx
1167       ret
1168   Lerror:
1169       int 3
1170     }
1171   }
1172   #pragma warning(default: 4035)
1173 #endif
1174