cake_src/cake/math.cpp

//-----------------------------------------------------------------------------
// Math
//-----------------------------------------------------------------------------

#include "math.h"
#include "types.h"
#include <string.h>   // for memset and memcpy

// Integer Math
// Round N up to a power of two.
//
// Returns the smallest power of two that is >= N for N in [1, 65536].
// Returns 0 when N is 0, and also when N is larger than 65536 (the largest
// power of two this routine handles), so 0 doubles as the "out of range"
// result.
DWORD NextPowerOfTwo(DWORD N)
{
  const DWORD largest = 65536L;

  if (N <= 0L) return 0L;

  // Walk 1, 2, 4, ... 65536 and stop at the first value that covers N.
  for (DWORD candidate = 1L; candidate <= largest; candidate <<= 1)
  {
    if (N <= candidate) return candidate;
  }

  return 0;   // N is above the supported range
}

// Integer base-2 logarithm, rounded down: counts how many times val can be
// shifted right by one bit before it becomes zero, minus the first shift.
// Both val == 0 and val == 1 yield 0.
DWORD Log2(DWORD val)
{
  DWORD shifts = 0;

  for (val >>= 1; val; val >>= 1)
  {
    ++shifts;
  }

  return shifts;
}

// Floating Point Math
// FastAbs: absolute value of a float.
//
// Three build variants are selected by the preprocessor:
//   - WIN32 with ASM defined: a naked x87 routine (no compiler-generated
//     prologue/epilogue) that loads the argument from [esp+4], applies fabs
//     and returns with "ret 4", leaving the result on the FPU stack.
//   - WIN32 without ASM: __fastcall wrapper around fabsf.
//   - everything else: plain wrapper around fabsf.
//
// NOTE(review): this block tests "#ifdef ASM" while FastSin/FastCos and
// InverseSqrt below test "#if ASM"; the two disagree when ASM is defined
// as 0 -- confirm which is intended.
#ifdef WIN32
  #ifdef ASM
    __declspec(naked) float __fastcall FastAbs(float a)
    {
      __asm
      {
        fld   DWORD PTR [esp+4]
        fabs
        ret 4
      }
    }
  #else
    float __fastcall FastAbs(float a)
    {
      return fabsf(a);
    }
  #endif

#else
  float FastAbs(float a)
  {
    return fabsf(a);
  }
#endif

#if 0
  double sintable[1024] = {
  0.000000,0.001534,0.003068,0.004602,0.006136,0.007670,0.009204,0.010738,
  0.012272,0.013805,0.015339,0.016873,0.018407,0.019940,0.021474,0.023008,
  0.024541,0.026075,0.027608,0.029142,0.030675,0.032208,0.033741,0.035274,
  0.036807,0.038340,0.039873,0.041406,0.042938,0.044471,0.046003,0.047535,
  0.049068,0.050600,0.052132,0.053664,0.055195,0.056727,0.058258,0.059790,
  0.061321,0.062852,0.064383,0.065913,0.067444,0.068974,0.070505,0.072035,
  0.073565,0.075094,0.076624,0.078153,0.079682,0.081211,0.082740,0.084269,
  0.085797,0.087326,0.088854,0.090381,0.091909,0.093436,0.094963,0.096490,
  0.098017,0.099544,0.101070,0.102596,0.104122,0.105647,0.107172,0.108697,
  0.110222,0.111747,0.113271,0.114795,0.116319,0.117842,0.119365,0.120888,
  0.122411,0.123933,0.125455,0.126977,0.128498,0.130019,0.131540,0.133061,
  0.134581,0.136101,0.137620,0.139139,0.140658,0.142177,0.143695,0.145213,
  0.146730,0.148248,0.149765,0.151281,0.152797,0.154313,0.155828,0.157343,
  0.158858,0.160372,0.161886,0.163400,0.164913,0.166426,0.167938,0.169450,
  0.170962,0.172473,0.173984,0.175494,0.177004,0.178514,0.180023,0.181532,
  0.183040,0.184548,0.186055,0.187562,0.189069,0.190575,0.192080,0.193586,
  0.195090,0.196595,0.198098,0.199602,0.201105,0.202607,0.204109,0.205610,
  0.207111,0.208612,0.210112,0.211611,0.213110,0.214609,0.216107,0.217604,
  0.219101,0.220598,0.222094,0.223589,0.225084,0.226578,0.228072,0.229565,
  0.231058,0.232550,0.234042,0.235533,0.237024,0.238514,0.240003,0.241492,
  0.242980,0.244468,0.245955,0.247442,0.248928,0.250413,0.251898,0.253382,
  0.254866,0.256349,0.257831,0.259313,0.260794,0.262275,0.263755,0.265234,
  0.266713,0.268191,0.269668,0.271145,0.272621,0.274097,0.275572,0.277046,
  0.278520,0.279993,0.281465,0.282937,0.284408,0.285878,0.287347,0.288816,
  0.290285,0.291752,0.293219,0.294685,0.296151,0.297616,0.299080,0.300543,
  0.302006,0.303468,0.304929,0.306390,0.307850,0.309309,0.310767,0.312225,
  0.313682,0.315138,0.316593,0.318048,0.319502,0.320955,0.322408,0.323859,
  0.325310,0.326760,0.328210,0.329658,0.331106,0.332553,0.334000,0.335445,
  0.336890,0.338334,0.339777,0.341219,0.342661,0.344101,0.345541,0.346980,
  0.348419,0.349856,0.351293,0.352729,0.354164,0.355598,0.357031,0.358463,
  0.359895,0.361326,0.362756,0.364185,0.365613,0.367040,0.368467,0.369892,
  0.371317,0.372741,0.374164,0.375586,0.377007,0.378428,0.379847,0.381266,
  0.382683,0.384100,0.385516,0.386931,0.388345,0.389758,0.391170,0.392582,
  0.393992,0.395401,0.396810,0.398218,0.399624,0.401030,0.402435,0.403838,
  0.405241,0.406643,0.408044,0.409444,0.410843,0.412241,0.413638,0.415034,
  0.416430,0.417824,0.419217,0.420609,0.422000,0.423390,0.424780,0.426168,
  0.427555,0.428941,0.430326,0.431711,0.433094,0.434476,0.435857,0.437237,
  0.438616,0.439994,0.441371,0.442747,0.444122,0.445496,0.446869,0.448241,
  0.449611,0.450981,0.452350,0.453717,0.455084,0.456449,0.457813,0.459177,
  0.460539,0.461900,0.463260,0.464619,0.465976,0.467333,0.468689,0.470043,
  0.471397,0.472749,0.474100,0.475450,0.476799,0.478147,0.479494,0.480839,
  0.482184,0.483527,0.484869,0.486210,0.487550,0.488889,0.490226,0.491563,
  0.492898,0.494232,0.495565,0.496897,0.498228,0.499557,0.500885,0.502212,
  0.503538,0.504863,0.506187,0.507509,0.508830,0.510150,0.511469,0.512786,
  0.514103,0.515418,0.516732,0.518045,0.519356,0.520666,0.521975,0.523283,
  0.524590,0.525895,0.527199,0.528502,0.529804,0.531104,0.532403,0.533701,
  0.534998,0.536293,0.537587,0.538880,0.540171,0.541462,0.542751,0.544039,
  0.545325,0.546610,0.547894,0.549177,0.550458,0.551738,0.553017,0.554294,
  0.555570,0.556845,0.558119,0.559391,0.560662,0.561931,0.563199,0.564466,
  0.565732,0.566996,0.568259,0.569521,0.570781,0.572040,0.573297,0.574553,
  0.575808,0.577062,0.578314,0.579565,0.580814,0.582062,0.583309,0.584554,
  0.585798,0.587040,0.588282,0.589521,0.590760,0.591997,0.593232,0.594466,
  0.595699,0.596931,0.598161,0.599389,0.600616,0.601842,0.603067,0.604290,
  0.605511,0.606731,0.607950,0.609167,0.610383,0.611597,0.612810,0.614022,
  0.615232,0.616440,0.617647,0.618853,0.620057,0.621260,0.622461,0.623661,
  0.624859,0.626056,0.627252,0.628446,0.629638,0.630829,0.632019,0.633207,
  0.634393,0.635578,0.636762,0.637944,0.639124,0.640303,0.641481,0.642657,
  0.643832,0.645005,0.646176,0.647346,0.648514,0.649681,0.650847,0.652011,
  0.653173,0.654334,0.655493,0.656651,0.657807,0.658961,0.660114,0.661266,
  0.662416,0.663564,0.664711,0.665856,0.667000,0.668142,0.669283,0.670422,
  0.671559,0.672695,0.673829,0.674962,0.676093,0.677222,0.678350,0.679476,
  0.680601,0.681724,0.682846,0.683965,0.685084,0.686200,0.687315,0.688429,
  0.689541,0.690651,0.691759,0.692866,0.693971,0.695075,0.696177,0.697278,
  0.698376,0.699473,0.700569,0.701663,0.702755,0.703845,0.704934,0.706021,
  0.707107,0.708191,0.709273,0.710353,0.711432,0.712509,0.713585,0.714659,
  0.715731,0.716801,0.717870,0.718937,0.720003,0.721066,0.722128,0.723188,
  0.724247,0.725304,0.726359,0.727413,0.728464,0.729514,0.730563,0.731609,
  0.732654,0.733697,0.734739,0.735779,0.736817,0.737853,0.738887,0.739920,
  0.740951,0.741980,0.743008,0.744034,0.745058,0.746080,0.747101,0.748119,
  0.749136,0.750152,0.751165,0.752177,0.753187,0.754195,0.755201,0.756206,
  0.757209,0.758210,0.759209,0.760207,0.761202,0.762196,0.763188,0.764179,
  0.765167,0.766154,0.767139,0.768122,0.769103,0.770083,0.771061,0.772036,
  0.773010,0.773983,0.774953,0.775922,0.776888,0.777853,0.778817,0.779778,
  0.780737,0.781695,0.782651,0.783605,0.784557,0.785507,0.786455,0.787402,
  0.788346,0.789289,0.790230,0.791169,0.792107,0.793042,0.793975,0.794907,
  0.795837,0.796765,0.797691,0.798615,0.799537,0.800458,0.801376,0.802293,
  0.803208,0.804120,0.805031,0.805940,0.806848,0.807753,0.808656,0.809558,
  0.810457,0.811355,0.812251,0.813144,0.814036,0.814926,0.815814,0.816701,
  0.817585,0.818467,0.819348,0.820226,0.821103,0.821977,0.822850,0.823721,
  0.824589,0.825456,0.826321,0.827184,0.828045,0.828904,0.829761,0.830616,
  0.831470,0.832321,0.833170,0.834018,0.834863,0.835706,0.836548,0.837387,
  0.838225,0.839060,0.839894,0.840725,0.841555,0.842383,0.843208,0.844032,
  0.844854,0.845673,0.846491,0.847307,0.848120,0.848932,0.849742,0.850549,
  0.851355,0.852159,0.852961,0.853760,0.854558,0.855354,0.856147,0.856939,
  0.857729,0.858516,0.859302,0.860085,0.860867,0.861646,0.862424,0.863199,
  0.863973,0.864744,0.865514,0.866281,0.867046,0.867809,0.868571,0.869330,
  0.870087,0.870842,0.871595,0.872346,0.873095,0.873842,0.874587,0.875329,
  0.876070,0.876809,0.877545,0.878280,0.879012,0.879743,0.880471,0.881197,
  0.881921,0.882643,0.883363,0.884081,0.884797,0.885511,0.886223,0.886932,
  0.887640,0.888345,0.889048,0.889750,0.890449,0.891146,0.891841,0.892534,
  0.893224,0.893913,0.894599,0.895284,0.895966,0.896646,0.897325,0.898001,
  0.898674,0.899346,0.900016,0.900683,0.901349,0.902012,0.902673,0.903332,
  0.903989,0.904644,0.905297,0.905947,0.906596,0.907242,0.907886,0.908528,
  0.909168,0.909806,0.910441,0.911075,0.911706,0.912335,0.912962,0.913587,
  0.914210,0.914830,0.915449,0.916065,0.916679,0.917291,0.917901,0.918508,
  0.919114,0.919717,0.920318,0.920917,0.921514,0.922109,0.922701,0.923291,
  0.923880,0.924465,0.925049,0.925631,0.926210,0.926787,0.927363,0.927935,
  0.928506,0.929075,0.929641,0.930205,0.930767,0.931327,0.931884,0.932440,
  0.932993,0.933544,0.934093,0.934639,0.935184,0.935726,0.936266,0.936803,
  0.937339,0.937872,0.938404,0.938932,0.939459,0.939984,0.940506,0.941026,
  0.941544,0.942060,0.942573,0.943084,0.943593,0.944100,0.944605,0.945107,
  0.945607,0.946105,0.946601,0.947094,0.947586,0.948075,0.948561,0.949046,
  0.949528,0.950008,0.950486,0.950962,0.951435,0.951906,0.952375,0.952842,
  0.953306,0.953768,0.954228,0.954686,0.955141,0.955594,0.956045,0.956494,
  0.956940,0.957385,0.957826,0.958266,0.958703,0.959139,0.959572,0.960002,
  0.960431,0.960857,0.961280,0.961702,0.962121,0.962538,0.962953,0.963366,
  0.963776,0.964184,0.964590,0.964993,0.965394,0.965793,0.966190,0.966584,
  0.966976,0.967366,0.967754,0.968139,0.968522,0.968903,0.969281,0.969657,
  0.970031,0.970403,0.970772,0.971139,0.971504,0.971866,0.972226,0.972584,
  0.972940,0.973293,0.973644,0.973993,0.974339,0.974684,0.975025,0.975365,
  0.975702,0.976037,0.976370,0.976700,0.977028,0.977354,0.977677,0.977999,
  0.978317,0.978634,0.978948,0.979260,0.979570,0.979877,0.980182,0.980485,
  0.980785,0.981083,0.981379,0.981673,0.981964,0.982253,0.982539,0.982824,
  0.983105,0.983385,0.983662,0.983937,0.984210,0.984480,0.984749,0.985014,
  0.985278,0.985539,0.985798,0.986054,0.986308,0.986560,0.986809,0.987057,
  0.987301,0.987544,0.987784,0.988022,0.988258,0.988491,0.988722,0.988950,
  0.989177,0.989400,0.989622,0.989841,0.990058,0.990273,0.990485,0.990695,
  0.990903,0.991108,0.991311,0.991511,0.991710,0.991906,0.992099,0.992291,
  0.992480,0.992666,0.992850,0.993032,0.993212,0.993389,0.993564,0.993737,
  0.993907,0.994075,0.994240,0.994404,0.994565,0.994723,0.994879,0.995033,
  0.995185,0.995334,0.995481,0.995625,0.995767,0.995907,0.996045,0.996180,
  0.996313,0.996443,0.996571,0.996697,0.996820,0.996941,0.997060,0.997176,
  0.997290,0.997402,0.997511,0.997618,0.997723,0.997825,0.997925,0.998023,
  0.998118,0.998211,0.998302,0.998390,0.998476,0.998559,0.998640,0.998719,
  0.998795,0.998870,0.998941,0.999011,0.999078,0.999142,0.999205,0.999265,
  0.999322,0.999378,0.999431,0.999481,0.999529,0.999575,0.999619,0.999660,
  0.999699,0.999735,0.999769,0.999801,0.999831,0.999858,0.999882,0.999905,
  0.999925,0.999942,0.999958,0.999971,0.999981,0.999989,0.999995,0.999999};

  // Table-driven sine (this variant sits in the disabled "#if 0" branch).
  //
  // The angle is scaled so that one quadrant (M_PI * 0.5) spans 1024 steps.
  // Bits 10-11 of the scaled value select the quadrant and the low 10 bits
  // index sintable, which is read forwards or backwards and negated in the
  // last two quadrants.
  float __fastcall FastSin(float a)
  {
    int step = 1024 * a / (M_PI * 0.5);
    const int quadrant = ( step >> 10 ) & 3;

    step &= 1023;

    if ( quadrant == 0 ) return sintable[step];
    if ( quadrant == 1 ) return sintable[1023-step];
    if ( quadrant == 2 ) return -sintable[step];
    if ( quadrant == 3 ) return -sintable[1023-step];

    return 0;   // not reachable: quadrant is always 0..3
  }

  // Table-driven cosine (this variant sits in the disabled "#if 0" branch).
  //
  // Uses the same scaling as the table-driven FastSin (1024 steps per
  // quadrant of M_PI * 0.5) but with the quadrant-to-lookup mapping shifted
  // by one quadrant so that sintable can be reused.
  float __fastcall FastCos(float a)
  {
    int step = 1024 * a / (M_PI * 0.5);
    const int quadrant = ( step >> 10 ) & 3;

    step &= 1023;

    if ( quadrant == 3 ) return sintable[step];
    if ( quadrant == 0 ) return sintable[1023-step];
    if ( quadrant == 1 ) return -sintable[step];
    if ( quadrant == 2 ) return -sintable[1023-step];

    return 0;   // not reachable: quadrant is always 0..3
  }
#else
  // FastSin / FastCos: the variants that are actually compiled (the
  // table-driven versions above are in the disabled branch).
  //
  //   - WIN32 with ASM evaluating true: naked x87 routines that load the
  //     argument from [esp+4], execute fsin / fcos and return with "ret 4".
  //   - WIN32 otherwise: __fastcall wrappers around sinf / cosf.
  //   - non-WIN32: wrappers around sin / cos.
  //
  // NOTE(review): the non-WIN32 wrappers call sin/cos rather than sinf/cosf;
  // whether that resolves to the float or double version depends on the
  // headers in use -- confirm.
  #ifdef WIN32
    #if ASM
      __declspec(naked) float __fastcall FastSin(float a)
      {
        __asm
        {
          fld   DWORD PTR [esp+4]
          fsin
          ret 4
        }
      }

      __declspec(naked) float __fastcall FastCos(float a)
      {
        __asm
        {
          fld   DWORD PTR [esp+4]
          fcos
          ret 4
        }
      }
    #else
      float __fastcall FastSin(float a)
      {
        return sinf(a);
      }

      float __fastcall FastCos(float a)
      {
        return cosf(a);
      }
    #endif
  #else
    float FastSin(float a)
    {
      return sin(a);
    }

    float FastCos(float a)
    {
      return cos(a);
    }
  #endif
#endif

// Reciprocal square root
//
// Approximates 1/sqrt(number) using the bit-level initial guess with the
// magic constant 0x5f3759df, refined by a single Newton-Raphson step.
// The input is expected to be a positive, finite float; no check is made.
//
// The float's bits are moved in and out of an integer with memcpy rather
// than through a casted pointer. The previous "long" pointer cast read
// 8 bytes from a 4-byte float on platforms where long is 64 bits wide and
// violated the aliasing rules. Like InverseSqrt in this file, this assumes
// int and float are both 32 bits wide.
float rSqrt( float number )
{
  const float threehalfs = 1.5F;
  const float x2 = number * 0.5F;
  float y = number;
  int   i;

  memcpy(&i, &y, sizeof(i));                  // float bits -> integer
  i = 0x5f3759df - ( i >> 1 );                // initial guess
  memcpy(&y, &i, sizeof(y));                  // integer bits -> float
  y = y * ( threehalfs - ( x2 * y * y ) );    // 1st Newton iteration
  // A 2nd identical iteration would improve accuracy; it is deliberately
  // left out for speed.

  return y;
}

#ifdef WIN32
  #if ASM
    #if 0
      // Hand-written x87 reciprocal square root. This variant is compiled
      // out by the enclosing "#if 0".
      //
      // It builds an integer first approximation from the bits of the
      // argument, stores scratch values at [esp-8] and [esp-12], and then
      // performs a sequence of FPU multiply/subtract steps around the
      // constant 1.5; the inline comments label the intermediate values
      // r1, r2 and r3.
      //
      // NOTE(review): the scratch slots lie below the stack pointer; whether
      // that is safe depends on the environment -- confirm before enabling.
      __declspec(naked) float __fastcall InverseSqrt(float a)
      {
        __asm
        {
          mov   eax, 0be6eb508h
          mov   DWORD PTR [esp-12],03fc00000h       ;  1.5 on the stack
          sub   eax, DWORD PTR [esp+4]            ; a
          sub   DWORD PTR [esp+4], 800000h          ; a/2 a=Y0
          shr   eax, 1                      ; firs approx in eax=R0
          mov   DWORD PTR [esp-8], eax

          fld   DWORD PTR [esp-8]               ;r
          fmul  st, st                          ;r*r
          fld   DWORD PTR [esp-8]               ;r
          fxch  st(1)
          fmul  DWORD PTR [esp+4];a ;r*r*y0
          fld   DWORD PTR [esp-12];load 1.5
          fld   st(0)
          fsub  st,st(2)                  ;r1 = 1.5 - y1
                                    ;x1 = st(3)
                                    ;y1 = st(2)
                                    ;1.5 = st(1)
                                    ;r1 = st(0)

          fld   st(1)
          fxch  st(1)
          fmul  st(3),st                  ; y2=y1*r1*...
          fmul  st(3),st                  ; y2=y1*r1*r1
          fmulp st(4),st                        ; x2=x1*r1
          fsub  st,st(2)                        ; r2=1.5-y2
                                    ;x2=st(3)
                                    ;y2=st(2)
                                    ;1.5=st(1)
                                    ;r2 = st(0)

          fmul  st(2),st                  ;y3=y2*r2*...
          fmul  st(3),st                  ;x3=x2*r2
          fmulp st(2),st                  ;y3=y2*r2*r2
          fxch  st(1)
          fsubp st(1),st                  ;r3= 1.5 - y3
                                    ;x3 = st(1)
                                    ;r3 = st(0)
          fmulp st(1), st
          ret 4
        }
      }
    #else
      // Fast approximate 1/sqrt(a) (used on WIN32 when ASM evaluates true).
      //
      // Takes the bits of the float, forms an initial guess with the magic
      // constant 0x5f3759df and refines it with one Newton step. The bits
      // are copied with memcpy instead of being read through a casted
      // pointer, which avoids the aliasing violation of the cast form while
      // producing the same value. Assumes int and float are both 32 bits.
      float __fastcall InverseSqrt(float a)
      {
        const float ahalf = 0.5f * a;
        int i;

        memcpy(&i, &a, sizeof(i));          // get bits for floating value
        i = 0x5f3759df - (i >> 1);          // gives initial guess y0
        memcpy(&a, &i, sizeof(a));          // convert bits back to float
        return a * (1.5f - ahalf * a * a);  // Newton step, repeating increases accuracy
      }
    #endif
  #else
    // WIN32 variant used when ASM evaluates false: reciprocal square root
    // computed directly as 1 / sqrtf(a), with no approximation trick.
    // No check is made for a <= 0.
    float __fastcall InverseSqrt(float a)
    {
      return 1.f/sqrtf(a);
    }
  #endif
#else
  // Non-WIN32 variant: reciprocal square root computed as 1 / sqrt(a).
  // No check is made for a <= 0.
  float InverseSqrt(float a)
  {
    return 1/sqrt(a);
  }
#endif

// Approximations:
#ifdef WIN32
  // WIN32 square root built on InverseSqrt: sqrt(a) = a * (1/sqrt(a)).
  // A zero argument is answered directly with 0 so that InverseSqrt is never
  // called with 0. Negative arguments are not checked.
  float __fastcall FastSqrt(float a)
  {
    if (a)
    {
      return a*InverseSqrt(a);
    }

    return 0;
  }
#else
  // Non-WIN32 variant: simply forwards to the library sqrt.
  float FastSqrt(float a)
  {
    return sqrt(a);
  }
#endif

// optimized dot product
#if 0
// Function form of DotProduct for two 3-component vectors. The whole section
// is compiled out by the enclosing "#if 0", and would additionally be skipped
// whenever DotProduct is already defined as a macro.
//
//   - ASM evaluating true: inline x87 assembly that multiplies the three
//     component pairs, adds them and stores the sum in dotret.
//   - otherwise: the plain C expression v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2].
#ifndef DotProduct
  #if ASM
    #pragma warning (disable: 4035)
    //__declspec( naked )
    float __cdecl DotProduct(const vec3_t v1, const vec3_t v2)
    {
      FLOAT dotret;
      _asm
      {
        mov     ecx, v1
        mov     eax, v2

        ;optimized dot product; 15 cycles
        fld dword ptr   [eax+0]     ;starts & ends on cycle 0
        fmul dword ptr  [ecx+0]     ;starts on cycle 1
        fld dword ptr   [eax+4]     ;starts & ends on cycle 2
        fmul dword ptr  [ecx+4]     ;starts on cycle 3
        fld dword ptr   [eax+8]     ;starts & ends on cycle 4
        fmul dword ptr  [ecx+8]     ;starts on cycle 5
        fxch            st(1)       ;no cost
        faddp           st(2),st(0) ;starts on cycle 6, stalls for cycles 7-8
        faddp           st(1),st(0) ;starts on cycle 9, stalls for cycles 10-12
        fstp dword ptr  [dotret]    ;starts on cycle 13, ends on cycle 14

    //    ret
      }
      return dotret;
    }
    #pragma warning( default: 4035 )
  #else
    float __cdecl DotProduct(const vec3_t v1, const vec3_t v2)
    {
      return v1[0]*v2[0] + v1[1]*v2[1] + v1[2]*v2[2];
    }
  #endif
#endif
#endif

// Matrix initialisation
// Set the 3x3 matrix A (9 consecutive floats) to the identity matrix:
// ones at indices 0, 4 and 8, zeros everywhere else.
void InitMat3x3(float *A)
{
  for (int i = 0; i < 9; ++i)
  {
    // Diagonal entries are spaced 4 apart in a 3-wide layout.
    A[i] = (i % 4 == 0) ? 1.f : 0.f;
  }
}

// Set the 4x4 matrix A (16 consecutive floats) to the identity matrix:
// ones at indices 0, 5, 10 and 15, zeros everywhere else.
void InitMat4x4(float *A)
{
  for (int i = 0; i < 16; ++i)
  {
    // Diagonal entries are spaced 5 apart in a 4-wide layout.
    A[i] = (i % 5 == 0) ? 1.f : 0.f;
  }
}

// 3x3 matrix product: C = A * B.
//
// Each matrix is 9 consecutive floats with element (row, col) stored at
// index row*3 + col. The product is assembled in a local array and copied
// out at the end, so C may be the same array as A or B.
void MultMat3x3(float *A, float *B, float *C)
{
  float product[9];

  for (int row = 0; row < 3; ++row)
  {
    const float *a = A + 3*row;

    for (int col = 0; col < 3; ++col)
    {
      product[3*row + col] = a[0]*B[col] + a[1]*B[3 + col] + a[2]*B[6 + col];
    }
  }

  memcpy(C, product, sizeof(product));
}

// 4x4 matrix product: C = A * B.
//
// Each matrix is 16 consecutive floats with element (row, col) stored at
// index row*4 + col. The product is assembled in a local array and copied
// out at the end, so C may be the same array as A or B.
void MultMat4x4(float *A, float *B, float *C)
{
  float product[16];

  for (int row = 0; row < 4; ++row)
  {
    const float *a = A + 4*row;

    for (int col = 0; col < 4; ++col)
    {
      product[4*row + col] = a[0]*B[col] + a[1]*B[4 + col] + a[2]*B[8 + col] + a[3]*B[12 + col];
    }
  }

  memcpy(C, product, sizeof(product));
}

// Vector and matrix multiplication
// dest = A * v for a 3x3 matrix A (9 floats, each row being 3 consecutive
// floats) and a 3-component vector v.
//
// All three components are computed before anything is stored, so dest may
// be the same array as v. Writing dest[0] first would otherwise change v[0]
// before the remaining rows were evaluated.
void MultVect3x3(float *A, float *v, float *dest)
{
  float x, y, z;

  #if 0
    // Disabled alternative: multiply by the transpose of A.
    x = A[0]*v[0] + A[3]*v[1] + A[6]*v[2];
    y = A[1]*v[0] + A[4]*v[1] + A[7]*v[2];
    z = A[2]*v[0] + A[5]*v[1] + A[8]*v[2];
  #else
    x = DotProduct(&A[0], v);
    y = DotProduct(&A[3], v);
    z = DotProduct(&A[6], v);
  #endif

  dest[0] = x;
  dest[1] = y;
  dest[2] = z;
}

void MultVect4x4(float *A, float *v, float *dest)
{
  vec3_t t;

  #if 0
    t[0] = A[0]*v[0] + A[4]*v[1] + A[8]*v[2]  + A[12];
    t[1] = A[1]*v[0] + A[5]*v[1] + A[9]*v[2]  + A[13];
    t[2] = A[2]*v[0] + A[6]*v[1] + A[10]*v[2] + A[14];
  #else
    t[0] = A[0]*v[0] + A[1]*v[1] + A[2]*v[2]  + A[12];
    t[1] = A[4]*v[0] + A[5]*v[1] + A[6]*v[2]  + A[13];
    t[2] = A[8]*v[0] + A[9]*v[1] + A[10]*v[2] + A[14];
  #endif

  // Recopie le vecteur obtenu dans le vecteur de destination
  // pour la m�me raison que dans le produit matriciel
  VectorCopy(t, dest);
}

// Fast normalization of a 3-component vector, in place.
// Scales v by rSqrt(v . v), so the result is only as accurate as rSqrt.
// The vector is not tested for zero length.
void FastNormVect3(float *v)
{
  const float scale = rSqrt(DotProduct(v, v));

  for (int i = 0; i < 3; ++i)
  {
    v[i] *= scale;
  }
}

// Fast normalization of a 2-component vector, in place.
// Scales v by rSqrt of its squared length, so the result is only as accurate
// as rSqrt. The vector is not tested for zero length.
void FastNormVect2(float *v)
{
  const float scale = rSqrt(v[0]*v[0] + v[1]*v[1]);

  for (int i = 0; i < 2; ++i)
  {
    v[i] *= scale;
  }
}

// Slow normalization that returns the norm.
// Normalizes v in place using sqrtf and returns the length v had before the
// call. A vector whose squared length is zero is left untouched and 0 is
// returned.
vec_t VectorNormalize(vec3_t v)
{
  float norm = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];

  if (!norm) return norm;

  norm = sqrtf(norm);   // FIXME (marker carried over; reason not recorded)

  const float inv = 1/norm;
  for (int i = 0; i < 3; ++i)
  {
    v[i] *= inv;
  }

  return norm;
}

// Like VectorNormalize, but writes the unit vector to out and leaves v as it
// is. Returns the length of v. When the squared length of v is zero, out is
// cleared with VectorClear and 0 is returned.
vec_t VectorNormalize2(vec3_t v, vec3_t out)
{
  float norm = v[0]*v[0] + v[1]*v[1] + v[2]*v[2];

  if (!norm)
  {
    VectorClear (out);
    return norm;
  }

  norm = sqrtf(norm);   // FIXME (marker carried over; reason not recorded)

  const float inv = 1/norm;
  for (int i = 0; i < 3; ++i)
  {
    out[i] = v[i]*inv;
  }

  return norm;
}

// Normalize v in place by scaling it with InverseSqrt(v . v).
// Accuracy follows whichever InverseSqrt variant is compiled in; the vector
// is not tested for zero length.
void VectorNormalizeFast(vec3_t v)
{
  const float scale = InverseSqrt(DotProduct(v,v));

  for (int i = 0; i < 3; ++i)
  {
    v[i] *= scale;
  }
}

// Clamp a color into range by uniform scaling.
//
// If the largest component of in exceeds 1.0, every component is multiplied
// by the reciprocal of that largest component and written to out; otherwise
// in is copied to out unchanged.
// Returns the scale factor that was applied when scaling took place, and the
// largest component itself when it did not.
float ColorNormalize(vec3_t in, vec3_t out)
{
  float f = max (max (in[0], in[1]), in[2]);

  if ( !( f > 1.0 ) )
  {
    // Already in range: straight copy.
    for (int i = 0; i < 3; ++i)
    {
      out[i] = in[i];
    }
    return f;
  }

  f = 1.f / f;
  for (int i = 0; i < 3; ++i)
  {
    out[i] = in[i] * f;
  }

  return f;
}

// Cross Product
#ifndef CrossProduct
void CrossProduct(const vec3_t v1, const vec3_t v2, vec3_t cross)
{
  cross[0] = v1[1]*v2[2] - v1[2]*v2[1];
  cross[1] = v1[2]*v2[0] - v1[0]*v2[2];
  cross[2] = v1[0]*v2[1] - v1[1]*v2[0];
}
#endif

// Quadratic Bezier interpolation through three control points.
// mu runs from 0 (curve start, p1) to 1 (curve end, p3); the interpolated
// point is written to dest.
void BezierCurve3(vec3_t p1, vec3_t p2, vec3_t p3, float mu, vec3_t dest)
{
  const float t2 = mu * mu;      // mu^2
  const float s  = 1 - mu;       // (1 - mu)
  const float s2 = s * s;        // (1 - mu)^2

  for (int axis = 0; axis < 3; ++axis)
  {
    dest[axis] = p1[axis] * s2 + 2 * p2[axis] * s * mu + p3[axis] * t2;
  }
}

// Cubic Bezier interpolation through four control points.
// mu runs from 0 (curve start, p1) to 1 (curve end, p4); the interpolated
// point is written to dest.
void BezierCurve4(vec3_t p1, vec3_t p2, vec3_t p3, vec3_t p4, float mu, vec3_t dest)
{
  const float s  = 1 - mu;         // (1 - mu)
  const float s3 = s * s * s;      // (1 - mu)^3
  const float t3 = mu * mu * mu;   // mu^3

  for (int axis = 0; axis < 3; ++axis)
  {
    dest[axis] = s3*p1[axis] + 3*mu*s*s*p2[axis] + 3*mu*mu*s*p3[axis] + t3*p4[axis];
  }
}

// General Bezier curve
// Number of control points is n (curve degree n-1); p holds the n points.
// The interpolated point for parameter mu is written to dest.
//
// The running power of (1-mu) is maintained by repeated division by (1-mu),
// which is 0/0 when mu == 1. That case is therefore answered directly with
// the last control point, which is the value of the curve at mu == 1.
// For n <= 0, dest is simply cleared.
void BezierCurveN(vec3_t *p, int n, float mu, vec3_t dest)
{
  int kn, nn, nkn;
  float blend, muk, munk;
  VectorClear(dest);

  // End of the curve: avoid dividing by (1-mu) == 0 below.
  if (n > 0 && (1-mu) == 0)
  {
    dest[0] = p[n-1][0];
    dest[1] = p[n-1][1];
    dest[2] = p[n-1][2];
    return;
  }

  muk = 1;                              // mu^k, starting at k = 0
  munk = powf(1-mu, (float) (n-1));     // (1-mu)^(n-1-k), starting at k = 0

  for (int k = 0; k < n; ++k)
  {
    nn = n-1;
    kn = k;
    nkn = nn - k;
    blend = muk * munk;
    muk *= mu;
    munk /= (1-mu);
    // Multiply by (n-1)! / (k! * (n-1-k)!), interleaving the factors so the
    // intermediate value stays small.
    while (nn >= 1)
    {
      blend *= nn; --nn;
      if (kn > 1) { blend /= ((float) kn); --kn; }
      if (nkn > 1) { blend /= ((float) nkn); --nkn; }
    }
    dest[0] += (p[k][0] * blend);
    dest[1] += (p[k][1] * blend);
    dest[2] += (p[k][2] * blend);
  }
}

//-----------------------------------------------------------------------------
// Bounding box related functions
//-----------------------------------------------------------------------------

// Calculate the bounding box of a mesh surface.
// Scans the surf->numverts[0] vertices starting at surf->firstvert[0] and
// stores the per-axis minimum in surf->bbox[0..2] and the per-axis maximum in
// surf->bbox[3..5]. The first vertex is read unconditionally.
void CalcFaceBounds(Surface *surf)
{
  vertex_t * vert = surf->firstvert[0];
  float lo[3], hi[3];
  int axis;

  // Seed both extremes with the first vertex.
  for (axis = 0; axis < 3; ++axis)
  {
    lo[axis] = hi[axis] = vert->v_point[axis];
  }

  for (int i = 1; i < surf->numverts[0]; ++i)
  {
    ++vert;
    for (axis = 0; axis < 3; ++axis)
    {
      lo[axis] = min(lo[axis], vert->v_point[axis]);
      hi[axis] = max(hi[axis], vert->v_point[axis]);
    }
  }

  for (axis = 0; axis < 3; ++axis)
  {
    surf->bbox[axis]     = lo[axis];
    surf->bbox[axis + 3] = hi[axis];
  }
}

// Reset a 6-float bounding box to an "empty" state: the three minimums
// (bbox[0..2]) become 99999 and the three maximums (bbox[3..5]) become
// -99999, so that any point added afterwards replaces them.
void ClearBounds(bboxf_t bbox)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    bbox[axis]     = 99999;
    bbox[axis + 3] = -99999;
  }
}

// Reset a mins/maxs pair to an "empty" state: every component of mins becomes
// 99999 and every component of maxs becomes -99999, so that any point added
// afterwards replaces them.
void ClearBounds(vec3_t mins, vec3_t maxs)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    mins[axis] = 99999;
    maxs[axis] = -99999;
  }
}

// Overlap test between a 6-float box (mins in bbox[0..2], maxs in bbox[3..5])
// and a box given as mins2/maxs2. Boxes that merely touch count as
// intersecting.
bool BoundsIntersect(bboxf_t bbox, vec3_t mins2, vec3_t maxs2)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (!(bbox[axis] <= maxs2[axis]))     return false;
    if (!(bbox[axis + 3] >= mins2[axis])) return false;
  }

  return true;
}

// Overlap test between a 6-float box and a sphere.
// The sphere is treated as the axis-aligned cube centre +/- radius, so the
// test can answer true for a sphere that only comes close to a corner of the
// box without actually touching it.
bool BoundsAndSphereIntersect(bboxf_t bbox, vec3_t centre, float radius)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (!(bbox[axis] <= centre[axis]+radius))     return false;
    if (!(bbox[axis + 3] >= centre[axis]-radius)) return false;
  }

  return true;
}

// Overlap test between two boxes, each given as a mins/maxs pair.
// Boxes that merely touch count as intersecting.
bool BoundsIntersect(vec3_t mins1, vec3_t maxs1, vec3_t mins2, vec3_t maxs2)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (!(mins1[axis] <= maxs2[axis])) return false;
    if (!(maxs1[axis] >= mins2[axis])) return false;
  }

  return true;
}

// Overlap test between a mins/maxs box and a sphere.
// The sphere is treated as the axis-aligned cube centre +/- radius, so the
// test can answer true for a sphere that only comes close to a corner of the
// box without actually touching it.
bool BoundsAndSphereIntersect(vec3_t mins, vec3_t maxs, vec3_t centre, float radius)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (!(mins[axis] <= centre[axis]+radius)) return false;
    if (!(maxs[axis] >= centre[axis]-radius)) return false;
  }

  return true;
}

// Compute the overlap of two 6-float boxes into dest: the larger of the two
// minimums and the smaller of the two maximums on each axis.
// Returns true when the resulting box is non-inverted on every axis
// (min <= max), i.e. when the two input boxes collide.
// dest is written even when the function returns false.
bool GetIntersection(bboxf_t bbox1, bboxf_t bbox2, bboxf_t dest)
{
  int axis;

  for (axis = 0; axis < 3; ++axis)
  {
    dest[axis]     = max(bbox1[axis], bbox2[axis]);
    dest[axis + 3] = min(bbox1[axis + 3], bbox2[axis + 3]);
  }

  // if volume exists, we have a collision between bounding boxes
  for (axis = 0; axis < 3; ++axis)
  {
    if (!(dest[axis] <= dest[axis + 3])) return false;
  }

  return true;
}

// Grow a 6-float box so that it contains the point v: each minimum
// (bbox[0..2]) is lowered and each maximum (bbox[3..5]) is raised as needed.
void AddPointToBounds(vec3_t v, bboxf_t bbox)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (v[axis] < bbox[axis])     bbox[axis]     = v[axis];
    if (v[axis] > bbox[axis + 3]) bbox[axis + 3] = v[axis];
  }
}

// Grow a mins/maxs box so that it contains the point v: each component of
// mins is lowered and each component of maxs is raised as needed.
void AddPointToBounds(vec3_t v, vec3_t mins, vec3_t maxs)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (v[axis] < mins[axis]) mins[axis] = v[axis];
    if (v[axis] > maxs[axis]) maxs[axis] = v[axis];
  }
}

// Returns true when point lies inside the 6-float box or on its surface
// (mins in bbox[0..2], maxs in bbox[3..5]).
bool PointInBounds(vec3_t point, bboxf_t bbox)
{
  for (int axis = 0; axis < 3; ++axis)
  {
    if (!(point[axis] >= bbox[axis]))     return false;
    if (!(point[axis] <= bbox[axis + 3])) return false;
  }

  return true;
}

//-----------------------------------------------------------------------------
// Plane operations:
//-----------------------------------------------------------------------------

// Generate a plane given 3 points.
//
// plane[0..2] receives the normalized normal and plane[3] the plane distance
// (dot product of a with the normal).
// Returns false if the triangle is degenerate (the normal has zero length);
// plane[3] is not written in that case.
// The normal will point out of the clock for clockwise ordered points.
bool PlaneFromPoints(vec4_t plane, const vec3_t a, const vec3_t b, const vec3_t c)
{
  vec3_t from_b, from_c;

  VectorSub(c, a, from_c);
  VectorSub(b, a, from_b);
  CrossProduct(from_c, from_b, plane);

  const bool degenerate = (VectorNormalize(plane) == 0);
  if (!degenerate)
  {
    plane[3] = DotProduct(a, plane);
  }

  return !degenerate;
}

// Build plane from the positions (v_point) of three vertices.
// plane->normal receives the normalized cross product of the two edges
// leaving verts[0], and plane->dist the dot product of verts[0] with that
// normal. A degenerate triangle is not detected here.
void PlaneFromPoints(vertex_t verts[3], cplane_t *plane)
{
  vec3_t from_1, from_2;

  VectorSub(verts[2].v_point, verts[0].v_point, from_2);
  VectorSub(verts[1].v_point, verts[0].v_point, from_1);

  CrossProduct(from_2, from_1, plane->normal);
  VectorNormalize(plane->normal);

  plane->dist = DotProduct(verts[0].v_point, plane->normal);
}

// Build plane from three points.
// plane->normal receives the normalized cross product of the two edges
// leaving verts[0], and plane->dist the dot product of verts[0] with that
// normal. A degenerate triangle is not detected here.
void PlaneFromPoints(vec3_t verts[3], cplane_t *plane)
{
  vec3_t from_1, from_2;

  VectorSub(verts[2], verts[0], from_2);
  VectorSub(verts[1], verts[0], from_1);

  CrossProduct(from_2, from_1, plane->normal);
  VectorNormalize(plane->normal);

  plane->dist = DotProduct(verts[0], plane->normal);
}

// Fill in the lookup fields of a plane from its normal.
//   signbits: bit i is set when normal[i] is negative.
//   type:     the axis index (0, 1 or 2) whose normal component is exactly
//             1.0f, or PLANE_ANYZ when no component is; if several match,
//             the highest axis wins.
void CategorizePlane(cplane_t *plane)
{
  plane->signbits = 0;
  plane->type = PLANE_ANYZ;

  for (int axis = 0; axis < 3; ++axis)
  {
    if (plane->normal[axis] < 0)     plane->signbits |= 1<<axis;
    if (plane->normal[axis] == 1.0f) plane->type = axis;
  }
}

// Signed distance from point v to plane p.
// Planes whose type is below PLANE_NON_AXIAL only use the single component
// selected by p->type; all others use the full dot product with the normal.
float PointDistance(vec3_t v, cplane_t* p)
{
  if (!(p->type < PLANE_NON_AXIAL))
  {
    return DotProduct(v, p->normal)-p->dist;
  }

  return v[p->type]*p->normal[p->type]-p->dist;
}

// returns the side of the plane in which the box is
#if !defined(WIN32) || !defined(ASM)
  // Classify the box emins..emaxs against plane p.
  //
  // Returns a bit mask: 1 when the box reaches the side at or beyond
  // p->dist along the normal, 2 when it reaches the side before p->dist,
  // and 3 when it does both (the plane cuts the box).
  //
  // Planes with type < 3 are handled by comparing p->dist with the box
  // extent on that single axis. Otherwise p->signbits picks, per axis, which
  // box corner gives the largest (dist1) and smallest (dist2) projection on
  // the normal: bit i set means the normal component on axis i is treated as
  // negative. A signbits value outside 0..7 makes both projections 0.
  int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p)
  {
    // fast axial cases
    if (p->type < 3)
    {
      if (p->dist <= emins[p->type]) return 1;
      if (p->dist >= emaxs[p->type]) return 2;
      return 3;
    }

    // general case
    float dist1 = 0, dist2 = 0;
    const int bits = p->signbits;

    if (bits >= 0 && bits <= 7)
    {
      vec_t far_corner[3], near_corner[3];

      for (int axis = 0; axis < 3; ++axis)
      {
        const bool negative = ((bits >> axis) & 1) != 0;
        far_corner[axis]  = negative ? emins[axis] : emaxs[axis];
        near_corner[axis] = negative ? emaxs[axis] : emins[axis];
      }

      dist1 = p->normal[0]*far_corner[0] + p->normal[1]*far_corner[1] + p->normal[2]*far_corner[2];
      dist2 = p->normal[0]*near_corner[0] + p->normal[1]*near_corner[1] + p->normal[2]*near_corner[2];
    }

    int sides = (dist1 >= p->dist) ? 1 : 0;
    if (dist2 < p->dist) sides |= 2;

    return sides;
  }
#else
  #pragma warning(disable: 4035)
  // Hand-scheduled x87 version of BoxOnPlaneSide.
  //
  // Computes the same two plane distances as the C implementation in the
  // other branch of this #if: for each axis, one box corner coordinate
  // (emins[i] or emaxs[i]) is multiplied by the plane normal component and
  // accumulated into dist1, while the opposite coordinate is accumulated
  // into dist2. Which coordinate goes where is chosen by a 3-bit index read
  // from the byte at offset 17 of *p (presumably p->signbits -- confirm
  // against the cplane_s declaration).
  //
  // Parameters (all read from the stack; the function pops none of them):
  //   emins - pointer to three 32-bit floats, the box minimum corner
  //   emaxs - pointer to three 32-bit floats, the box maximum corner
  //   p     - plane; the code reads three floats at offsets 0/4/8 (normal),
  //           one float at offset 12 (compared against both distances, i.e.
  //           the C version's p->dist) and the index byte at offset 17.
  //
  // Returns in eax:
  //   bit 0 (value 1) set when dist1 >= dist
  //   bit 1 (value 2) set when dist2 <  dist
  //
  // The function is naked, so there is no compiler-generated prologue or
  // epilogue: ebx is saved/restored by hand and every normal exit goes
  // through the explicit `ret` at the end of LSetSides.
  __declspec(naked) int BoxOnPlaneSide(vec3_t emins, vec3_t emaxs, struct cplane_s *p)
  {
    static int bops_initialized;  // non-zero once Ljmptab has been filled in
    static int Ljmptab[8];        // addresses of Lcase0..Lcase7, indexed by the byte at [p+17]

    __asm
    {
      push ebx                    // ebx is used as the emaxs pointer below

      // Fill the jump table on the first call only; label addresses cannot
      // be used in a C static initializer, so this is done at run time.
      cmp bops_initialized, 1
      je  initialized
      mov bops_initialized, 1

      mov Ljmptab[0*4], offset Lcase0
      mov Ljmptab[1*4], offset Lcase1
      mov Ljmptab[2*4], offset Lcase2
      mov Ljmptab[3*4], offset Lcase3
      mov Ljmptab[4*4], offset Lcase4
      mov Ljmptab[5*4], offset Lcase5
      mov Ljmptab[6*4], offset Lcase6
      mov Ljmptab[7*4], offset Lcase7

  initialized:

      // Stack offsets carry an extra +4 because of the `push ebx` above.
      mov edx,dword ptr[4+12+esp] // edx = p
      mov ecx,dword ptr[4+4+esp]  // ecx = emins
      xor eax,eax                 // clear eax so al can be used as a table index
      mov ebx,dword ptr[4+8+esp]  // ebx = emaxs
      mov al,byte ptr[17+edx]     // al  = case index (byte at offset 17 of *p)
      cmp al,8
      // Unsigned comparison: the index is a zero-extended byte, so a signed
      // `jge` would treat 0x80..0xFF as negative, skip the trap and jump
      // through an out-of-range Ljmptab entry.
      jae Lerror
      fld dword ptr[0+edx]        // st0 = normal[0]
      fld st(0)                   // duplicated: one copy per distance
      jmp dword ptr[Ljmptab+eax*4]

      // Every case below has the same instruction schedule. In each
      // fmul pair the FIRST multiply feeds dist1 and the SECOND feeds dist2;
      // only the choice of ebx (emaxs) versus ecx (emins) differs per case.

  Lcase0:                         // dist1: emaxs[0], emaxs[1], emaxs[2]
      fmul dword ptr[ebx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ebx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ebx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ecx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase1:                         // dist1: emins[0], emaxs[1], emaxs[2]
      fmul dword ptr[ecx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ebx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ebx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ecx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase2:                         // dist1: emaxs[0], emins[1], emaxs[2]
      fmul dword ptr[ebx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ecx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ebx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ecx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase3:                         // dist1: emins[0], emins[1], emaxs[2]
      fmul dword ptr[ecx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ecx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ebx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ecx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase4:                         // dist1: emaxs[0], emaxs[1], emins[2]
      fmul dword ptr[ebx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ebx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ecx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ebx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase5:                         // dist1: emins[0], emaxs[1], emins[2]
      fmul dword ptr[ecx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ebx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ecx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ebx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase6:                         // dist1: emaxs[0], emins[1], emins[2]
      fmul dword ptr[ebx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ecx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ecx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ecx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ebx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      jmp LSetSides
  Lcase7:                         // dist1: emins[0], emins[1], emins[2]
      fmul dword ptr[ecx]
      fld dword ptr[0+4+edx]
      fxch st(2)
      fmul dword ptr[ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[4+ecx]
      fld dword ptr[0+8+edx]
      fxch st(2)
      fmul dword ptr[4+ebx]
      fxch st(2)
      fld st(0)
      fmul dword ptr[8+ecx]
      fxch st(5)
      faddp st(3),st(0)
      fmul dword ptr[8+ebx]
      fxch st(1)
      faddp st(3),st(0)
      fxch st(3)
      faddp st(2),st(0)
      // case 7 falls straight through into LSetSides
  LSetSides:
      // On entry: st0 = partial dist2 (x+y terms), st1 = dist1, st2 = dist2 z term.
      faddp st(2),st(0)           // finish dist2; now st0 = dist1, st1 = dist2
      fcomp dword ptr[12+edx]     // compare dist1 with the float at [p+12], pop
      xor ecx,ecx                 // ecx accumulates the result bits
      fnstsw ax                   // status of the dist1 compare
      fcomp dword ptr[12+edx]     // compare dist2 with the float at [p+12], pop
      and ah,1                    // isolate C0 (status bit 8): set when dist1 < dist
      xor ah,1                    // invert -> 1 when dist1 >= dist
      add cl,ah                   // result bit 0
      fnstsw ax                   // status of the dist2 compare
      and ah,1                    // C0: set when dist2 < dist
      add ah,ah                   // shift into bit 1 (value 2)
      add cl,ah                   // result bit 1
      pop ebx
      mov eax,ecx                 // return value
      ret
  Lerror:
      // Index byte was >= 8: trap into the debugger. NOTE(review): there is
      // no `ret` (and no `pop ebx`) after the breakpoint, so resuming
      // execution past it runs off the end of this naked function.
      int 3
    }
  }
  #pragma warning(default: 4035)
#endif