xref: /reactos/dll/directx/wine/d3dx9_36/math.c (revision 682f85ad)
1 #ifdef __REACTOS__
2 #include "precomp.h"
3 #else
4 /*
5  * Mathematical operations specific to D3DX9.
6  *
7  * Copyright (C) 2008 David Adam
8  * Copyright (C) 2008 Luis Busquets
9  * Copyright (C) 2008 Jérôme Gardou
10  * Copyright (C) 2008 Philip Nilsson
11  * Copyright (C) 2008 Henri Verbeet
12  *
13  * This library is free software; you can redistribute it and/or
14  * modify it under the terms of the GNU Lesser General Public
15  * License as published by the Free Software Foundation; either
16  * version 2.1 of the License, or (at your option) any later version.
17  *
18  * This library is distributed in the hope that it will be useful,
19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21  * Lesser General Public License for more details.
22  *
23  * You should have received a copy of the GNU Lesser General Public
24  * License along with this library; if not, write to the Free Software
25  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
26  */
27 
28 
29 #include <float.h>
30 
31 #include "d3dx9_private.h"
32 #endif /* __REACTOS__ */
33 
34 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
35 
36 struct ID3DXMatrixStackImpl
37 {
38   ID3DXMatrixStack ID3DXMatrixStack_iface;
39   LONG ref;
40 
41   unsigned int current;
42   unsigned int stack_size;
43   D3DXMATRIX *stack;
44 };
45 
46 static const unsigned int INITIAL_STACK_SIZE = 32;
47 
48 /*_________________D3DXColor____________________*/
49 
50 D3DXCOLOR* WINAPI D3DXColorAdjustContrast(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
51 {
52     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
53 
54     pout->r = 0.5f + s * (pc->r - 0.5f);
55     pout->g = 0.5f + s * (pc->g - 0.5f);
56     pout->b = 0.5f + s * (pc->b - 0.5f);
57     pout->a = pc->a;
58     return pout;
59 }
60 
61 D3DXCOLOR* WINAPI D3DXColorAdjustSaturation(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
62 {
63     FLOAT grey;
64 
65     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
66 
67     grey = pc->r * 0.2125f + pc->g * 0.7154f + pc->b * 0.0721f;
68     pout->r = grey + s * (pc->r - grey);
69     pout->g = grey + s * (pc->g - grey);
70     pout->b = grey + s * (pc->b - grey);
71     pout->a = pc->a;
72     return pout;
73 }
74 
75 /*_________________Misc__________________________*/
76 
77 FLOAT WINAPI D3DXFresnelTerm(FLOAT costheta, FLOAT refractionindex)
78 {
79     FLOAT a, d, g, result;
80 
81     TRACE("costheta %f, refractionindex %f\n", costheta, refractionindex);
82 
83     g = sqrtf(refractionindex * refractionindex + costheta * costheta - 1.0f);
84     a = g + costheta;
85     d = g - costheta;
86     result = (costheta * a - 1.0f) * (costheta * a - 1.0f) / ((costheta * d + 1.0f) * (costheta * d + 1.0f)) + 1.0f;
87     result *= 0.5f * d * d / (a * a);
88 
89     return result;
90 }
91 
92 /*_________________D3DXMatrix____________________*/
93 
94 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation(D3DXMATRIX *out, FLOAT scaling, const D3DXVECTOR3 *rotationcenter,
95         const D3DXQUATERNION *rotation, const D3DXVECTOR3 *translation)
96 {
97     TRACE("out %p, scaling %f, rotationcenter %p, rotation %p, translation %p\n",
98             out, scaling, rotationcenter, rotation, translation);
99 
100     D3DXMatrixIdentity(out);
101 
102     if (rotation)
103     {
104         FLOAT temp00, temp01, temp02, temp10, temp11, temp12, temp20, temp21, temp22;
105 
106         temp00 = 1.0f - 2.0f * (rotation->y * rotation->y + rotation->z * rotation->z);
107         temp01 = 2.0f * (rotation->x * rotation->y + rotation->z * rotation->w);
108         temp02 = 2.0f * (rotation->x * rotation->z - rotation->y * rotation->w);
109         temp10 = 2.0f * (rotation->x * rotation->y - rotation->z * rotation->w);
110         temp11 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->z * rotation->z);
111         temp12 = 2.0f * (rotation->y * rotation->z + rotation->x * rotation->w);
112         temp20 = 2.0f * (rotation->x * rotation->z + rotation->y * rotation->w);
113         temp21 = 2.0f * (rotation->y * rotation->z - rotation->x * rotation->w);
114         temp22 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->y * rotation->y);
115 
116         out->u.m[0][0] = scaling * temp00;
117         out->u.m[0][1] = scaling * temp01;
118         out->u.m[0][2] = scaling * temp02;
119         out->u.m[1][0] = scaling * temp10;
120         out->u.m[1][1] = scaling * temp11;
121         out->u.m[1][2] = scaling * temp12;
122         out->u.m[2][0] = scaling * temp20;
123         out->u.m[2][1] = scaling * temp21;
124         out->u.m[2][2] = scaling * temp22;
125 
126         if (rotationcenter)
127         {
128             out->u.m[3][0] = rotationcenter->x * (1.0f - temp00) - rotationcenter->y * temp10
129                     - rotationcenter->z * temp20;
130             out->u.m[3][1] = rotationcenter->y * (1.0f - temp11) - rotationcenter->x * temp01
131                     - rotationcenter->z * temp21;
132             out->u.m[3][2] = rotationcenter->z * (1.0f - temp22) - rotationcenter->x * temp02
133                     - rotationcenter->y * temp12;
134         }
135     }
136     else
137     {
138         out->u.m[0][0] = scaling;
139         out->u.m[1][1] = scaling;
140         out->u.m[2][2] = scaling;
141     }
142 
143     if (translation)
144     {
145         out->u.m[3][0] += translation->x;
146         out->u.m[3][1] += translation->y;
147         out->u.m[3][2] += translation->z;
148     }
149 
150     return out;
151 }
152 
153 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation2D(D3DXMATRIX *out, FLOAT scaling,
154         const D3DXVECTOR2 *rotationcenter, FLOAT rotation, const D3DXVECTOR2 *translation)
155 {
156     FLOAT tmp1, tmp2, s;
157 
158     TRACE("out %p, scaling %f, rotationcenter %p, rotation %f, translation %p\n",
159             out, scaling, rotationcenter, rotation, translation);
160 
161     s = sinf(rotation / 2.0f);
162     tmp1 = 1.0f - 2.0f * s * s;
163     tmp2 = 2.0f * s * cosf(rotation / 2.0f);
164 
165     D3DXMatrixIdentity(out);
166     out->u.m[0][0] = scaling * tmp1;
167     out->u.m[0][1] = scaling * tmp2;
168     out->u.m[1][0] = -scaling * tmp2;
169     out->u.m[1][1] = scaling * tmp1;
170 
171     if (rotationcenter)
172     {
173         FLOAT x, y;
174 
175         x = rotationcenter->x;
176         y = rotationcenter->y;
177 
178         out->u.m[3][0] = y * tmp2 - x * tmp1 + x;
179         out->u.m[3][1] = -x * tmp2 - y * tmp1 + y;
180     }
181 
182     if (translation)
183     {
184         out->u.m[3][0] += translation->x;
185         out->u.m[3][1] += translation->y;
186     }
187 
188     return out;
189 }
190 
191 HRESULT WINAPI D3DXMatrixDecompose(D3DXVECTOR3 *poutscale, D3DXQUATERNION *poutrotation, D3DXVECTOR3 *pouttranslation, const D3DXMATRIX *pm)
192 {
193     D3DXMATRIX normalized;
194     D3DXVECTOR3 vec;
195 
196     TRACE("poutscale %p, poutrotation %p, pouttranslation %p, pm %p\n", poutscale, poutrotation, pouttranslation, pm);
197 
198     /*Compute the scaling part.*/
199     vec.x=pm->u.m[0][0];
200     vec.y=pm->u.m[0][1];
201     vec.z=pm->u.m[0][2];
202     poutscale->x=D3DXVec3Length(&vec);
203 
204     vec.x=pm->u.m[1][0];
205     vec.y=pm->u.m[1][1];
206     vec.z=pm->u.m[1][2];
207     poutscale->y=D3DXVec3Length(&vec);
208 
209     vec.x=pm->u.m[2][0];
210     vec.y=pm->u.m[2][1];
211     vec.z=pm->u.m[2][2];
212     poutscale->z=D3DXVec3Length(&vec);
213 
214     /*Compute the translation part.*/
215     pouttranslation->x=pm->u.m[3][0];
216     pouttranslation->y=pm->u.m[3][1];
217     pouttranslation->z=pm->u.m[3][2];
218 
219     /*Let's calculate the rotation now*/
220     if ( (poutscale->x == 0.0f) || (poutscale->y == 0.0f) || (poutscale->z == 0.0f) ) return D3DERR_INVALIDCALL;
221 
222     normalized.u.m[0][0]=pm->u.m[0][0]/poutscale->x;
223     normalized.u.m[0][1]=pm->u.m[0][1]/poutscale->x;
224     normalized.u.m[0][2]=pm->u.m[0][2]/poutscale->x;
225     normalized.u.m[1][0]=pm->u.m[1][0]/poutscale->y;
226     normalized.u.m[1][1]=pm->u.m[1][1]/poutscale->y;
227     normalized.u.m[1][2]=pm->u.m[1][2]/poutscale->y;
228     normalized.u.m[2][0]=pm->u.m[2][0]/poutscale->z;
229     normalized.u.m[2][1]=pm->u.m[2][1]/poutscale->z;
230     normalized.u.m[2][2]=pm->u.m[2][2]/poutscale->z;
231 
232     D3DXQuaternionRotationMatrix(poutrotation,&normalized);
233     return S_OK;
234 }
235 
236 FLOAT WINAPI D3DXMatrixDeterminant(const D3DXMATRIX *pm)
237 {
238     FLOAT t[3], v[4];
239 
240     TRACE("pm %p\n", pm);
241 
242     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
243     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
244     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
245     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
246     v[1] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
247 
248     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
249     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
250     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
251     v[2] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
252     v[3] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
253 
254     return pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[1] +
255         pm->u.m[0][2] * v[2] + pm->u.m[0][3] * v[3];
256 }
257 
258 D3DXMATRIX* WINAPI D3DXMatrixInverse(D3DXMATRIX *pout, FLOAT *pdeterminant, const D3DXMATRIX *pm)
259 {
260     FLOAT det, t[3], v[16];
261     UINT i, j;
262 
263     TRACE("pout %p, pdeterminant %p, pm %p\n", pout, pdeterminant, pm);
264 
265     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
266     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
267     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
268     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
269     v[4] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
270 
271     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
272     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
273     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
274     v[8] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
275     v[12] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
276 
277     det = pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[4] +
278         pm->u.m[0][2] * v[8] + pm->u.m[0][3] * v[12];
279     if (det == 0.0f)
280         return NULL;
281     if (pdeterminant)
282         *pdeterminant = det;
283 
284     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
285     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
286     t[2] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
287     v[1] = -pm->u.m[0][1] * t[0] + pm->u.m[2][1] * t[1] - pm->u.m[3][1] * t[2];
288     v[5] = pm->u.m[0][0] * t[0] - pm->u.m[2][0] * t[1] + pm->u.m[3][0] * t[2];
289 
290     t[0] = pm->u.m[0][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[0][1];
291     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
292     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
293     v[9] = -pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1]- pm->u.m[0][3] * t[2];
294     v[13] = pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] + pm->u.m[0][2] * t[2];
295 
296     t[0] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
297     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
298     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
299     v[2] = pm->u.m[0][1] * t[0] - pm->u.m[1][1] * t[1] + pm->u.m[3][1] * t[2];
300     v[6] = -pm->u.m[0][0] * t[0] + pm->u.m[1][0] * t[1] - pm->u.m[3][0] * t[2];
301 
302     t[0] = pm->u.m[0][0] * pm->u.m[1][1] - pm->u.m[1][0] * pm->u.m[0][1];
303     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
304     t[2] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
305     v[10] = pm->u.m[3][3] * t[0] + pm->u.m[1][3] * t[1] + pm->u.m[0][3] * t[2];
306     v[14] = -pm->u.m[3][2] * t[0] - pm->u.m[1][2] * t[1] - pm->u.m[0][2] * t[2];
307 
308     t[0] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
309     t[1] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
310     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
311     v[3] = -pm->u.m[0][1] * t[0] + pm->u.m[1][1] * t[1] - pm->u.m[2][1] * t[2];
312     v[7] = pm->u.m[0][0] * t[0] - pm->u.m[1][0] * t[1] + pm->u.m[2][0] * t[2];
313 
314     v[11] = -pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][1]) +
315         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][1]) -
316         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][1]);
317 
318     v[15] = pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][2] - pm->u.m[1][2] * pm->u.m[2][1]) -
319         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][2] - pm->u.m[0][2] * pm->u.m[2][1]) +
320         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][2] - pm->u.m[0][2] * pm->u.m[1][1]);
321 
322     det = 1.0f / det;
323 
324     for (i = 0; i < 4; i++)
325         for (j = 0; j < 4; j++)
326             pout->u.m[i][j] = v[4 * i + j] * det;
327 
328     return pout;
329 }
330 
331 D3DXMATRIX * WINAPI D3DXMatrixLookAtLH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
332         const D3DXVECTOR3 *up)
333 {
334     D3DXVECTOR3 right, upn, vec;
335 
336     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
337 
338     D3DXVec3Subtract(&vec, at, eye);
339     D3DXVec3Normalize(&vec, &vec);
340     D3DXVec3Cross(&right, up, &vec);
341     D3DXVec3Cross(&upn, &vec, &right);
342     D3DXVec3Normalize(&right, &right);
343     D3DXVec3Normalize(&upn, &upn);
344     out->u.m[0][0] = right.x;
345     out->u.m[1][0] = right.y;
346     out->u.m[2][0] = right.z;
347     out->u.m[3][0] = -D3DXVec3Dot(&right, eye);
348     out->u.m[0][1] = upn.x;
349     out->u.m[1][1] = upn.y;
350     out->u.m[2][1] = upn.z;
351     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
352     out->u.m[0][2] = vec.x;
353     out->u.m[1][2] = vec.y;
354     out->u.m[2][2] = vec.z;
355     out->u.m[3][2] = -D3DXVec3Dot(&vec, eye);
356     out->u.m[0][3] = 0.0f;
357     out->u.m[1][3] = 0.0f;
358     out->u.m[2][3] = 0.0f;
359     out->u.m[3][3] = 1.0f;
360 
361     return out;
362 }
363 
364 D3DXMATRIX * WINAPI D3DXMatrixLookAtRH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
365         const D3DXVECTOR3 *up)
366 {
367     D3DXVECTOR3 right, upn, vec;
368 
369     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
370 
371     D3DXVec3Subtract(&vec, at, eye);
372     D3DXVec3Normalize(&vec, &vec);
373     D3DXVec3Cross(&right, up, &vec);
374     D3DXVec3Cross(&upn, &vec, &right);
375     D3DXVec3Normalize(&right, &right);
376     D3DXVec3Normalize(&upn, &upn);
377     out->u.m[0][0] = -right.x;
378     out->u.m[1][0] = -right.y;
379     out->u.m[2][0] = -right.z;
380     out->u.m[3][0] = D3DXVec3Dot(&right, eye);
381     out->u.m[0][1] = upn.x;
382     out->u.m[1][1] = upn.y;
383     out->u.m[2][1] = upn.z;
384     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
385     out->u.m[0][2] = -vec.x;
386     out->u.m[1][2] = -vec.y;
387     out->u.m[2][2] = -vec.z;
388     out->u.m[3][2] = D3DXVec3Dot(&vec, eye);
389     out->u.m[0][3] = 0.0f;
390     out->u.m[1][3] = 0.0f;
391     out->u.m[2][3] = 0.0f;
392     out->u.m[3][3] = 1.0f;
393 
394     return out;
395 }
396 
397 D3DXMATRIX* WINAPI D3DXMatrixMultiply(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
398 {
399     D3DXMATRIX out;
400     int i,j;
401 
402     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
403 
404     for (i=0; i<4; i++)
405     {
406         for (j=0; j<4; j++)
407         {
408             out.u.m[i][j] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
409         }
410     }
411 
412     *pout = out;
413     return pout;
414 }
415 
416 D3DXMATRIX* WINAPI D3DXMatrixMultiplyTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
417 {
418     D3DXMATRIX temp;
419     int i, j;
420 
421     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
422 
423     for (i = 0; i < 4; i++)
424         for (j = 0; j < 4; j++)
425             temp.u.m[j][i] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
426 
427     *pout = temp;
428     return pout;
429 }
430 
431 D3DXMATRIX* WINAPI D3DXMatrixOrthoLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
432 {
433     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
434 
435     D3DXMatrixIdentity(pout);
436     pout->u.m[0][0] = 2.0f / w;
437     pout->u.m[1][1] = 2.0f / h;
438     pout->u.m[2][2] = 1.0f / (zf - zn);
439     pout->u.m[3][2] = zn / (zn - zf);
440     return pout;
441 }
442 
443 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
444 {
445     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
446 
447     D3DXMatrixIdentity(pout);
448     pout->u.m[0][0] = 2.0f / (r - l);
449     pout->u.m[1][1] = 2.0f / (t - b);
450     pout->u.m[2][2] = 1.0f / (zf -zn);
451     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
452     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
453     pout->u.m[3][2] = zn / (zn -zf);
454     return pout;
455 }
456 
457 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
458 {
459     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
460 
461     D3DXMatrixIdentity(pout);
462     pout->u.m[0][0] = 2.0f / (r - l);
463     pout->u.m[1][1] = 2.0f / (t - b);
464     pout->u.m[2][2] = 1.0f / (zn -zf);
465     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
466     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
467     pout->u.m[3][2] = zn / (zn -zf);
468     return pout;
469 }
470 
471 D3DXMATRIX* WINAPI D3DXMatrixOrthoRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
472 {
473     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
474 
475     D3DXMatrixIdentity(pout);
476     pout->u.m[0][0] = 2.0f / w;
477     pout->u.m[1][1] = 2.0f / h;
478     pout->u.m[2][2] = 1.0f / (zn - zf);
479     pout->u.m[3][2] = zn / (zn - zf);
480     return pout;
481 }
482 
483 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovLH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
484 {
485     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
486 
487     D3DXMatrixIdentity(pout);
488     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
489     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
490     pout->u.m[2][2] = zf / (zf - zn);
491     pout->u.m[2][3] = 1.0f;
492     pout->u.m[3][2] = (zf * zn) / (zn - zf);
493     pout->u.m[3][3] = 0.0f;
494     return pout;
495 }
496 
497 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovRH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
498 {
499     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
500 
501     D3DXMatrixIdentity(pout);
502     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
503     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
504     pout->u.m[2][2] = zf / (zn - zf);
505     pout->u.m[2][3] = -1.0f;
506     pout->u.m[3][2] = (zf * zn) / (zn - zf);
507     pout->u.m[3][3] = 0.0f;
508     return pout;
509 }
510 
511 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
512 {
513     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
514 
515     D3DXMatrixIdentity(pout);
516     pout->u.m[0][0] = 2.0f * zn / w;
517     pout->u.m[1][1] = 2.0f * zn / h;
518     pout->u.m[2][2] = zf / (zf - zn);
519     pout->u.m[3][2] = (zn * zf) / (zn - zf);
520     pout->u.m[2][3] = 1.0f;
521     pout->u.m[3][3] = 0.0f;
522     return pout;
523 }
524 
525 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
526 {
527     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
528 
529     D3DXMatrixIdentity(pout);
530     pout->u.m[0][0] = 2.0f * zn / (r - l);
531     pout->u.m[1][1] = -2.0f * zn / (b - t);
532     pout->u.m[2][0] = -1.0f - 2.0f * l / (r - l);
533     pout->u.m[2][1] = 1.0f + 2.0f * t / (b - t);
534     pout->u.m[2][2] = - zf / (zn - zf);
535     pout->u.m[3][2] = (zn * zf) / (zn -zf);
536     pout->u.m[2][3] = 1.0f;
537     pout->u.m[3][3] = 0.0f;
538     return pout;
539 }
540 
541 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
542 {
543     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
544 
545     D3DXMatrixIdentity(pout);
546     pout->u.m[0][0] = 2.0f * zn / (r - l);
547     pout->u.m[1][1] = -2.0f * zn / (b - t);
548     pout->u.m[2][0] = 1.0f + 2.0f * l / (r - l);
549     pout->u.m[2][1] = -1.0f -2.0f * t / (b - t);
550     pout->u.m[2][2] = zf / (zn - zf);
551     pout->u.m[3][2] = (zn * zf) / (zn -zf);
552     pout->u.m[2][3] = -1.0f;
553     pout->u.m[3][3] = 0.0f;
554     return pout;
555 }
556 
557 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
558 {
559     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
560 
561     D3DXMatrixIdentity(pout);
562     pout->u.m[0][0] = 2.0f * zn / w;
563     pout->u.m[1][1] = 2.0f * zn / h;
564     pout->u.m[2][2] = zf / (zn - zf);
565     pout->u.m[3][2] = (zn * zf) / (zn - zf);
566     pout->u.m[2][3] = -1.0f;
567     pout->u.m[3][3] = 0.0f;
568     return pout;
569 }
570 
571 D3DXMATRIX* WINAPI D3DXMatrixReflect(D3DXMATRIX *pout, const D3DXPLANE *pplane)
572 {
573     D3DXPLANE Nplane;
574 
575     TRACE("pout %p, pplane %p\n", pout, pplane);
576 
577     D3DXPlaneNormalize(&Nplane, pplane);
578     D3DXMatrixIdentity(pout);
579     pout->u.m[0][0] = 1.0f - 2.0f * Nplane.a * Nplane.a;
580     pout->u.m[0][1] = -2.0f * Nplane.a * Nplane.b;
581     pout->u.m[0][2] = -2.0f * Nplane.a * Nplane.c;
582     pout->u.m[1][0] = -2.0f * Nplane.a * Nplane.b;
583     pout->u.m[1][1] = 1.0f - 2.0f * Nplane.b * Nplane.b;
584     pout->u.m[1][2] = -2.0f * Nplane.b * Nplane.c;
585     pout->u.m[2][0] = -2.0f * Nplane.c * Nplane.a;
586     pout->u.m[2][1] = -2.0f * Nplane.c * Nplane.b;
587     pout->u.m[2][2] = 1.0f - 2.0f * Nplane.c * Nplane.c;
588     pout->u.m[3][0] = -2.0f * Nplane.d * Nplane.a;
589     pout->u.m[3][1] = -2.0f * Nplane.d * Nplane.b;
590     pout->u.m[3][2] = -2.0f * Nplane.d * Nplane.c;
591     return pout;
592 }
593 
594 D3DXMATRIX * WINAPI D3DXMatrixRotationAxis(D3DXMATRIX *out, const D3DXVECTOR3 *v, FLOAT angle)
595 {
596     D3DXVECTOR3 nv;
597     FLOAT sangle, cangle, cdiff;
598 
599     TRACE("out %p, v %p, angle %f\n", out, v, angle);
600 
601     D3DXVec3Normalize(&nv, v);
602     sangle = sinf(angle);
603     cangle = cosf(angle);
604     cdiff = 1.0f - cangle;
605 
606     out->u.m[0][0] = cdiff * nv.x * nv.x + cangle;
607     out->u.m[1][0] = cdiff * nv.x * nv.y - sangle * nv.z;
608     out->u.m[2][0] = cdiff * nv.x * nv.z + sangle * nv.y;
609     out->u.m[3][0] = 0.0f;
610     out->u.m[0][1] = cdiff * nv.y * nv.x + sangle * nv.z;
611     out->u.m[1][1] = cdiff * nv.y * nv.y + cangle;
612     out->u.m[2][1] = cdiff * nv.y * nv.z - sangle * nv.x;
613     out->u.m[3][1] = 0.0f;
614     out->u.m[0][2] = cdiff * nv.z * nv.x - sangle * nv.y;
615     out->u.m[1][2] = cdiff * nv.z * nv.y + sangle * nv.x;
616     out->u.m[2][2] = cdiff * nv.z * nv.z + cangle;
617     out->u.m[3][2] = 0.0f;
618     out->u.m[0][3] = 0.0f;
619     out->u.m[1][3] = 0.0f;
620     out->u.m[2][3] = 0.0f;
621     out->u.m[3][3] = 1.0f;
622 
623     return out;
624 }
625 
626 D3DXMATRIX* WINAPI D3DXMatrixRotationQuaternion(D3DXMATRIX *pout, const D3DXQUATERNION *pq)
627 {
628     TRACE("pout %p, pq %p\n", pout, pq);
629 
630     D3DXMatrixIdentity(pout);
631     pout->u.m[0][0] = 1.0f - 2.0f * (pq->y * pq->y + pq->z * pq->z);
632     pout->u.m[0][1] = 2.0f * (pq->x *pq->y + pq->z * pq->w);
633     pout->u.m[0][2] = 2.0f * (pq->x * pq->z - pq->y * pq->w);
634     pout->u.m[1][0] = 2.0f * (pq->x * pq->y - pq->z * pq->w);
635     pout->u.m[1][1] = 1.0f - 2.0f * (pq->x * pq->x + pq->z * pq->z);
636     pout->u.m[1][2] = 2.0f * (pq->y *pq->z + pq->x *pq->w);
637     pout->u.m[2][0] = 2.0f * (pq->x * pq->z + pq->y * pq->w);
638     pout->u.m[2][1] = 2.0f * (pq->y *pq->z - pq->x *pq->w);
639     pout->u.m[2][2] = 1.0f - 2.0f * (pq->x * pq->x + pq->y * pq->y);
640     return pout;
641 }
642 
643 D3DXMATRIX* WINAPI D3DXMatrixRotationX(D3DXMATRIX *pout, FLOAT angle)
644 {
645     TRACE("pout %p, angle %f\n", pout, angle);
646 
647     D3DXMatrixIdentity(pout);
648     pout->u.m[1][1] = cosf(angle);
649     pout->u.m[2][2] = cosf(angle);
650     pout->u.m[1][2] = sinf(angle);
651     pout->u.m[2][1] = -sinf(angle);
652     return pout;
653 }
654 
655 D3DXMATRIX* WINAPI D3DXMatrixRotationY(D3DXMATRIX *pout, FLOAT angle)
656 {
657     TRACE("pout %p, angle %f\n", pout, angle);
658 
659     D3DXMatrixIdentity(pout);
660     pout->u.m[0][0] = cosf(angle);
661     pout->u.m[2][2] = cosf(angle);
662     pout->u.m[0][2] = -sinf(angle);
663     pout->u.m[2][0] = sinf(angle);
664     return pout;
665 }
666 
667 D3DXMATRIX * WINAPI D3DXMatrixRotationYawPitchRoll(D3DXMATRIX *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
668 {
669     FLOAT sroll, croll, spitch, cpitch, syaw, cyaw;
670 
671     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
672 
673     sroll = sinf(roll);
674     croll = cosf(roll);
675     spitch = sinf(pitch);
676     cpitch = cosf(pitch);
677     syaw = sinf(yaw);
678     cyaw = cosf(yaw);
679 
680     out->u.m[0][0] = sroll * spitch * syaw + croll * cyaw;
681     out->u.m[0][1] = sroll * cpitch;
682     out->u.m[0][2] = sroll * spitch * cyaw - croll * syaw;
683     out->u.m[0][3] = 0.0f;
684     out->u.m[1][0] = croll * spitch * syaw - sroll * cyaw;
685     out->u.m[1][1] = croll * cpitch;
686     out->u.m[1][2] = croll * spitch * cyaw + sroll * syaw;
687     out->u.m[1][3] = 0.0f;
688     out->u.m[2][0] = cpitch * syaw;
689     out->u.m[2][1] = -spitch;
690     out->u.m[2][2] = cpitch * cyaw;
691     out->u.m[2][3] = 0.0f;
692     out->u.m[3][0] = 0.0f;
693     out->u.m[3][1] = 0.0f;
694     out->u.m[3][2] = 0.0f;
695     out->u.m[3][3] = 1.0f;
696 
697     return out;
698 }
699 
700 D3DXMATRIX* WINAPI D3DXMatrixRotationZ(D3DXMATRIX *pout, FLOAT angle)
701 {
702     TRACE("pout %p, angle %f\n", pout, angle);
703 
704     D3DXMatrixIdentity(pout);
705     pout->u.m[0][0] = cosf(angle);
706     pout->u.m[1][1] = cosf(angle);
707     pout->u.m[0][1] = sinf(angle);
708     pout->u.m[1][0] = -sinf(angle);
709     return pout;
710 }
711 
712 D3DXMATRIX* WINAPI D3DXMatrixScaling(D3DXMATRIX *pout, FLOAT sx, FLOAT sy, FLOAT sz)
713 {
714     TRACE("pout %p, sx %f, sy %f, sz %f\n", pout, sx, sy, sz);
715 
716     D3DXMatrixIdentity(pout);
717     pout->u.m[0][0] = sx;
718     pout->u.m[1][1] = sy;
719     pout->u.m[2][2] = sz;
720     return pout;
721 }
722 
723 D3DXMATRIX* WINAPI D3DXMatrixShadow(D3DXMATRIX *pout, const D3DXVECTOR4 *plight, const D3DXPLANE *pplane)
724 {
725     D3DXPLANE Nplane;
726     FLOAT dot;
727 
728     TRACE("pout %p, plight %p, pplane %p\n", pout, plight, pplane);
729 
730     D3DXPlaneNormalize(&Nplane, pplane);
731     dot = D3DXPlaneDot(&Nplane, plight);
732     pout->u.m[0][0] = dot - Nplane.a * plight->x;
733     pout->u.m[0][1] = -Nplane.a * plight->y;
734     pout->u.m[0][2] = -Nplane.a * plight->z;
735     pout->u.m[0][3] = -Nplane.a * plight->w;
736     pout->u.m[1][0] = -Nplane.b * plight->x;
737     pout->u.m[1][1] = dot - Nplane.b * plight->y;
738     pout->u.m[1][2] = -Nplane.b * plight->z;
739     pout->u.m[1][3] = -Nplane.b * plight->w;
740     pout->u.m[2][0] = -Nplane.c * plight->x;
741     pout->u.m[2][1] = -Nplane.c * plight->y;
742     pout->u.m[2][2] = dot - Nplane.c * plight->z;
743     pout->u.m[2][3] = -Nplane.c * plight->w;
744     pout->u.m[3][0] = -Nplane.d * plight->x;
745     pout->u.m[3][1] = -Nplane.d * plight->y;
746     pout->u.m[3][2] = -Nplane.d * plight->z;
747     pout->u.m[3][3] = dot - Nplane.d * plight->w;
748     return pout;
749 }
750 
751 D3DXMATRIX * WINAPI D3DXMatrixTransformation(D3DXMATRIX *out, const D3DXVECTOR3 *scaling_center,
752         const D3DXQUATERNION *scaling_rotation, const D3DXVECTOR3 *scaling,
753         const D3DXVECTOR3 *rotation_center, const D3DXQUATERNION *rotation,
754         const D3DXVECTOR3 *translation)
755 {
756     static const D3DXVECTOR3 zero_vector;
757     D3DXMATRIX m1, msr1, ms, msr, msc, mrc1, mr, mrc, mt;
758     D3DXVECTOR3 sc, rc;
759     D3DXQUATERNION q;
760 
761     TRACE("out %p, scaling_center %p, scaling_rotation %p, scaling %p, rotation_center %p,"
762             " rotation %p, translation %p.\n",
763             out, scaling_center, scaling_rotation, scaling, rotation_center, rotation, translation);
764 
765     if (scaling)
766     {
767         sc = scaling_center ? *scaling_center : zero_vector;
768         D3DXMatrixTranslation(&m1, -sc.x, -sc.y, -sc.z);
769         if (scaling_rotation)
770         {
771             q.x = -scaling_rotation->x;
772             q.y = -scaling_rotation->y;
773             q.z = -scaling_rotation->z;
774             q.w = scaling_rotation->w;
775             D3DXMatrixRotationQuaternion(&msr1, &q);
776             D3DXMatrixMultiply(&m1, &m1, &msr1);
777         }
778         D3DXMatrixScaling(&ms, scaling->x, scaling->y, scaling->z);
779         D3DXMatrixMultiply(&m1, &m1, &ms);
780         if (scaling_rotation)
781         {
782             D3DXMatrixRotationQuaternion(&msr, scaling_rotation);
783             D3DXMatrixMultiply(&m1, &m1, &msr);
784         }
785         D3DXMatrixTranslation(&msc, sc.x, sc.y, sc.z);
786         D3DXMatrixMultiply(&m1, &m1, &msc);
787     }
788     else
789     {
790         D3DXMatrixIdentity(&m1);
791     }
792 
793     if (rotation)
794     {
795         rc = rotation_center ? *rotation_center : zero_vector;
796         D3DXMatrixTranslation(&mrc1, -rc.x, -rc.y, -rc.z);
797         D3DXMatrixMultiply(&m1, &m1, &mrc1);
798         D3DXMatrixRotationQuaternion(&mr, rotation);
799         D3DXMatrixMultiply(&m1, &m1, &mr);
800         D3DXMatrixTranslation(&mrc, rc.x, rc.y, rc.z);
801         D3DXMatrixMultiply(&m1, &m1, &mrc);
802     }
803 
804     if (translation)
805     {
806         D3DXMatrixTranslation(&mt, translation->x, translation->y, translation->z);
807         D3DXMatrixMultiply(out, &m1, &mt);
808     }
809     else
810     {
811         *out = m1;
812     }
813 
814     return out;
815 }
816 
817 static void vec3_from_vec2(D3DXVECTOR3 *v3, const D3DXVECTOR2 *v2)
818 {
819     if (!v2)
820         return;
821 
822     v3->x = v2->x;
823     v3->y = v2->y;
824     v3->z = 0.0f;
825 }
826 
827 D3DXMATRIX * WINAPI D3DXMatrixTransformation2D(D3DXMATRIX *out, const D3DXVECTOR2 *scaling_center,
828         float scaling_rotation, const D3DXVECTOR2 *scaling, const D3DXVECTOR2 *rotation_center,
829         float rotation, const D3DXVECTOR2 *translation)
830 {
831     D3DXVECTOR3 r_c, s, s_c, t;
832     D3DXQUATERNION r, s_r;
833 
834     TRACE("out %p, scaling_center %p, scaling_rotation %.8e, scaling %p, rotation_center %p, "
835             "rotation %.8e, translation %p.\n",
836             out, scaling_center, scaling_rotation, scaling, rotation_center, rotation, translation);
837 
838     vec3_from_vec2(&s_c, scaling_center);
839     vec3_from_vec2(&s, scaling);
840     if (scaling)
841         s.z = 1.0f;
842     vec3_from_vec2(&r_c, rotation_center);
843     vec3_from_vec2(&t, translation);
844 
845     if (rotation)
846     {
847         r.w = cosf(rotation / 2.0f);
848         r.x = 0.0f;
849         r.y = 0.0f;
850         r.z = sinf(rotation / 2.0f);
851     }
852 
853     if (scaling_rotation)
854     {
855         s_r.w = cosf(scaling_rotation / 2.0f);
856         s_r.x = 0.0f;
857         s_r.y = 0.0f;
858         s_r.z = sinf(scaling_rotation / 2.0f);
859     }
860 
861     return D3DXMatrixTransformation(out, scaling_center ? &s_c : NULL,
862             scaling_rotation ? &s_r : NULL, scaling ? &s : NULL, rotation_center ? &r_c: NULL,
863             rotation ? &r : NULL, translation ? &t : NULL);
864 }
865 
866 D3DXMATRIX* WINAPI D3DXMatrixTranslation(D3DXMATRIX *pout, FLOAT x, FLOAT y, FLOAT z)
867 {
868     TRACE("pout %p, x %f, y %f, z %f\n", pout, x, y, z);
869 
870     D3DXMatrixIdentity(pout);
871     pout->u.m[3][0] = x;
872     pout->u.m[3][1] = y;
873     pout->u.m[3][2] = z;
874     return pout;
875 }
876 
877 D3DXMATRIX* WINAPI D3DXMatrixTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm)
878 {
879     const D3DXMATRIX m = *pm;
880     int i,j;
881 
882     TRACE("pout %p, pm %p\n", pout, pm);
883 
884     for (i=0; i<4; i++)
885         for (j=0; j<4; j++) pout->u.m[i][j] = m.u.m[j][i];
886 
887     return pout;
888 }
889 
890 /*_________________D3DXMatrixStack____________________*/
891 
892 
893 static inline struct ID3DXMatrixStackImpl *impl_from_ID3DXMatrixStack(ID3DXMatrixStack *iface)
894 {
895   return CONTAINING_RECORD(iface, struct ID3DXMatrixStackImpl, ID3DXMatrixStack_iface);
896 }
897 
898 static HRESULT WINAPI ID3DXMatrixStackImpl_QueryInterface(ID3DXMatrixStack *iface, REFIID riid, void **out)
899 {
900     TRACE("iface %p, riid %s, out %p.\n", iface, debugstr_guid(riid), out);
901 
902     if (IsEqualGUID(riid, &IID_ID3DXMatrixStack)
903             || IsEqualGUID(riid, &IID_IUnknown))
904     {
905         ID3DXMatrixStack_AddRef(iface);
906         *out = iface;
907         return S_OK;
908     }
909 
910     WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));
911 
912     *out = NULL;
913     return E_NOINTERFACE;
914 }
915 
916 static ULONG WINAPI ID3DXMatrixStackImpl_AddRef(ID3DXMatrixStack *iface)
917 {
918     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
919     ULONG ref = InterlockedIncrement(&This->ref);
920     TRACE("(%p) : AddRef from %d\n", This, ref - 1);
921     return ref;
922 }
923 
924 static ULONG WINAPI ID3DXMatrixStackImpl_Release(ID3DXMatrixStack *iface)
925 {
926     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
927     ULONG ref = InterlockedDecrement(&This->ref);
928     if (!ref)
929     {
930         HeapFree(GetProcessHeap(), 0, This->stack);
931         HeapFree(GetProcessHeap(), 0, This);
932     }
933     TRACE("(%p) : ReleaseRef to %d\n", This, ref);
934     return ref;
935 }
936 
937 static D3DXMATRIX* WINAPI ID3DXMatrixStackImpl_GetTop(ID3DXMatrixStack *iface)
938 {
939     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
940 
941     TRACE("iface %p\n", iface);
942 
943     return &This->stack[This->current];
944 }
945 
946 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadIdentity(ID3DXMatrixStack *iface)
947 {
948     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
949 
950     TRACE("iface %p\n", iface);
951 
952     D3DXMatrixIdentity(&This->stack[This->current]);
953 
954     return D3D_OK;
955 }
956 
957 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
958 {
959     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
960 
961     TRACE("iface %p, pm %p\n", iface, pm);
962 
963     This->stack[This->current] = *pm;
964 
965     return D3D_OK;
966 }
967 
968 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
969 {
970     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
971 
972     TRACE("iface %p, pm %p\n", iface, pm);
973 
974     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], pm);
975 
976     return D3D_OK;
977 }
978 
979 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrixLocal(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
980 {
981     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
982 
983     TRACE("iface %p, pm %p\n", iface, pm);
984 
985     D3DXMatrixMultiply(&This->stack[This->current], pm, &This->stack[This->current]);
986 
987     return D3D_OK;
988 }
989 
990 static HRESULT WINAPI ID3DXMatrixStackImpl_Pop(ID3DXMatrixStack *iface)
991 {
992     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
993 
994     TRACE("iface %p\n", iface);
995 
996     /* Popping the last element on the stack returns D3D_OK, but does nothing. */
997     if (!This->current) return D3D_OK;
998 
999     if (This->current <= This->stack_size / 4 && This->stack_size >= INITIAL_STACK_SIZE * 2)
1000     {
1001         unsigned int new_size;
1002         D3DXMATRIX *new_stack;
1003 
1004         new_size = This->stack_size / 2;
1005         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1006         if (new_stack)
1007         {
1008             This->stack_size = new_size;
1009             This->stack = new_stack;
1010         }
1011     }
1012 
1013     --This->current;
1014 
1015     return D3D_OK;
1016 }
1017 
1018 static HRESULT WINAPI ID3DXMatrixStackImpl_Push(ID3DXMatrixStack *iface)
1019 {
1020     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1021 
1022     TRACE("iface %p\n", iface);
1023 
1024     if (This->current == This->stack_size - 1)
1025     {
1026         unsigned int new_size;
1027         D3DXMATRIX *new_stack;
1028 
1029         if (This->stack_size > UINT_MAX / 2) return E_OUTOFMEMORY;
1030 
1031         new_size = This->stack_size * 2;
1032         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1033         if (!new_stack) return E_OUTOFMEMORY;
1034 
1035         This->stack_size = new_size;
1036         This->stack = new_stack;
1037     }
1038 
1039     ++This->current;
1040     This->stack[This->current] = This->stack[This->current - 1];
1041 
1042     return D3D_OK;
1043 }
1044 
1045 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxis(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1046 {
1047     D3DXMATRIX temp;
1048     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1049 
1050     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1051 
1052     D3DXMatrixRotationAxis(&temp, pv, angle);
1053     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1054 
1055     return D3D_OK;
1056 }
1057 
1058 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxisLocal(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1059 {
1060     D3DXMATRIX temp;
1061     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1062 
1063     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1064 
1065     D3DXMatrixRotationAxis(&temp, pv, angle);
1066     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1067 
1068     return D3D_OK;
1069 }
1070 
1071 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRoll(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1072 {
1073     D3DXMATRIX temp;
1074     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1075 
1076     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1077 
1078     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1079     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1080 
1081     return D3D_OK;
1082 }
1083 
1084 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRollLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1085 {
1086     D3DXMATRIX temp;
1087     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1088 
1089     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1090 
1091     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1092     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1093 
1094     return D3D_OK;
1095 }
1096 
1097 static HRESULT WINAPI ID3DXMatrixStackImpl_Scale(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1098 {
1099     D3DXMATRIX temp;
1100     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1101 
1102     TRACE("iface %p,x %f, y %f, z %f\n", iface, x, y, z);
1103 
1104     D3DXMatrixScaling(&temp, x, y, z);
1105     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1106 
1107     return D3D_OK;
1108 }
1109 
1110 static HRESULT WINAPI ID3DXMatrixStackImpl_ScaleLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1111 {
1112     D3DXMATRIX temp;
1113     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1114 
1115     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1116 
1117     D3DXMatrixScaling(&temp, x, y, z);
1118     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1119 
1120     return D3D_OK;
1121 }
1122 
1123 static HRESULT WINAPI ID3DXMatrixStackImpl_Translate(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1124 {
1125     D3DXMATRIX temp;
1126     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1127 
1128     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1129 
1130     D3DXMatrixTranslation(&temp, x, y, z);
1131     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1132 
1133     return D3D_OK;
1134 }
1135 
1136 static HRESULT WINAPI ID3DXMatrixStackImpl_TranslateLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1137 {
1138     D3DXMATRIX temp;
1139     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1140 
1141     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1142 
1143     D3DXMatrixTranslation(&temp, x, y, z);
1144     D3DXMatrixMultiply(&This->stack[This->current], &temp,&This->stack[This->current]);
1145 
1146     return D3D_OK;
1147 }
1148 
/*
 * Method table installed into every matrix stack object by
 * D3DXCreateMatrixStack(). The initializer is positional, so the entries
 * must stay in the same order as the members of ID3DXMatrixStackVtbl.
 */
static const ID3DXMatrixStackVtbl ID3DXMatrixStack_Vtbl =
{
    /* IUnknown methods */
    ID3DXMatrixStackImpl_QueryInterface,
    ID3DXMatrixStackImpl_AddRef,
    ID3DXMatrixStackImpl_Release,
    /* ID3DXMatrixStack methods */
    ID3DXMatrixStackImpl_Pop,
    ID3DXMatrixStackImpl_Push,
    ID3DXMatrixStackImpl_LoadIdentity,
    ID3DXMatrixStackImpl_LoadMatrix,
    ID3DXMatrixStackImpl_MultMatrix,
    ID3DXMatrixStackImpl_MultMatrixLocal,
    ID3DXMatrixStackImpl_RotateAxis,
    ID3DXMatrixStackImpl_RotateAxisLocal,
    ID3DXMatrixStackImpl_RotateYawPitchRoll,
    ID3DXMatrixStackImpl_RotateYawPitchRollLocal,
    ID3DXMatrixStackImpl_Scale,
    ID3DXMatrixStackImpl_ScaleLocal,
    ID3DXMatrixStackImpl_Translate,
    ID3DXMatrixStackImpl_TranslateLocal,
    ID3DXMatrixStackImpl_GetTop
};
1170 
1171 HRESULT WINAPI D3DXCreateMatrixStack(DWORD flags, ID3DXMatrixStack **stack)
1172 {
1173     struct ID3DXMatrixStackImpl *object;
1174 
1175     TRACE("flags %#x, stack %p.\n", flags, stack);
1176 
1177     if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
1178     {
1179         *stack = NULL;
1180         return E_OUTOFMEMORY;
1181     }
1182     object->ID3DXMatrixStack_iface.lpVtbl = &ID3DXMatrixStack_Vtbl;
1183     object->ref = 1;
1184 
1185     if (!(object->stack = HeapAlloc(GetProcessHeap(), 0, INITIAL_STACK_SIZE * sizeof(*object->stack))))
1186     {
1187         HeapFree(GetProcessHeap(), 0, object);
1188         *stack = NULL;
1189         return E_OUTOFMEMORY;
1190     }
1191 
1192     object->current = 0;
1193     object->stack_size = INITIAL_STACK_SIZE;
1194     D3DXMatrixIdentity(&object->stack[0]);
1195 
1196     TRACE("Created matrix stack %p.\n", object);
1197 
1198     *stack = &object->ID3DXMatrixStack_iface;
1199     return D3D_OK;
1200 }
1201 
1202 /*_________________D3DXPLANE________________*/
1203 
1204 D3DXPLANE* WINAPI D3DXPlaneFromPointNormal(D3DXPLANE *pout, const D3DXVECTOR3 *pvpoint, const D3DXVECTOR3 *pvnormal)
1205 {
1206     TRACE("pout %p, pvpoint %p, pvnormal %p\n", pout, pvpoint, pvnormal);
1207 
1208     pout->a = pvnormal->x;
1209     pout->b = pvnormal->y;
1210     pout->c = pvnormal->z;
1211     pout->d = -D3DXVec3Dot(pvpoint, pvnormal);
1212     return pout;
1213 }
1214 
1215 D3DXPLANE* WINAPI D3DXPlaneFromPoints(D3DXPLANE *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3)
1216 {
1217     D3DXVECTOR3 edge1, edge2, normal, Nnormal;
1218 
1219     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
1220 
1221     edge1.x = 0.0f; edge1.y = 0.0f; edge1.z = 0.0f;
1222     edge2.x = 0.0f; edge2.y = 0.0f; edge2.z = 0.0f;
1223     D3DXVec3Subtract(&edge1, pv2, pv1);
1224     D3DXVec3Subtract(&edge2, pv3, pv1);
1225     D3DXVec3Cross(&normal, &edge1, &edge2);
1226     D3DXVec3Normalize(&Nnormal, &normal);
1227     D3DXPlaneFromPointNormal(pout, pv1, &Nnormal);
1228     return pout;
1229 }
1230 
1231 D3DXVECTOR3* WINAPI D3DXPlaneIntersectLine(D3DXVECTOR3 *pout, const D3DXPLANE *pp, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2)
1232 {
1233     D3DXVECTOR3 direction, normal;
1234     FLOAT dot, temp;
1235 
1236     TRACE("pout %p, pp %p, pv1 %p, pv2 %p\n", pout, pp, pv1, pv2);
1237 
1238     normal.x = pp->a;
1239     normal.y = pp->b;
1240     normal.z = pp->c;
1241     direction.x = pv2->x - pv1->x;
1242     direction.y = pv2->y - pv1->y;
1243     direction.z = pv2->z - pv1->z;
1244     dot = D3DXVec3Dot(&normal, &direction);
1245     if ( !dot ) return NULL;
1246     temp = ( pp->d + D3DXVec3Dot(&normal, pv1) ) / dot;
1247     pout->x = pv1->x - temp * direction.x;
1248     pout->y = pv1->y - temp * direction.y;
1249     pout->z = pv1->z - temp * direction.z;
1250     return pout;
1251 }
1252 
1253 D3DXPLANE * WINAPI D3DXPlaneNormalize(D3DXPLANE *out, const D3DXPLANE *p)
1254 {
1255     FLOAT norm;
1256 
1257     TRACE("out %p, p %p\n", out, p);
1258 
1259     norm = sqrtf(p->a * p->a + p->b * p->b + p->c * p->c);
1260     if (norm)
1261     {
1262         out->a = p->a / norm;
1263         out->b = p->b / norm;
1264         out->c = p->c / norm;
1265         out->d = p->d / norm;
1266     }
1267     else
1268     {
1269         out->a = 0.0f;
1270         out->b = 0.0f;
1271         out->c = 0.0f;
1272         out->d = 0.0f;
1273     }
1274 
1275     return out;
1276 }
1277 
1278 D3DXPLANE* WINAPI D3DXPlaneTransform(D3DXPLANE *pout, const D3DXPLANE *pplane, const D3DXMATRIX *pm)
1279 {
1280     const D3DXPLANE plane = *pplane;
1281 
1282     TRACE("pout %p, pplane %p, pm %p\n", pout, pplane, pm);
1283 
1284     pout->a = pm->u.m[0][0] * plane.a + pm->u.m[1][0] * plane.b + pm->u.m[2][0] * plane.c + pm->u.m[3][0] * plane.d;
1285     pout->b = pm->u.m[0][1] * plane.a + pm->u.m[1][1] * plane.b + pm->u.m[2][1] * plane.c + pm->u.m[3][1] * plane.d;
1286     pout->c = pm->u.m[0][2] * plane.a + pm->u.m[1][2] * plane.b + pm->u.m[2][2] * plane.c + pm->u.m[3][2] * plane.d;
1287     pout->d = pm->u.m[0][3] * plane.a + pm->u.m[1][3] * plane.b + pm->u.m[2][3] * plane.c + pm->u.m[3][3] * plane.d;
1288     return pout;
1289 }
1290 
1291 D3DXPLANE* WINAPI D3DXPlaneTransformArray(D3DXPLANE* out, UINT outstride, const D3DXPLANE* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1292 {
1293     UINT i;
1294 
1295     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1296 
1297     for (i = 0; i < elements; ++i) {
1298         D3DXPlaneTransform(
1299             (D3DXPLANE*)((char*)out + outstride * i),
1300             (const D3DXPLANE*)((const char*)in + instride * i),
1301             matrix);
1302     }
1303     return out;
1304 }
1305 
1306 /*_________________D3DXQUATERNION________________*/
1307 
1308 D3DXQUATERNION* WINAPI D3DXQuaternionBaryCentric(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, FLOAT f, FLOAT g)
1309 {
1310     D3DXQUATERNION temp1, temp2;
1311 
1312      TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, f %f, g %f\n", pout, pq1, pq2, pq3, f, g);
1313 
1314     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq2, f + g), D3DXQuaternionSlerp(&temp2, pq1, pq3, f+g), g / (f + g));
1315     return pout;
1316 }
1317 
1318 D3DXQUATERNION * WINAPI D3DXQuaternionExp(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1319 {
1320     FLOAT norm;
1321 
1322     TRACE("out %p, q %p\n", out, q);
1323 
1324     norm = sqrtf(q->x * q->x + q->y * q->y + q->z * q->z);
1325     if (norm)
1326     {
1327         out->x = sinf(norm) * q->x / norm;
1328         out->y = sinf(norm) * q->y / norm;
1329         out->z = sinf(norm) * q->z / norm;
1330         out->w = cosf(norm);
1331     }
1332     else
1333     {
1334         out->x = 0.0f;
1335         out->y = 0.0f;
1336         out->z = 0.0f;
1337         out->w = 1.0f;
1338     }
1339 
1340     return out;
1341 }
1342 
1343 D3DXQUATERNION* WINAPI D3DXQuaternionInverse(D3DXQUATERNION *pout, const D3DXQUATERNION *pq)
1344 {
1345     FLOAT norm;
1346 
1347     TRACE("pout %p, pq %p\n", pout, pq);
1348 
1349     norm = D3DXQuaternionLengthSq(pq);
1350 
1351     pout->x = -pq->x / norm;
1352     pout->y = -pq->y / norm;
1353     pout->z = -pq->z / norm;
1354     pout->w = pq->w / norm;
1355     return pout;
1356 }
1357 
1358 D3DXQUATERNION * WINAPI D3DXQuaternionLn(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1359 {
1360     FLOAT t;
1361 
1362     TRACE("out %p, q %p\n", out, q);
1363 
1364     if ((q->w >= 1.0f) || (q->w == -1.0f))
1365         t = 1.0f;
1366     else
1367         t = acosf(q->w) / sqrtf(1.0f - q->w * q->w);
1368 
1369     out->x = t * q->x;
1370     out->y = t * q->y;
1371     out->z = t * q->z;
1372     out->w = 0.0f;
1373 
1374     return out;
1375 }
1376 
1377 D3DXQUATERNION* WINAPI D3DXQuaternionMultiply(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2)
1378 {
1379     D3DXQUATERNION out;
1380 
1381     TRACE("pout %p, pq1 %p, pq2 %p\n", pout, pq1, pq2);
1382 
1383     out.x = pq2->w * pq1->x + pq2->x * pq1->w + pq2->y * pq1->z - pq2->z * pq1->y;
1384     out.y = pq2->w * pq1->y - pq2->x * pq1->z + pq2->y * pq1->w + pq2->z * pq1->x;
1385     out.z = pq2->w * pq1->z + pq2->x * pq1->y - pq2->y * pq1->x + pq2->z * pq1->w;
1386     out.w = pq2->w * pq1->w - pq2->x * pq1->x - pq2->y * pq1->y - pq2->z * pq1->z;
1387     *pout = out;
1388     return pout;
1389 }
1390 
1391 D3DXQUATERNION * WINAPI D3DXQuaternionNormalize(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1392 {
1393     FLOAT norm;
1394 
1395     TRACE("out %p, q %p\n", out, q);
1396 
1397     norm = D3DXQuaternionLength(q);
1398 
1399     out->x = q->x / norm;
1400     out->y = q->y / norm;
1401     out->z = q->z / norm;
1402     out->w = q->w / norm;
1403 
1404     return out;
1405 }
1406 
1407 D3DXQUATERNION * WINAPI D3DXQuaternionRotationAxis(D3DXQUATERNION *out, const D3DXVECTOR3 *v, FLOAT angle)
1408 {
1409     D3DXVECTOR3 temp;
1410 
1411     TRACE("out %p, v %p, angle %f\n", out, v, angle);
1412 
1413     D3DXVec3Normalize(&temp, v);
1414 
1415     out->x = sinf(angle / 2.0f) * temp.x;
1416     out->y = sinf(angle / 2.0f) * temp.y;
1417     out->z = sinf(angle / 2.0f) * temp.z;
1418     out->w = cosf(angle / 2.0f);
1419 
1420     return out;
1421 }
1422 
1423 D3DXQUATERNION * WINAPI D3DXQuaternionRotationMatrix(D3DXQUATERNION *out, const D3DXMATRIX *m)
1424 {
1425     FLOAT s, trace;
1426 
1427     TRACE("out %p, m %p\n", out, m);
1428 
1429     trace = m->u.m[0][0] + m->u.m[1][1] + m->u.m[2][2] + 1.0f;
1430     if (trace > 1.0f)
1431     {
1432         s = 2.0f * sqrtf(trace);
1433         out->x = (m->u.m[1][2] - m->u.m[2][1]) / s;
1434         out->y = (m->u.m[2][0] - m->u.m[0][2]) / s;
1435         out->z = (m->u.m[0][1] - m->u.m[1][0]) / s;
1436         out->w = 0.25f * s;
1437     }
1438     else
1439     {
1440         int i, maxi = 0;
1441 
1442         for (i = 1; i < 3; i++)
1443         {
1444             if (m->u.m[i][i] > m->u.m[maxi][maxi])
1445                 maxi = i;
1446         }
1447 
1448         switch (maxi)
1449         {
1450             case 0:
1451                 s = 2.0f * sqrtf(1.0f + m->u.m[0][0] - m->u.m[1][1] - m->u.m[2][2]);
1452                 out->x = 0.25f * s;
1453                 out->y = (m->u.m[0][1] + m->u.m[1][0]) / s;
1454                 out->z = (m->u.m[0][2] + m->u.m[2][0]) / s;
1455                 out->w = (m->u.m[1][2] - m->u.m[2][1]) / s;
1456                 break;
1457 
1458             case 1:
1459                 s = 2.0f * sqrtf(1.0f + m->u.m[1][1] - m->u.m[0][0] - m->u.m[2][2]);
1460                 out->x = (m->u.m[0][1] + m->u.m[1][0]) / s;
1461                 out->y = 0.25f * s;
1462                 out->z = (m->u.m[1][2] + m->u.m[2][1]) / s;
1463                 out->w = (m->u.m[2][0] - m->u.m[0][2]) / s;
1464                 break;
1465 
1466             case 2:
1467                 s = 2.0f * sqrtf(1.0f + m->u.m[2][2] - m->u.m[0][0] - m->u.m[1][1]);
1468                 out->x = (m->u.m[0][2] + m->u.m[2][0]) / s;
1469                 out->y = (m->u.m[1][2] + m->u.m[2][1]) / s;
1470                 out->z = 0.25f * s;
1471                 out->w = (m->u.m[0][1] - m->u.m[1][0]) / s;
1472                 break;
1473         }
1474     }
1475 
1476     return out;
1477 }
1478 
1479 D3DXQUATERNION * WINAPI D3DXQuaternionRotationYawPitchRoll(D3DXQUATERNION *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
1480 {
1481     FLOAT syaw, cyaw, spitch, cpitch, sroll, croll;
1482 
1483     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
1484 
1485     syaw = sinf(yaw / 2.0f);
1486     cyaw = cosf(yaw / 2.0f);
1487     spitch = sinf(pitch / 2.0f);
1488     cpitch = cosf(pitch / 2.0f);
1489     sroll = sinf(roll / 2.0f);
1490     croll = cosf(roll / 2.0f);
1491 
1492     out->x = syaw * cpitch * sroll + cyaw * spitch * croll;
1493     out->y = syaw * cpitch * croll - cyaw * spitch * sroll;
1494     out->z = cyaw * cpitch * sroll - syaw * spitch * croll;
1495     out->w = cyaw * cpitch * croll + syaw * spitch * sroll;
1496 
1497     return out;
1498 }
1499 
1500 D3DXQUATERNION * WINAPI D3DXQuaternionSlerp(D3DXQUATERNION *out, const D3DXQUATERNION *q1,
1501         const D3DXQUATERNION *q2, FLOAT t)
1502 {
1503     FLOAT dot, temp;
1504 
1505     TRACE("out %p, q1 %p, q2 %p, t %f\n", out, q1, q2, t);
1506 
1507     temp = 1.0f - t;
1508     dot = D3DXQuaternionDot(q1, q2);
1509     if (dot < 0.0f)
1510     {
1511         t = -t;
1512         dot = -dot;
1513     }
1514 
1515     if (1.0f - dot > 0.001f)
1516     {
1517         FLOAT theta = acosf(dot);
1518 
1519         temp = sinf(theta * temp) / sinf(theta);
1520         t = sinf(theta * t) / sinf(theta);
1521     }
1522 
1523     out->x = temp * q1->x + t * q2->x;
1524     out->y = temp * q1->y + t * q2->y;
1525     out->z = temp * q1->z + t * q2->z;
1526     out->w = temp * q1->w + t * q2->w;
1527 
1528     return out;
1529 }
1530 
1531 D3DXQUATERNION* WINAPI D3DXQuaternionSquad(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, const D3DXQUATERNION *pq4, FLOAT t)
1532 {
1533     D3DXQUATERNION temp1, temp2;
1534 
1535     TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, pq4 %p, t %f\n", pout, pq1, pq2, pq3, pq4, t);
1536 
1537     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq4, t), D3DXQuaternionSlerp(&temp2, pq2, pq3, t), 2.0f * t * (1.0f - t));
1538     return pout;
1539 }
1540 
1541 static D3DXQUATERNION add_diff(const D3DXQUATERNION *q1, const D3DXQUATERNION *q2, const FLOAT add)
1542 {
1543     D3DXQUATERNION temp;
1544 
1545     temp.x = q1->x + add * q2->x;
1546     temp.y = q1->y + add * q2->y;
1547     temp.z = q1->z + add * q2->z;
1548     temp.w = q1->w + add * q2->w;
1549 
1550     return temp;
1551 }
1552 
/*
 * Compute three quaternions (*paout, *pbout, *pcout) from the four inputs
 * pq0..pq3.
 * NOTE(review): presumably these are the control points consumed by
 * D3DXQuaternionSquad(); confirm against the D3DX documentation.
 *
 * - *pcout is pq2, negated when dot(pq1, pq2) is negative.
 * - *paout is D3DXQuaternionMultiply(pq1, exp(-0.25 * (ln(A) + ln(B)))),
 *   where A and B are inverse(pq1) multiplied with the sign-adjusted pq0
 *   and with *pcout respectively.
 * - *pbout is the same construction around *pcout, using pq1 and the
 *   sign-adjusted pq3.
 *
 * *pbout is written before *paout and *pcout; all reads of pq0..pq3 happen
 * before the first output is written.
 */
void WINAPI D3DXQuaternionSquadSetup(D3DXQUATERNION *paout, D3DXQUATERNION *pbout, D3DXQUATERNION *pcout, const D3DXQUATERNION *pq0, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3)
{
    D3DXQUATERNION q, temp1, temp2, temp3, zero;
    D3DXQUATERNION aout, cout;

    TRACE("paout %p, pbout %p, pcout %p, pq0 %p, pq1 %p, pq2 %p, pq3 %p\n", paout, pbout, pcout, pq0, pq1, pq2, pq3);

    /* add_diff(&zero, q, -1.0f) is used below to negate a quaternion. */
    zero.x = 0.0f;
    zero.y = 0.0f;
    zero.z = 0.0f;
    zero.w = 0.0f;

    /* temp2: pq0, negated if its dot product with pq1 is negative. */
    if (D3DXQuaternionDot(pq0, pq1) < 0.0f)
        temp2 = add_diff(&zero, pq0, -1.0f);
    else
        temp2 = *pq0;

    /* cout: pq2, negated if its dot product with pq1 is negative. */
    if (D3DXQuaternionDot(pq1, pq2) < 0.0f)
        cout = add_diff(&zero, pq2, -1.0f);
    else
        cout = *pq2;

    /* temp3: pq3, negated if its dot product with the adjusted pq2 is negative. */
    if (D3DXQuaternionDot(&cout, pq3) < 0.0f)
        temp3 = add_diff(&zero, pq3, -1.0f);
    else
        temp3 = *pq3;

    /* aout = pq1 multiplied with exp(-0.25 * (ln(inv(pq1), temp2) + ln(inv(pq1), cout))). */
    D3DXQuaternionInverse(&temp1, pq1);
    D3DXQuaternionMultiply(&temp2, &temp1, &temp2);
    D3DXQuaternionLn(&temp2, &temp2);
    D3DXQuaternionMultiply(&q, &temp1, &cout);
    D3DXQuaternionLn(&q, &q);
    temp1 = add_diff(&temp2, &q, 1.0f);
    temp1.x *= -0.25f;
    temp1.y *= -0.25f;
    temp1.z *= -0.25f;
    temp1.w *= -0.25f;
    D3DXQuaternionExp(&temp1, &temp1);
    D3DXQuaternionMultiply(&aout, pq1, &temp1);

    /* Same construction around cout, using pq1 and temp3; the result goes straight to *pbout. */
    D3DXQuaternionInverse(&temp1, &cout);
    D3DXQuaternionMultiply(&temp2, &temp1, pq1);
    D3DXQuaternionLn(&temp2, &temp2);
    D3DXQuaternionMultiply(&q, &temp1, &temp3);
    D3DXQuaternionLn(&q, &q);
    temp1 = add_diff(&temp2, &q, 1.0f);
    temp1.x *= -0.25f;
    temp1.y *= -0.25f;
    temp1.z *= -0.25f;
    temp1.w *= -0.25f;
    D3DXQuaternionExp(&temp1, &temp1);
    D3DXQuaternionMultiply(pbout, &cout, &temp1);
    /* The remaining outputs are stored last, from the local copies. */
    *paout = aout;
    *pcout = cout;
}
1608 
1609 void WINAPI D3DXQuaternionToAxisAngle(const D3DXQUATERNION *pq, D3DXVECTOR3 *paxis, FLOAT *pangle)
1610 {
1611     TRACE("pq %p, paxis %p, pangle %p\n", pq, paxis, pangle);
1612 
1613     if (paxis)
1614     {
1615         paxis->x = pq->x;
1616         paxis->y = pq->y;
1617         paxis->z = pq->z;
1618     }
1619     if (pangle)
1620         *pangle = 2.0f * acosf(pq->w);
1621 }
1622 
1623 /*_________________D3DXVec2_____________________*/
1624 
1625 D3DXVECTOR2* WINAPI D3DXVec2BaryCentric(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT f, FLOAT g)
1626 {
1627     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1628 
1629     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1630     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1631     return pout;
1632 }
1633 
1634 D3DXVECTOR2* WINAPI D3DXVec2CatmullRom(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv0, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT s)
1635 {
1636     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1637 
1638     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1639     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1640     return pout;
1641 }
1642 
1643 D3DXVECTOR2* WINAPI D3DXVec2Hermite(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pt1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pt2, FLOAT s)
1644 {
1645     FLOAT h1, h2, h3, h4;
1646 
1647     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1648 
1649     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1650     h2 = s * s * s - 2.0f * s * s + s;
1651     h3 = -2.0f * s * s * s + 3.0f * s * s;
1652     h4 = s * s * s - s * s;
1653 
1654     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1655     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1656     return pout;
1657 }
1658 
1659 D3DXVECTOR2* WINAPI D3DXVec2Normalize(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv)
1660 {
1661     FLOAT norm;
1662 
1663     TRACE("pout %p, pv %p\n", pout, pv);
1664 
1665     norm = D3DXVec2Length(pv);
1666     if ( !norm )
1667     {
1668         pout->x = 0.0f;
1669         pout->y = 0.0f;
1670     }
1671     else
1672     {
1673         pout->x = pv->x / norm;
1674         pout->y = pv->y / norm;
1675     }
1676 
1677     return pout;
1678 }
1679 
1680 D3DXVECTOR4* WINAPI D3DXVec2Transform(D3DXVECTOR4 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1681 {
1682     D3DXVECTOR4 out;
1683 
1684     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1685 
1686     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y  + pm->u.m[3][0];
1687     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y  + pm->u.m[3][1];
1688     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y  + pm->u.m[3][2];
1689     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y  + pm->u.m[3][3];
1690     *pout = out;
1691     return pout;
1692 }
1693 
1694 D3DXVECTOR4* WINAPI D3DXVec2TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1695 {
1696     UINT i;
1697 
1698     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1699 
1700     for (i = 0; i < elements; ++i) {
1701         D3DXVec2Transform(
1702             (D3DXVECTOR4*)((char*)out + outstride * i),
1703             (const D3DXVECTOR2*)((const char*)in + instride * i),
1704             matrix);
1705     }
1706     return out;
1707 }
1708 
1709 D3DXVECTOR2* WINAPI D3DXVec2TransformCoord(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1710 {
1711     D3DXVECTOR2 v;
1712     FLOAT norm;
1713 
1714     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1715 
1716     v = *pv;
1717     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[3][3];
1718 
1719     pout->x = (pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[3][0]) / norm;
1720     pout->y = (pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[3][1]) / norm;
1721 
1722     return pout;
1723 }
1724 
1725 D3DXVECTOR2* WINAPI D3DXVec2TransformCoordArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1726 {
1727     UINT i;
1728 
1729     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1730 
1731     for (i = 0; i < elements; ++i) {
1732         D3DXVec2TransformCoord(
1733             (D3DXVECTOR2*)((char*)out + outstride * i),
1734             (const D3DXVECTOR2*)((const char*)in + instride * i),
1735             matrix);
1736     }
1737     return out;
1738 }
1739 
1740 D3DXVECTOR2* WINAPI D3DXVec2TransformNormal(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1741 {
1742     const D3DXVECTOR2 v = *pv;
1743 
1744     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1745 
1746     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y;
1747     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y;
1748     return pout;
1749 }
1750 
1751 D3DXVECTOR2* WINAPI D3DXVec2TransformNormalArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2 *in, UINT instride, const D3DXMATRIX *matrix, UINT elements)
1752 {
1753     UINT i;
1754 
1755     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1756 
1757     for (i = 0; i < elements; ++i) {
1758         D3DXVec2TransformNormal(
1759             (D3DXVECTOR2*)((char*)out + outstride * i),
1760             (const D3DXVECTOR2*)((const char*)in + instride * i),
1761             matrix);
1762     }
1763     return out;
1764 }
1765 
1766 /*_________________D3DXVec3_____________________*/
1767 
1768 D3DXVECTOR3* WINAPI D3DXVec3BaryCentric(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT f, FLOAT g)
1769 {
1770     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1771 
1772     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1773     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1774     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
1775     return pout;
1776 }
1777 
1778 D3DXVECTOR3* WINAPI D3DXVec3CatmullRom( D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv0, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT s)
1779 {
1780     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1781 
1782     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1783     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1784     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
1785     return pout;
1786 }
1787 
1788 D3DXVECTOR3* WINAPI D3DXVec3Hermite(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pt1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pt2, FLOAT s)
1789 {
1790     FLOAT h1, h2, h3, h4;
1791 
1792     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1793 
1794     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1795     h2 = s * s * s - 2.0f * s * s + s;
1796     h3 = -2.0f * s * s * s + 3.0f * s * s;
1797     h4 = s * s * s - s * s;
1798 
1799     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1800     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1801     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
1802     return pout;
1803 }
1804 
1805 D3DXVECTOR3* WINAPI D3DXVec3Normalize(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv)
1806 {
1807     FLOAT norm;
1808 
1809     TRACE("pout %p, pv %p\n", pout, pv);
1810 
1811     norm = D3DXVec3Length(pv);
1812     if ( !norm )
1813     {
1814         pout->x = 0.0f;
1815         pout->y = 0.0f;
1816         pout->z = 0.0f;
1817     }
1818     else
1819     {
1820         pout->x = pv->x / norm;
1821         pout->y = pv->y / norm;
1822         pout->z = pv->z / norm;
1823     }
1824 
1825     return pout;
1826 }
1827 
1828 D3DXVECTOR3* WINAPI D3DXVec3Project(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1829 {
1830     D3DXMATRIX m;
1831 
1832     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworld %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1833 
1834     D3DXMatrixIdentity(&m);
1835     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1836     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1837     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
1838 
1839     D3DXVec3TransformCoord(pout, pv, &m);
1840 
1841     if (pviewport)
1842     {
1843         pout->x = pviewport->X +  ( 1.0f + pout->x ) * pviewport->Width / 2.0f;
1844         pout->y = pviewport->Y +  ( 1.0f - pout->y ) * pviewport->Height / 2.0f;
1845         pout->z = pviewport->MinZ + pout->z * ( pviewport->MaxZ - pviewport->MinZ );
1846     }
1847     return pout;
1848 }
1849 
1850 D3DXVECTOR3* WINAPI D3DXVec3ProjectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1851 {
1852     UINT i;
1853 
1854     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1855         out, outstride, in, instride, viewport, projection, view, world, elements);
1856 
1857     for (i = 0; i < elements; ++i) {
1858         D3DXVec3Project(
1859             (D3DXVECTOR3*)((char*)out + outstride * i),
1860             (const D3DXVECTOR3*)((const char*)in + instride * i),
1861             viewport, projection, view, world);
1862     }
1863     return out;
1864 }
1865 
1866 D3DXVECTOR4* WINAPI D3DXVec3Transform(D3DXVECTOR4 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1867 {
1868     D3DXVECTOR4 out;
1869 
1870     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1871 
1872     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0];
1873     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1];
1874     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2];
1875     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3];
1876     *pout = out;
1877     return pout;
1878 }
1879 
1880 D3DXVECTOR4* WINAPI D3DXVec3TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1881 {
1882     UINT i;
1883 
1884     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1885 
1886     for (i = 0; i < elements; ++i) {
1887         D3DXVec3Transform(
1888             (D3DXVECTOR4*)((char*)out + outstride * i),
1889             (const D3DXVECTOR3*)((const char*)in + instride * i),
1890             matrix);
1891     }
1892     return out;
1893 }
1894 
1895 D3DXVECTOR3* WINAPI D3DXVec3TransformCoord(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1896 {
1897     D3DXVECTOR3 out;
1898     FLOAT norm;
1899 
1900     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1901 
1902     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] *pv->z + pm->u.m[3][3];
1903 
1904     out.x = (pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0]) / norm;
1905     out.y = (pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1]) / norm;
1906     out.z = (pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2]) / norm;
1907 
1908     *pout = out;
1909 
1910     return pout;
1911 }
1912 
1913 D3DXVECTOR3* WINAPI D3DXVec3TransformCoordArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1914 {
1915     UINT i;
1916 
1917     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1918 
1919     for (i = 0; i < elements; ++i) {
1920         D3DXVec3TransformCoord(
1921             (D3DXVECTOR3*)((char*)out + outstride * i),
1922             (const D3DXVECTOR3*)((const char*)in + instride * i),
1923             matrix);
1924     }
1925     return out;
1926 }
1927 
1928 D3DXVECTOR3* WINAPI D3DXVec3TransformNormal(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1929 {
1930     const D3DXVECTOR3 v = *pv;
1931 
1932     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1933 
1934     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[2][0] * v.z;
1935     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[2][1] * v.z;
1936     pout->z = pm->u.m[0][2] * v.x + pm->u.m[1][2] * v.y + pm->u.m[2][2] * v.z;
1937     return pout;
1938 
1939 }
1940 
1941 D3DXVECTOR3* WINAPI D3DXVec3TransformNormalArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1942 {
1943     UINT i;
1944 
1945     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1946 
1947     for (i = 0; i < elements; ++i) {
1948         D3DXVec3TransformNormal(
1949             (D3DXVECTOR3*)((char*)out + outstride * i),
1950             (const D3DXVECTOR3*)((const char*)in + instride * i),
1951             matrix);
1952     }
1953     return out;
1954 }
1955 
1956 D3DXVECTOR3 * WINAPI D3DXVec3Unproject(D3DXVECTOR3 *out, const D3DXVECTOR3 *v,
1957         const D3DVIEWPORT9 *viewport, const D3DXMATRIX *projection, const D3DXMATRIX *view,
1958         const D3DXMATRIX *world)
1959 {
1960     D3DXMATRIX m;
1961 
1962     TRACE("out %p, v %p, viewport %p, projection %p, view %p, world %p.\n",
1963             out, v, viewport, projection, view, world);
1964 
1965     D3DXMatrixIdentity(&m);
1966     if (world)
1967         D3DXMatrixMultiply(&m, &m, world);
1968     if (view)
1969         D3DXMatrixMultiply(&m, &m, view);
1970     if (projection)
1971         D3DXMatrixMultiply(&m, &m, projection);
1972     D3DXMatrixInverse(&m, NULL, &m);
1973 
1974     *out = *v;
1975     if (viewport)
1976     {
1977         out->x = 2.0f * (out->x - viewport->X) / viewport->Width - 1.0f;
1978         out->y = 1.0f - 2.0f * (out->y - viewport->Y) / viewport->Height;
1979         out->z = (out->z - viewport->MinZ) / (viewport->MaxZ - viewport->MinZ);
1980     }
1981     D3DXVec3TransformCoord(out, out, &m);
1982     return out;
1983 }
1984 
1985 D3DXVECTOR3* WINAPI D3DXVec3UnprojectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1986 {
1987     UINT i;
1988 
1989     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1990         out, outstride, in, instride, viewport, projection, view, world, elements);
1991 
1992     for (i = 0; i < elements; ++i) {
1993         D3DXVec3Unproject(
1994             (D3DXVECTOR3*)((char*)out + outstride * i),
1995             (const D3DXVECTOR3*)((const char*)in + instride * i),
1996             viewport, projection, view, world);
1997     }
1998     return out;
1999 }
2000 
2001 /*_________________D3DXVec4_____________________*/
2002 
2003 D3DXVECTOR4* WINAPI D3DXVec4BaryCentric(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT f, FLOAT g)
2004 {
2005     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
2006 
2007     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
2008     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
2009     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
2010     pout->w = (1.0f-f-g) * (pv1->w) + f * (pv2->w) + g * (pv3->w);
2011     return pout;
2012 }
2013 
2014 D3DXVECTOR4* WINAPI D3DXVec4CatmullRom(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv0, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT s)
2015 {
2016     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
2017 
2018     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
2019     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
2020     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
2021     pout->w = 0.5f * (2.0f * pv1->w + (pv2->w - pv0->w) *s + (2.0f *pv0->w - 5.0f * pv1->w + 4.0f * pv2->w - pv3->w) * s * s + (pv3->w -3.0f * pv2->w + 3.0f * pv1->w - pv0->w) * s * s * s);
2022     return pout;
2023 }
2024 
2025 D3DXVECTOR4* WINAPI D3DXVec4Cross(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3)
2026 {
2027     D3DXVECTOR4 out;
2028 
2029     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
2030 
2031     out.x = pv1->y * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->y * pv3->w - pv3->y * pv2->w) + pv1->w * (pv2->y * pv3->z - pv2->z *pv3->y);
2032     out.y = -(pv1->x * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->x * pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->z - pv3->x * pv2->z));
2033     out.z = pv1->x * (pv2->y * pv3->w - pv3->y * pv2->w) - pv1->y * (pv2->x *pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->y - pv3->x * pv2->y);
2034     out.w = -(pv1->x * (pv2->y * pv3->z - pv3->y * pv2->z) - pv1->y * (pv2->x * pv3->z - pv3->x *pv2->z) + pv1->z * (pv2->x * pv3->y - pv3->x * pv2->y));
2035     *pout = out;
2036     return pout;
2037 }
2038 
2039 D3DXVECTOR4* WINAPI D3DXVec4Hermite(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pt1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pt2, FLOAT s)
2040 {
2041     FLOAT h1, h2, h3, h4;
2042 
2043     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
2044 
2045     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
2046     h2 = s * s * s - 2.0f * s * s + s;
2047     h3 = -2.0f * s * s * s + 3.0f * s * s;
2048     h4 = s * s * s - s * s;
2049 
2050     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
2051     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
2052     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
2053     pout->w = h1 * (pv1->w) + h2 * (pt1->w) + h3 * (pv2->w) + h4 * (pt2->w);
2054     return pout;
2055 }
2056 
2057 D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv)
2058 {
2059     FLOAT norm;
2060 
2061     TRACE("pout %p, pv %p\n", pout, pv);
2062 
2063     norm = D3DXVec4Length(pv);
2064 
2065     pout->x = pv->x / norm;
2066     pout->y = pv->y / norm;
2067     pout->z = pv->z / norm;
2068     pout->w = pv->w / norm;
2069 
2070     return pout;
2071 }
2072 
2073 D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv, const D3DXMATRIX *pm)
2074 {
2075     D3DXVECTOR4 out;
2076 
2077     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
2078 
2079     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0] * pv->w;
2080     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1] * pv->w;
2081     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2] * pv->w;
2082     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3] * pv->w;
2083     *pout = out;
2084     return pout;
2085 }
2086 
2087 D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR4* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
2088 {
2089     UINT i;
2090 
2091     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
2092 
2093     for (i = 0; i < elements; ++i) {
2094         D3DXVec4Transform(
2095             (D3DXVECTOR4*)((char*)out + outstride * i),
2096             (const D3DXVECTOR4*)((const char*)in + instride * i),
2097             matrix);
2098     }
2099     return out;
2100 }
2101 
/* Converts a single precision float to the 16-bit half float bit pattern
 * (1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits).
 *
 * Infinities, NaNs and values too large for the format all map to 0x7fff
 * (0xffff when negative); values too small map to a signed zero. Rounding
 * is to nearest, ties to even. */
unsigned short float_32_to_16(const float in)
{
    const int negative = (copysignf(1, in) < 0);
    const unsigned short sign_bit = negative ? 0x8000 : 0x0000;
    float scaled = fabsf(in);
    float fraction;
    int exponent = 0, unrounded_exponent;
    unsigned int mantissa;
    unsigned short half;

    /* Deal with special numbers */
    if (isinf(in) || isnan(in))
        return negative ? 0xffff : 0x7fff;
    if (in == 0.0f)
        return sign_bit;

    /* Scale the magnitude into [2^10, 2^11) so that its integer part is an
     * 11-bit mantissa with the leading one explicit. Only one of the two
     * loops can run for a given input. */
    while (scaled < (float)(1u << 10))
    {
        scaled *= 2.0f;
        exponent--;
    }
    while (scaled >= (float)(1u << 11))
    {
        scaled /= 2.0f;
        exponent++;
    }

    exponent += 10;  /* Normalize the mantissa */
    exponent += 15;  /* Exponent is encoded with excess 15 */

    unrounded_exponent = exponent;

    mantissa = (unsigned int)scaled;
    fraction = scaled - mantissa;
    if (fraction > 0.5f || (fraction == 0.5f && mantissa % 2 == 1))
        mantissa++; /* round to nearest, ties to even */

    if (mantissa == 2048)
    {
        /* Rounding carried into the next binade. */
        mantissa = 1024;
        exponent++;
    }

    if (exponent > 31)
    {
        /* too big */
        half = 0x7fff;
    }
    else if (exponent > 0)
    {
        /* Normalized half float: drop the explicit leading one. */
        half = (exponent << 10) | (mantissa & 0x3ff);
    }
    else
    {
        /* Denormalized half float */

        /* return a signed zero for numbers too small to represent in half floats */
        if (exponent < -11)
            return sign_bit;

        /* Redo the rounding from the unrounded value at denormal precision. */
        exponent = unrounded_exponent;

        /* the 13 extra bits from single precision are used for rounding */
        mantissa = (unsigned int)(scaled * (1u << 13));
        mantissa >>= 1 - exponent; /* denormalize */

        mantissa -= ~(mantissa >> 13) & 1; /* round half to even */
        /* remove 13 least significant bits to get half float precision */
        mantissa >>= 12;
        half = (mantissa >> 1) + (mantissa & 1);
    }

    return half | sign_bit; /* Add the sign */
}
2186 
2187 D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, const FLOAT *pin, UINT n)
2188 {
2189     unsigned int i;
2190 
2191     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2192 
2193     for (i = 0; i < n; ++i)
2194     {
2195         pout[i].value = float_32_to_16(pin[i]);
2196     }
2197 
2198     return pout;
2199 }
2200 
2201 /* Native d3dx9's D3DXFloat16to32Array lacks support for NaN and Inf. Specifically, e = 16 is treated as a
2202  * regular number - e.g., 0x7fff is converted to 131008.0 and 0xffff to -131008.0. */
2203 float float_16_to_32(const unsigned short in)
2204 {
2205     const unsigned short s = (in & 0x8000);
2206     const unsigned short e = (in & 0x7C00) >> 10;
2207     const unsigned short m = in & 0x3FF;
2208     const float sgn = (s ? -1.0f : 1.0f);
2209 
2210     if (e == 0)
2211     {
2212         if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */
2213         else return sgn * powf(2, -14.0f) * (m / 1024.0f);
2214     }
2215     else
2216     {
2217         return sgn * powf(2, e - 15.0f) * (1.0f + (m / 1024.0f));
2218     }
2219 }
2220 
2221 FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, const D3DXFLOAT16 *pin, UINT n)
2222 {
2223     unsigned int i;
2224 
2225     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2226 
2227     for (i = 0; i < n; ++i)
2228     {
2229         pout[i] = float_16_to_32(pin[i].value);
2230     }
2231 
2232     return pout;
2233 }
2234 
2235 /*_________________D3DXSH________________*/
2236 
2237 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)
2238 {
2239     UINT i;
2240 
2241     TRACE("out %p, order %u, a %p, b %p\n", out, order, a, b);
2242 
2243     for (i = 0; i < order * order; i++)
2244         out[i] = a[i] + b[i];
2245 
2246     return out;
2247 }
2248 
2249 FLOAT WINAPI D3DXSHDot(UINT order, const FLOAT *a, const FLOAT *b)
2250 {
2251     FLOAT s;
2252     UINT i;
2253 
2254     TRACE("order %u, a %p, b %p\n", order, a, b);
2255 
2256     s = a[0] * b[0];
2257     for (i = 1; i < order * order; i++)
2258         s += a[i] * b[i];
2259 
2260     return s;
2261 }
2262 
2263 static void weightedcapintegrale(FLOAT *out, UINT order, FLOAT angle)
2264 {
2265     FLOAT coeff[3];
2266 
2267     coeff[0] = cosf(angle);
2268 
2269     out[0] = 2.0f * D3DX_PI * (1.0f - coeff[0]);
2270     out[1] = D3DX_PI * sinf(angle) * sinf(angle);
2271     if (order <= 2)
2272         return;
2273 
2274     out[2] = coeff[0] * out[1];
2275     if (order == 3)
2276         return;
2277 
2278     coeff[1] = coeff[0] * coeff[0];
2279     coeff[2] = coeff[1] * coeff[1];
2280 
2281     out[3] = D3DX_PI * (-1.25f * coeff[2] + 1.5f * coeff[1] - 0.25f);
2282     if (order == 4)
2283         return;
2284 
2285     out[4] = -0.25f * D3DX_PI * coeff[0] * (7.0f * coeff[2] - 10.0f * coeff[1] + 3.0f);
2286     if (order == 5)
2287         return;
2288 
2289     out[5] = D3DX_PI * (-2.625f * coeff[2] * coeff[1] + 4.375f * coeff[2] - 1.875f * coeff[1] + 0.125f);
2290 }
2291 
2292 HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2293     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2294 {
2295     FLOAT cap[6], clamped_angle, norm, scale, temp;
2296     UINT i, index, j;
2297 
2298     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2299         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2300 
2301     if (radius <= 0.0f)
2302         return D3DXSHEvalDirectionalLight(order, dir, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2303 
2304     clamped_angle = (radius > D3DX_PI / 2.0f) ? (D3DX_PI / 2.0f) : radius;
2305     norm = sinf(clamped_angle) * sinf(clamped_angle);
2306 
2307     if (order > D3DXSH_MAXORDER)
2308     {
2309         WARN("Order clamped at D3DXSH_MAXORDER\n");
2310         order = D3DXSH_MAXORDER;
2311     }
2312 
2313     weightedcapintegrale(cap, order, radius);
2314     D3DXSHEvalDirection(rout, order, dir);
2315 
2316     for (i = 0; i < order; i++)
2317     {
2318         scale = cap[i] / norm;
2319 
2320         for (j = 0; j < 2 * i + 1; j++)
2321         {
2322             index = i * i + j;
2323             temp = rout[index] * scale;
2324 
2325             rout[index] = temp * Rintensity;
2326             if (gout)
2327                 gout[index] = temp * Gintensity;
2328             if (bout)
2329                 bout[index] = temp * Bintensity;
2330         }
2331     }
2332 
2333     return D3D_OK;
2334 }
2335 
2336 FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
2337 {
2338     const FLOAT dirxx = dir->x * dir->x;
2339     const FLOAT dirxy = dir->x * dir->y;
2340     const FLOAT dirxz = dir->x * dir->z;
2341     const FLOAT diryy = dir->y * dir->y;
2342     const FLOAT diryz = dir->y * dir->z;
2343     const FLOAT dirzz = dir->z * dir->z;
2344     const FLOAT dirxxxx = dirxx * dirxx;
2345     const FLOAT diryyyy = diryy * diryy;
2346     const FLOAT dirzzzz = dirzz * dirzz;
2347     const FLOAT dirxyxy = dirxy * dirxy;
2348 
2349     TRACE("out %p, order %u, dir %p\n", out, order, dir);
2350 
2351     if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
2352         return out;
2353 
2354     out[0] = 0.5f / sqrtf(D3DX_PI);
2355     out[1] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->y;
2356     out[2] = 0.5f / sqrtf(D3DX_PI / 3.0f) * dir->z;
2357     out[3] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->x;
2358     if (order == 2)
2359         return out;
2360 
2361     out[4] = 0.5f / sqrtf(D3DX_PI / 15.0f) * dirxy;
2362     out[5] = -0.5f / sqrtf(D3DX_PI / 15.0f) * diryz;
2363     out[6] = 0.25f / sqrtf(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
2364     out[7] = -0.5f / sqrtf(D3DX_PI / 15.0f) * dirxz;
2365     out[8] = 0.25f / sqrtf(D3DX_PI / 15.0f) * (dirxx - diryy);
2366     if (order == 3)
2367         return out;
2368 
2369     out[9] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
2370     out[10] = sqrtf(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
2371     out[11] = -sqrtf(42.0f / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
2372     out[12] = sqrtf(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
2373     out[13] = sqrtf(42.0f / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
2374     out[14] = sqrtf(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
2375     out[15] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
2376     if (order == 4)
2377         return out;
2378 
2379     out[16] = 0.75f * sqrtf(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
2380     out[17] = 3.0f * dir->z * out[9];
2381     out[18] = 0.75f * sqrtf(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
2382     out[19] = 0.375f * sqrtf(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
2383     out[20] = 3.0f / (16.0f * sqrtf(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
2384     out[21] = 0.375f * sqrtf(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
2385     out[22] = 0.375f * sqrtf(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
2386     out[23] = 3.0f * dir->z * out[15];
2387     out[24] = 3.0f / 16.0f * sqrtf(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2388     if (order == 5)
2389         return out;
2390 
2391     out[25] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
2392     out[26] = 0.75f * sqrtf(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
2393     out[27] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
2394     out[28] = sqrtf(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
2395     out[29] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2396     out[30] = sqrtf(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
2397     out[31] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2398     out[32] = sqrtf(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
2399     out[33] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
2400     out[34] = 3.0f / 16.0f * sqrtf(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2401     out[35] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);
2402 
2403     return out;
2404 }
2405 
2406 HRESULT WINAPI D3DXSHEvalDirectionalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *Rout, FLOAT *Gout, FLOAT *Bout)
2407 {
2408     FLOAT s, temp;
2409     UINT j;
2410 
2411     TRACE("Order %u, Vector %p, Red %f, Green %f, Blue %f, Rout %p, Gout %p, Bout %p\n", order, dir, Rintensity, Gintensity, Bintensity, Rout, Gout, Bout);
2412 
2413     s = 0.75f;
2414     if ( order > 2 )
2415         s += 5.0f / 16.0f;
2416     if ( order > 4 )
2417         s -= 3.0f / 32.0f;
2418     s /= D3DX_PI;
2419 
2420     D3DXSHEvalDirection(Rout, order, dir);
2421     for (j = 0; j < order * order; j++)
2422     {
2423         temp = Rout[j] / s;
2424 
2425         Rout[j] = Rintensity * temp;
2426         if ( Gout )
2427             Gout[j] = Gintensity * temp;
2428         if ( Bout )
2429             Bout[j] = Bintensity * temp;
2430     }
2431 
2432     return D3D_OK;
2433 }
2434 
2435 HRESULT WINAPI D3DXSHEvalHemisphereLight(UINT order, const D3DXVECTOR3 *dir, D3DXCOLOR top, D3DXCOLOR bottom,
2436     FLOAT *rout, FLOAT *gout, FLOAT *bout)
2437 {
2438     FLOAT a[2], temp[4];
2439     UINT i, j;
2440 
2441     TRACE("order %u, dir %p, rout %p, gout %p, bout %p\n", order, dir, rout, gout, bout);
2442 
2443     D3DXSHEvalDirection(temp, 2, dir);
2444 
2445     a[0] = (top.r + bottom.r) * 3.0f * D3DX_PI;
2446     a[1] = (top.r - bottom.r) * D3DX_PI;
2447     for (i = 0; i < order; i++)
2448         for (j = 0; j < 2 * i + 1; j++)
2449             if (i < 2)
2450                 rout[i * i + j] = temp[i * i + j] * a[i];
2451             else
2452                 rout[i * i + j] = 0.0f;
2453 
2454     if (gout)
2455     {
2456         a[0] = (top.g + bottom.g) * 3.0f * D3DX_PI;
2457         a[1] = (top.g - bottom.g) * D3DX_PI;
2458         for (i = 0; i < order; i++)
2459             for (j = 0; j < 2 * i + 1; j++)
2460                 if (i < 2)
2461                     gout[i * i + j] = temp[i * i + j] * a[i];
2462                 else
2463                     gout[i * i + j] = 0.0f;
2464     }
2465 
2466     if (bout)
2467     {
2468         a[0] = (top.b + bottom.b) * 3.0f * D3DX_PI;
2469         a[1] = (top.b - bottom.b) * D3DX_PI;
2470         for (i = 0; i < order; i++)
2471             for (j = 0; j < 2 * i + 1; j++)
2472                 if (i < 2)
2473                     bout[i * i + j] = temp[i * i + j] * a[i];
2474                 else
2475                     bout[i * i + j] = 0.0f;
2476     }
2477 
2478     return D3D_OK;
2479 }
2480 
2481 HRESULT WINAPI D3DXSHEvalSphericalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2482     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2483 {
2484     D3DXVECTOR3 normal;
2485     FLOAT cap[6], clamped_angle, dist, temp;
2486     UINT i, index, j;
2487 
2488     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2489         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2490 
2491     if (order > D3DXSH_MAXORDER)
2492     {
2493         WARN("Order clamped at D3DXSH_MAXORDER\n");
2494         order = D3DXSH_MAXORDER;
2495     }
2496 
2497     if (radius < 0.0f)
2498         radius = -radius;
2499 
2500     dist = D3DXVec3Length(dir);
2501     clamped_angle = (dist <= radius) ? D3DX_PI / 2.0f : asinf(radius / dist);
2502 
2503     weightedcapintegrale(cap, order, clamped_angle);
2504     D3DXVec3Normalize(&normal, dir);
2505     D3DXSHEvalDirection(rout, order, &normal);
2506 
2507     for (i = 0; i < order; i++)
2508         for (j = 0; j < 2 * i + 1; j++)
2509         {
2510             index = i * i + j;
2511             temp = rout[index] * cap[i];
2512 
2513             rout[index] = temp * Rintensity;
2514             if (gout)
2515                 gout[index] = temp * Gintensity;
2516             if (bout)
2517                 bout[index] = temp * Bintensity;
2518         }
2519 
2520     return D3D_OK;
2521 }
2522 
2523 FLOAT * WINAPI D3DXSHMultiply2(FLOAT *out, const FLOAT *a, const FLOAT *b)
2524 {
2525     FLOAT ta, tb;
2526 
2527     TRACE("out %p, a %p, b %p\n", out, a, b);
2528 
2529     ta = 0.28209479f * a[0];
2530     tb = 0.28209479f * b[0];
2531 
2532     out[0] = 0.28209479f * D3DXSHDot(2, a, b);
2533     out[1] = ta * b[1] + tb * a[1];
2534     out[2] = ta * b[2] + tb * a[2];
2535     out[3] = ta * b[3] + tb * a[3];
2536 
2537     return out;
2538 }
2539 
2540 FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
2541 {
2542     FLOAT t, ta, tb;
2543 
2544     TRACE("out %p, a %p, b %p\n", out, a, b);
2545 
2546     out[0] = 0.28209479f * a[0] * b[0];
2547 
2548     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2549     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2550     out[1] = ta * b[1] + tb * a[1];
2551     t = a[1] * b[1];
2552     out[0] += 0.28209479f * t;
2553     out[6] = -0.12615663f * t;
2554     out[8] = -0.21850969f * t;
2555 
2556     ta = 0.21850969f * a[5];
2557     tb = 0.21850969f * b[5];
2558     out[1] += ta * b[2] + tb * a[2];
2559     out[2] = ta * b[1] + tb * a[1];
2560     t = a[1] * b[2] +a[2] * b[1];
2561     out[5] = 0.21850969f * t;
2562 
2563     ta = 0.21850969f * a[4];
2564     tb = 0.21850969f * b[4];
2565     out[1] += ta * b[3] + tb * a[3];
2566     out[3]  = ta * b[1] + tb * a[1];
2567     t = a[1] * b[3] + a[3] * b[1];
2568     out[4] = 0.21850969f * t;
2569 
2570     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2571     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2572     out[2] += ta * b[2] + tb * a[2];
2573     t = a[2] * b[2];
2574     out[0] += 0.28209480f * t;
2575     out[6] += 0.25231326f * t;
2576 
2577     ta = 0.21850969f * a[7];
2578     tb = 0.21850969f * b[7];
2579     out[2] += ta * b[3] + tb * a[3];
2580     out[3] += ta * b[2] + tb * a[2];
2581     t = a[2] * b[3] + a[3] * b[2];
2582     out[7] = 0.21850969f * t;
2583 
2584     ta = 0.28209479f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2585     tb = 0.28209479f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2586     out[3] += ta * b[3] + tb * a[3];
2587     t = a[3] * b[3];
2588     out[0] += 0.28209479f * t;
2589     out[6] -= 0.12615663f * t;
2590     out[8] += 0.21850969f * t;
2591 
2592     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2593     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2594     out[4] += ta * b[4] + tb * a[4];
2595     t = a[4] * b[4];
2596     out[0] += 0.28209479f * t;
2597     out[6] -= 0.18022375f * t;
2598 
2599     ta = 0.15607835f * a[7];
2600     tb = 0.15607835f * b[7];
2601     out[4] += ta * b[5] + tb * a[5];
2602     out[5] += ta * b[4] + tb * a[4];
2603     t = a[4] * b[5] + a[5] * b[4];
2604     out[7] += 0.15607835f * t;
2605 
2606     ta = 0.28209479f * a[0] + 0.09011188f * a[6] - 0.15607835f * a[8];
2607     tb = 0.28209479f * b[0] + 0.09011188f * b[6] - 0.15607835f * b[8];
2608     out[5] += ta * b[5] + tb * a[5];
2609     t = a[5] * b[5];
2610     out[0] += 0.28209479f * t;
2611     out[6] += 0.09011188f * t;
2612     out[8] -= 0.15607835f * t;
2613 
2614     ta = 0.28209480f * a[0];
2615     tb = 0.28209480f * b[0];
2616     out[6] += ta * b[6] + tb * a[6];
2617     t = a[6] * b[6];
2618     out[0] += 0.28209480f * t;
2619     out[6] += 0.18022376f * t;
2620 
2621     ta = 0.28209479f * a[0] + 0.09011188f * a[6] + 0.15607835f * a[8];
2622     tb = 0.28209479f * b[0] + 0.09011188f * b[6] + 0.15607835f * b[8];
2623     out[7] += ta * b[7] + tb * a[7];
2624     t = a[7] * b[7];
2625     out[0] += 0.28209479f * t;
2626     out[6] += 0.09011188f * t;
2627     out[8] += 0.15607835f * t;
2628 
2629     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2630     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2631     out[8] += ta * b[8] + tb * a[8];
2632     t = a[8] * b[8];
2633     out[0] += 0.28209479f * t;
2634     out[6] -= 0.18022375f * t;
2635 
2636     return out;
2637 }
2638 
2639 FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b)
2640 {
2641     FLOAT ta, tb, t;
2642 
2643     TRACE("out %p, a %p, b %p\n", out, a, b);
2644 
2645     out[0] = 0.28209479f * a[0] * b[0];
2646 
2647     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2648     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2649     out[1] = ta * b[1] + tb * a[1];
2650     t = a[1] * b[1];
2651     out[0] += 0.28209479f * t;
2652     out[6] = -0.12615663f * t;
2653     out[8] = -0.21850969f * t;
2654 
2655     ta = 0.21850969f * a[3] - 0.05839917f * a[13] - 0.22617901f * a[15];
2656     tb = 0.21850969f * b[3] - 0.05839917f * b[13] - 0.22617901f * b[15];
2657     out[1] += ta * b[4] + tb * a[4];
2658     out[4] = ta * b[1] + tb * a[1];
2659     t = a[1] * b[4] + a[4] * b[1];
2660     out[3] = 0.21850969f * t;
2661     out[13] = -0.05839917f * t;
2662     out[15] = -0.22617901f * t;
2663 
2664     ta = 0.21850969f * a[2] - 0.14304817f * a[12] - 0.18467439f * a[14];
2665     tb = 0.21850969f * b[2] - 0.14304817f * b[12] - 0.18467439f * b[14];
2666     out[1] += ta * b[5] + tb * a[5];
2667     out[5] = ta * b[1] + tb * a[1];
2668     t = a[1] * b[5] + a[5] * b[1];
2669     out[2] = 0.21850969f * t;
2670     out[12] = -0.14304817f * t;
2671     out[14] = -0.18467439f * t;
2672 
2673     ta = 0.20230066f * a[11];
2674     tb = 0.20230066f * b[11];
2675     out[1] += ta * b[6] + tb * a[6];
2676     out[6] += ta * b[1] + tb * a[1];
2677     t = a[1] * b[6] + a[6] * b[1];
2678     out[11] = 0.20230066f * t;
2679 
2680     ta = 0.22617901f * a[9] + 0.05839917f * a[11];
2681     tb = 0.22617901f * b[9] + 0.05839917f * b[11];
2682     out[1] += ta * b[8] + tb * a[8];
2683     out[8] += ta * b[1] + tb * a[1];
2684     t = a[1] * b[8] + a[8] * b[1];
2685     out[9] = 0.22617901f * t;
2686     out[11] += 0.05839917f * t;
2687 
2688     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2689     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2690     out[2] += ta * b[2] + tb * a[2];
2691     t = a[2] * b[2];
2692     out[0] += 0.28209480f * t;
2693     out[6] += 0.25231326f * t;
2694 
2695     ta = 0.24776671f * a[12];
2696     tb = 0.24776671f * b[12];
2697     out[2] += ta * b[6] + tb * a[6];
2698     out[6] += ta * b[2] + tb * a[2];
2699     t = a[2] * b[6] + a[6] * b[2];
2700     out[12] += 0.24776671f * t;
2701 
2702     ta = 0.28209480f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2703     tb = 0.28209480f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2704     out[3] += ta * b[3] + tb * a[3];
2705     t = a[3] * b[3];
2706     out[0] += 0.28209480f * t;
2707     out[6] -= 0.12615663f * t;
2708     out[8] += 0.21850969f * t;
2709 
2710     ta = 0.20230066f * a[13];
2711     tb = 0.20230066f * b[13];
2712     out[3] += ta * b[6] + tb * a[6];
2713     out[6] += ta * b[3] + tb * a[3];
2714     t = a[3] * b[6] + a[6] * b[3];
2715     out[13] += 0.20230066f * t;
2716 
2717     ta = 0.21850969f * a[2] - 0.14304817f * a[12] + 0.18467439f * a[14];
2718     tb = 0.21850969f * b[2] - 0.14304817f * b[12] + 0.18467439f * b[14];
2719     out[3] += ta * b[7] + tb * a[7];
2720     out[7] = ta * b[3] + tb * a[3];
2721     t = a[3] * b[7] + a[7] * b[3];
2722     out[2] += 0.21850969f * t;
2723     out[12] -= 0.14304817f * t;
2724     out[14] += 0.18467439f * t;
2725 
2726     ta = -0.05839917f * a[13] + 0.22617901f * a[15];
2727     tb = -0.05839917f * b[13] + 0.22617901f * b[15];
2728     out[3] += ta * b[8] + tb * a[8];
2729     out[8] += ta * b[3] + tb * a[3];
2730     t = a[3] * b[8] + a[8] * b[3];
2731     out[13] -= 0.05839917f * t;
2732     out[15] += 0.22617901f * t;
2733 
2734     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2735     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2736     out[4] += ta * b[4] + tb * a[4];
2737     t = a[4] * b[4];
2738     out[0] += 0.28209479f * t;
2739     out[6] -= 0.18022375f * t;
2740 
2741     ta = 0.15607835f * a[7];
2742     tb = 0.15607835f * b[7];
2743     out[4] += ta * b[5] + tb * a[5];
2744     out[5] += ta * b[4] + tb * a[4];
2745     t = a[4] * b[5] + a[5] * b[4];
2746     out[7] += 0.15607835f * t;
2747 
2748     ta = 0.22617901f * a[3] - 0.09403160f * a[13];
2749     tb = 0.22617901f * b[3] - 0.09403160f * b[13];
2750     out[4] += ta * b[9] + tb * a[9];
2751     out[9] += ta * b[4] + tb * a[4];
2752     t = a[4] * b[9] + a[9] * b[4];
2753     out[3] += 0.22617901f * t;
2754     out[13] -= 0.09403160f * t;
2755 
2756     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2757     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2758     out[4] += ta * b[10] + tb * a [10];
2759     out[10] = ta * b[4] + tb * a[4];
2760     t = a[4] * b[10] + a[10] * b[4];
2761     out[2] += 0.18467439f * t;
2762     out[12] -= 0.18806319f * t;
2763 
2764     ta = -0.05839917f * a[3] + 0.14567312f * a[13] + 0.09403160f * a[15];
2765     tb = -0.05839917f * b[3] + 0.14567312f * b[13] + 0.09403160f * b[15];
2766     out[4] += ta * b[11] + tb * a[11];
2767     out[11] += ta * b[4] + tb * a[4];
2768     t = a[4] * b[11] + a[11] * b[4];
2769     out[3] -= 0.05839917f * t;
2770     out[13] += 0.14567312f * t;
2771     out[15] += 0.09403160f * t;
2772 
2773     ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
2774     tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
2775     out[5] += ta * b[5] + tb * a[5];
2776     t = a[5] * b[5];
2777     out[0] += 0.28209479f * t;
2778     out[6] += 0.09011186f * t;
2779     out[8] -= 0.15607835f * t;
2780 
2781     ta = 0.14867701f * a[14];
2782     tb = 0.14867701f * b[14];
2783     out[5] += ta * b[9] + tb * a[9];
2784     out[9] += ta * b[5] + tb * a[5];
2785     t = a[5] * b[9] + a[9] * b[5];
2786     out[14] += 0.14867701f * t;
2787 
2788     ta = 0.18467439f * a[3] + 0.11516472f * a[13] - 0.14867701f * a[15];
2789     tb = 0.18467439f * b[3] + 0.11516472f * b[13] - 0.14867701f * b[15];
2790     out[5] += ta * b[10] + tb * a[10];
2791     out[10] += ta * b[5] + tb * a[5];
2792     t = a[5] * b[10] + a[10] * b[5];
2793     out[3] += 0.18467439f * t;
2794     out[13] += 0.11516472f * t;
2795     out[15] -= 0.14867701f * t;
2796 
2797     ta = 0.23359668f * a[2] + 0.05947080f * a[12] - 0.11516472f * a[14];
2798     tb = 0.23359668f * b[2] + 0.05947080f * b[12] - 0.11516472f * b[14];
2799     out[5] += ta * b[11] + tb * a[11];
2800     out[11] += ta * b[5] + tb * a[5];
2801     t = a[5] * b[11] + a[11] * b[5];
2802     out[2] += 0.23359668f * t;
2803     out[12] += 0.05947080f * t;
2804     out[14] -= 0.11516472f * t;
2805 
2806     ta = 0.28209479f * a[0];
2807     tb = 0.28209479f * b[0];
2808     out[6] += ta * b[6] + tb * a[6];
2809     t = a[6] * b[6];
2810     out[0] += 0.28209479f * t;
2811     out[6] += 0.18022376f * t;
2812 
2813     ta = 0.09011186f * a[6] + 0.28209479f * a[0] + 0.15607835f * a[8];
2814     tb = 0.09011186f * b[6] + 0.28209479f * b[0] + 0.15607835f * b[8];
2815     out[7] += ta * b[7] + tb * a[7];
2816     t = a[7] * b[7];
2817     out[6] += 0.09011186f * t;
2818     out[0] += 0.28209479f * t;
2819     out[8] += 0.15607835f * t;
2820 
2821     ta = 0.14867701f * a[9] + 0.18467439f * a[1] + 0.11516472f * a[11];
2822     tb = 0.14867701f * b[9] + 0.18467439f * b[1] + 0.11516472f * b[11];
2823     out[7] += ta * b[10] + tb * a[10];
2824     out[10] += ta * b[7] + tb * a[7];
2825     t = a[7] * b[10] + a[10] * b[7];
2826     out[9] += 0.14867701f * t;
2827     out[1] += 0.18467439f * t;
2828     out[11] += 0.11516472f * t;
2829 
2830     ta = 0.05947080f * a[12] + 0.23359668f * a[2] + 0.11516472f * a[14];
2831     tb = 0.05947080f * b[12] + 0.23359668f * b[2] + 0.11516472f * b[14];
2832     out[7] += ta * b[13] + tb * a[13];
2833     out[13] += ta * b[7]+ tb * a[7];
2834     t = a[7] * b[13] + a[13] * b[7];
2835     out[12] += 0.05947080f * t;
2836     out[2] += 0.23359668f * t;
2837     out[14] += 0.11516472f * t;
2838 
2839     ta = 0.14867701f * a[15];
2840     tb = 0.14867701f * b[15];
2841     out[7] += ta * b[14] + tb * a[14];
2842     out[14] += ta * b[7] + tb * a[7];
2843     t = a[7] * b[14] + a[14] * b[7];
2844     out[15] += 0.14867701f * t;
2845 
2846     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2847     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2848     out[8] += ta * b[8] + tb * a[8];
2849     t = a[8] * b[8];
2850     out[0] += 0.28209479f * t;
2851     out[6] -= 0.18022375f * t;
2852 
2853     ta = -0.09403160f * a[11];
2854     tb = -0.09403160f * b[11];
2855     out[8] += ta * b[9] + tb * a[9];
2856     out[9] += ta * b[8] + tb * a[8];
2857     t = a[8] * b[9] + a[9] * b[8];
2858     out[11] -= 0.09403160f * t;
2859 
2860     ta = -0.09403160f * a[15];
2861     tb = -0.09403160f * b[15];
2862     out[8] += ta * b[13] + tb * a[13];
2863     out[13] += ta * b[8] + tb * a[8];
2864     t = a[8] * b[13] + a[13] * b[8];
2865     out[15] -= 0.09403160f * t;
2866 
2867     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2868     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2869     out[8] += ta * b[14] + tb * a[14];
2870     out[14] += ta * b[8] + tb * a[8];
2871     t = a[8] * b[14] + a[14] * b[8];
2872     out[2] += 0.18467439f * t;
2873     out[12] -= 0.18806319f * t;
2874 
2875     ta = -0.21026104f * a[6] + 0.28209479f * a[0];
2876     tb = -0.21026104f * b[6] + 0.28209479f * b[0];
2877     out[9] += ta * b[9] + tb * a[9];
2878     t = a[9] * b[9];
2879     out[6] -= 0.21026104f * t;
2880     out[0] += 0.28209479f * t;
2881 
2882     ta = 0.28209479f * a[0];
2883     tb = 0.28209479f * b[0];
2884     out[10] += ta * b[10] + tb * a[10];
2885     t = a[10] * b[10];
2886     out[0] += 0.28209479f * t;
2887 
2888     ta = 0.28209479f * a[0] + 0.12615663f * a[6] - 0.14567312f * a[8];
2889     tb = 0.28209479f * b[0] + 0.12615663f * b[6] - 0.14567312f * b[8];
2890     out[11] += ta * b[11] + tb * a[11];
2891     t = a[11] * b[11];
2892     out[0] += 0.28209479f * t;
2893     out[6] += 0.12615663f * t;
2894     out[8] -= 0.14567312f * t;
2895 
2896     ta = 0.28209479f * a[0] + 0.16820885f * a[6];
2897     tb = 0.28209479f * b[0] + 0.16820885f * b[6];
2898     out[12] += ta * b[12] + tb * a[12];
2899     t = a[12] * b[12];
2900     out[0] += 0.28209479f * t;
2901     out[6] += 0.16820885f * t;
2902 
2903     ta =0.28209479f * a[0] + 0.14567312f * a[8] + 0.12615663f * a[6];
2904     tb =0.28209479f * b[0] + 0.14567312f * b[8] + 0.12615663f * b[6];
2905     out[13] += ta * b[13] + tb * a[13];
2906     t = a[13] * b[13];
2907     out[0] += 0.28209479f * t;
2908     out[8] += 0.14567312f * t;
2909     out[6] += 0.12615663f * t;
2910 
2911     ta = 0.28209479f * a[0];
2912     tb = 0.28209479f * b[0];
2913     out[14] += ta * b[14] + tb * a[14];
2914     t = a[14] * b[14];
2915     out[0] += 0.28209479f * t;
2916 
2917     ta = 0.28209479f * a[0] - 0.21026104f * a[6];
2918     tb = 0.28209479f * b[0] - 0.21026104f * b[6];
2919     out[15] += ta * b[15] + tb * a[15];
2920     t = a[15] * b[15];
2921     out[0] += 0.28209479f * t;
2922     out[6] -= 0.21026104f * t;
2923 
2924     return out;
2925 }
2926 
2927 static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in)
2928 {
2929     out[0] = in[0];
2930 
2931     out[1] = a * in[2];
2932     out[2] = -a * in[1];
2933     out[3] = in[3];
2934 
2935     out[4] = a * in[7];
2936     out[5] = -in[5];
2937     out[6] = -0.5f * in[6] - 0.8660253882f * in[8];
2938     out[7] = -a * in[4];
2939     out[8] = -0.8660253882f * in[6] + 0.5f * in[8];
2940     out[9] = -a * 0.7905694842f * in[12] + a * 0.6123724580f * in[14];
2941 
2942     out[10] = -in[10];
2943     out[11] = -a * 0.6123724580f * in[12] - a * 0.7905694842f * in[14];
2944     out[12] = a * 0.7905694842f * in[9] + a * 0.6123724580f * in[11];
2945     out[13] = -0.25f * in[13] - 0.9682458639f * in[15];
2946     out[14] = -a * 0.6123724580f * in[9] + a * 0.7905694842f * in[11];
2947     out[15] = -0.9682458639f * in[13] + 0.25f * in[15];
2948     if (order == 4)
2949         return;
2950 
2951     out[16] = -a * 0.9354143739f * in[21] + a * 0.3535533845f * in[23];
2952     out[17] = -0.75f * in[17] + 0.6614378095f * in[19];
2953     out[18] = -a * 0.3535533845f * in[21] - a * 0.9354143739f * in[23];
2954     out[19] = 0.6614378095f * in[17] + 0.75f * in[19];
2955     out[20] = 0.375f * in[20] + 0.5590170026f * in[22] + 0.7395099998f * in[24];
2956     out[21] = a * 0.9354143739f * in[16] + a * 0.3535533845f * in[18];
2957     out[22] = 0.5590170026f * in[20] + 0.5f * in[22] - 0.6614378691f * in[24];
2958     out[23] = -a * 0.3535533845f * in[16] + a * 0.9354143739f * in[18];
2959     out[24] = 0.7395099998f * in[20] - 0.6614378691f * in[22] + 0.125f * in[24];
2960     if (order == 5)
2961         return;
2962 
2963     out[25] = a * 0.7015607357f * in[30] - a * 0.6846531630f * in[32] + a * 0.1976423711f * in[34];
2964     out[26] = -0.5f * in[26] + 0.8660253882f * in[28];
2965     out[27] = a * 0.5229125023f * in[30] + a * 0.3061861992f * in[32] - a * 0.7954951525f * in[34];
2966     out[28] = 0.8660253882f * in[26] + 0.5f * in[28];
2967     out[29] = a * 0.4841229022f * in[30] + a * 0.6614378691f * in[32] + a * 0.5728219748f * in[34];
2968     out[30] = -a * 0.7015607357f * in[25] - a * 0.5229125023f * in[27] - a * 0.4841229022f * in[29];
2969     out[31] = 0.125f * in[31] + 0.4050463140f * in[33] + 0.9057110548f * in[35];
2970     out[32] = a * 0.6846531630f * in[25] - a * 0.3061861992f * in[27] - a * 0.6614378691f * in[29];
2971     out[33] = 0.4050463140f * in[31] + 0.8125f * in[33] - 0.4192627370f * in[35];
2972     out[34] = -a * 0.1976423711f * in[25] + a * 0.7954951525f * in[27] - a * 0.5728219748f * in[29];
2973     out[35] = 0.9057110548f * in[31] - 0.4192627370f * in[33] + 0.0624999329f * in[35];
2974 }
2975 
2976 HRESULT WINAPI D3DXSHProjectCubeMap(UINT order, IDirect3DCubeTexture9 *cubemap, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2977 {
2978     FIXME("order %u, cubemap %p, rout %p, gout %p, bout %p: stub!\n", order, cubemap, rout, gout, bout);
2979 
2980     if(!cubemap || order < D3DXSH_MINORDER || order > D3DXSH_MAXORDER)
2981         return D3DERR_INVALIDCALL;
2982 
2983     *rout = 0.0f;
2984     *gout = 0.0f;
2985     *bout = 0.0f;
2986 
2987     return D3D_OK;
2988 }
2989 
2990 FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, const D3DXMATRIX *matrix, const FLOAT *in)
2991 {
2992     FLOAT alpha, beta, gamma, sinb, temp[36], temp1[36];
2993 
2994     TRACE("out %p, order %u, matrix %p, in %p\n", out, order, matrix, in);
2995 
2996     out[0] = in[0];
2997 
2998     if ((order > D3DXSH_MAXORDER) || (order < D3DXSH_MINORDER))
2999         return out;
3000 
3001     if (order <= 3)
3002     {
3003         out[1] = matrix->u.m[1][1] * in[1] - matrix->u.m[2][1] * in[2] + matrix->u.m[0][1] * in[3];
3004         out[2] = -matrix->u.m[1][2] * in[1] + matrix->u.m[2][2] * in[2] - matrix->u.m[0][2] * in[3];
3005         out[3] = matrix->u.m[1][0] * in[1] - matrix->u.m[2][0] * in[2] + matrix->u.m[0][0] * in[3];
3006 
3007         if (order == 3)
3008         {
3009             FLOAT coeff[]={
3010                 matrix->u.m[1][0] * matrix->u.m[0][0], matrix->u.m[1][1] * matrix->u.m[0][1],
3011                 matrix->u.m[1][1] * matrix->u.m[2][1], matrix->u.m[1][0] * matrix->u.m[2][0],
3012                 matrix->u.m[2][0] * matrix->u.m[2][0], matrix->u.m[2][1] * matrix->u.m[2][1],
3013                 matrix->u.m[0][0] * matrix->u.m[2][0], matrix->u.m[0][1] * matrix->u.m[2][1],
3014                 matrix->u.m[0][1] * matrix->u.m[0][1], matrix->u.m[1][0] * matrix->u.m[1][0],
3015                 matrix->u.m[1][1] * matrix->u.m[1][1], matrix->u.m[0][0] * matrix->u.m[0][0], };
3016 
3017             out[4] = (matrix->u.m[1][1] * matrix->u.m[0][0] + matrix->u.m[0][1] * matrix->u.m[1][0]) * in[4];
3018             out[4] -= (matrix->u.m[1][0] * matrix->u.m[2][1] + matrix->u.m[1][1] * matrix->u.m[2][0]) * in[5];
3019             out[4] += 1.7320508076f * matrix->u.m[2][0] * matrix->u.m[2][1] * in[6];
3020             out[4] -= (matrix->u.m[0][1] * matrix->u.m[2][0] + matrix->u.m[0][0] * matrix->u.m[2][1]) * in[7];
3021             out[4] += (matrix->u.m[0][0] * matrix->u.m[0][1] - matrix->u.m[1][0] * matrix->u.m[1][1]) * in[8];
3022 
3023             out[5] = (matrix->u.m[1][1] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][1]) * in[5];
3024             out[5] -= (matrix->u.m[1][1] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][1]) * in[4];
3025             out[5] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][1] * in[6];
3026             out[5] += (matrix->u.m[0][2] * matrix->u.m[2][1] + matrix->u.m[0][1] * matrix->u.m[2][2]) * in[7];
3027             out[5] -= (matrix->u.m[0][1] * matrix->u.m[0][2] - matrix->u.m[1][1] * matrix->u.m[1][2]) * in[8];
3028 
3029             out[6] = (matrix->u.m[2][2] * matrix->u.m[2][2] - 0.5f * (coeff[4] + coeff[5])) * in[6];
3030             out[6] -= (0.5773502692f * (coeff[0] + coeff[1]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[0][2]) * in[4];
3031             out[6] += (0.5773502692f * (coeff[2] + coeff[3]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[2][2]) * in[5];
3032             out[6] += (0.5773502692f * (coeff[6] + coeff[7]) - 1.1547005384f * matrix->u.m[0][2] * matrix->u.m[2][2]) * in[7];
3033             out[6] += (0.2886751347f * (coeff[9] - coeff[8] + coeff[10] - coeff[11]) - 0.5773502692f *
3034                   (matrix->u.m[1][2] * matrix->u.m[1][2] - matrix->u.m[0][2] * matrix->u.m[0][2])) * in[8];
3035 
3036             out[7] = (matrix->u.m[0][0] * matrix->u.m[2][2] + matrix->u.m[0][2] * matrix->u.m[2][0]) * in[7];
3037             out[7] -= (matrix->u.m[1][0] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][0]) * in[4];
3038             out[7] += (matrix->u.m[1][0] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][0]) * in[5];
3039             out[7] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][0] * in[6];
3040             out[7] -= (matrix->u.m[0][0] * matrix->u.m[0][2] - matrix->u.m[1][0] * matrix->u.m[1][2]) * in[8];
3041 
3042             out[8] = 0.5f * (coeff[11] - coeff[8] - coeff[9] + coeff[10]) * in[8];
3043             out[8] += (coeff[0] - coeff[1]) * in[4];
3044             out[8] += (coeff[2] - coeff[3]) * in[5];
3045             out[8] += 0.86602540f * (coeff[4] - coeff[5]) * in[6];
3046             out[8] += (coeff[7] - coeff[6]) * in[7];
3047         }
3048 
3049         return out;
3050     }
3051 
3052     if (fabsf(matrix->u.m[2][2]) != 1.0f)
3053     {
3054         sinb = sqrtf(1.0f - matrix->u.m[2][2] * matrix->u.m[2][2]);
3055         alpha = atan2f(matrix->u.m[2][1] / sinb, matrix->u.m[2][0] / sinb);
3056         beta = atan2f(sinb, matrix->u.m[2][2]);
3057         gamma = atan2f(matrix->u.m[1][2] / sinb, -matrix->u.m[0][2] / sinb);
3058     }
3059     else
3060     {
3061         alpha = atan2f(matrix->u.m[0][1], matrix->u.m[0][0]);
3062         beta = 0.0f;
3063         gamma = 0.0f;
3064     }
3065 
3066     D3DXSHRotateZ(temp, order, gamma, in);
3067     rotate_X(temp1, order, 1.0f, temp);
3068     D3DXSHRotateZ(temp, order, beta, temp1);
3069     rotate_X(temp1, order, -1.0f, temp);
3070     D3DXSHRotateZ(out, order, alpha, temp1);
3071 
3072     return out;
3073 }
3074 
3075 FLOAT * WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, const FLOAT *in)
3076 {
3077     UINT i, sum = 0;
3078     FLOAT c[5], s[5];
3079 
3080     TRACE("out %p, order %u, angle %f, in %p\n", out, order, angle, in);
3081 
3082     order = min(max(order, D3DXSH_MINORDER), D3DXSH_MAXORDER);
3083 
3084     out[0] = in[0];
3085 
3086     for (i = 1; i < order; i++)
3087     {
3088         UINT j;
3089 
3090         c[i - 1] = cosf(i * angle);
3091         s[i - 1] = sinf(i * angle);
3092         sum += i * 2;
3093 
3094         out[sum - i] = c[i - 1] * in[sum - i];
3095         out[sum - i] += s[i - 1] * in[sum + i];
3096         for (j = i - 1; j > 0; j--)
3097         {
3098             out[sum - j] = 0.0f;
3099             out[sum - j] = c[j - 1] * in[sum - j];
3100             out[sum - j] += s[j - 1] * in[sum + j];
3101         }
3102 
3103         if (in == out)
3104             out[sum] = 0.0f;
3105         else
3106             out[sum] = in[sum];
3107 
3108         for (j = 1; j < i; j++)
3109         {
3110             out[sum + j] = 0.0f;
3111             out[sum + j] = -s[j - 1] * in[sum - j];
3112             out[sum + j] += c[j - 1] * in[sum + j];
3113         }
3114         out[sum + i] = -s[i - 1] * in[sum - i];
3115         out[sum + i] += c[i - 1] * in[sum + i];
3116     }
3117 
3118     return out;
3119 }
3120 
3121 FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, const FLOAT *a, const FLOAT scale)
3122 {
3123     UINT i;
3124 
3125     TRACE("out %p, order %u, a %p, scale %f\n", out, order, a, scale);
3126 
3127     for (i = 0; i < order * order; i++)
3128         out[i] = a[i] * scale;
3129 
3130     return out;
3131 }
3132