xref: /reactos/dll/directx/wine/d3dx9_36/math.c (revision 8cc1ec1b)
1 /*
2  * Mathematical operations specific to D3DX9.
3  *
4  * Copyright (C) 2008 David Adam
5  * Copyright (C) 2008 Luis Busquets
6  * Copyright (C) 2008 Jérôme Gardou
7  * Copyright (C) 2008 Philip Nilsson
8  * Copyright (C) 2008 Henri Verbeet
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #include "config.h"
26 #include "wine/port.h"
27 
28 #include "d3dx9_private.h"
29 
30 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
31 
32 struct ID3DXMatrixStackImpl
33 {
34   ID3DXMatrixStack ID3DXMatrixStack_iface;
35   LONG ref;
36 
37   unsigned int current;
38   unsigned int stack_size;
39   D3DXMATRIX *stack;
40 };
41 
/* NOTE(review): presumably the starting capacity of the matrix stack - confirm where it is used. */
static const unsigned int INITIAL_STACK_SIZE = 32u;
43 
44 /*_________________D3DXColor____________________*/
45 
46 D3DXCOLOR* WINAPI D3DXColorAdjustContrast(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
47 {
48     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
49 
50     pout->r = 0.5f + s * (pc->r - 0.5f);
51     pout->g = 0.5f + s * (pc->g - 0.5f);
52     pout->b = 0.5f + s * (pc->b - 0.5f);
53     pout->a = pc->a;
54     return pout;
55 }
56 
57 D3DXCOLOR* WINAPI D3DXColorAdjustSaturation(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
58 {
59     FLOAT grey;
60 
61     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
62 
63     grey = pc->r * 0.2125f + pc->g * 0.7154f + pc->b * 0.0721f;
64     pout->r = grey + s * (pc->r - grey);
65     pout->g = grey + s * (pc->g - grey);
66     pout->b = grey + s * (pc->b - grey);
67     pout->a = pc->a;
68     return pout;
69 }
70 
71 /*_________________Misc__________________________*/
72 
73 FLOAT WINAPI D3DXFresnelTerm(FLOAT costheta, FLOAT refractionindex)
74 {
75     FLOAT a, d, g, result;
76 
77     TRACE("costheta %f, refractionindex %f\n", costheta, refractionindex);
78 
79     g = sqrtf(refractionindex * refractionindex + costheta * costheta - 1.0f);
80     a = g + costheta;
81     d = g - costheta;
82     result = (costheta * a - 1.0f) * (costheta * a - 1.0f) / ((costheta * d + 1.0f) * (costheta * d + 1.0f)) + 1.0f;
83     result *= 0.5f * d * d / (a * a);
84 
85     return result;
86 }
87 
88 /*_________________D3DXMatrix____________________*/
89 
90 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation(D3DXMATRIX *out, FLOAT scaling, const D3DXVECTOR3 *rotationcenter,
91         const D3DXQUATERNION *rotation, const D3DXVECTOR3 *translation)
92 {
93     TRACE("out %p, scaling %f, rotationcenter %p, rotation %p, translation %p\n",
94             out, scaling, rotationcenter, rotation, translation);
95 
96     D3DXMatrixIdentity(out);
97 
98     if (rotation)
99     {
100         FLOAT temp00, temp01, temp02, temp10, temp11, temp12, temp20, temp21, temp22;
101 
102         temp00 = 1.0f - 2.0f * (rotation->y * rotation->y + rotation->z * rotation->z);
103         temp01 = 2.0f * (rotation->x * rotation->y + rotation->z * rotation->w);
104         temp02 = 2.0f * (rotation->x * rotation->z - rotation->y * rotation->w);
105         temp10 = 2.0f * (rotation->x * rotation->y - rotation->z * rotation->w);
106         temp11 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->z * rotation->z);
107         temp12 = 2.0f * (rotation->y * rotation->z + rotation->x * rotation->w);
108         temp20 = 2.0f * (rotation->x * rotation->z + rotation->y * rotation->w);
109         temp21 = 2.0f * (rotation->y * rotation->z - rotation->x * rotation->w);
110         temp22 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->y * rotation->y);
111 
112         out->u.m[0][0] = scaling * temp00;
113         out->u.m[0][1] = scaling * temp01;
114         out->u.m[0][2] = scaling * temp02;
115         out->u.m[1][0] = scaling * temp10;
116         out->u.m[1][1] = scaling * temp11;
117         out->u.m[1][2] = scaling * temp12;
118         out->u.m[2][0] = scaling * temp20;
119         out->u.m[2][1] = scaling * temp21;
120         out->u.m[2][2] = scaling * temp22;
121 
122         if (rotationcenter)
123         {
124             out->u.m[3][0] = rotationcenter->x * (1.0f - temp00) - rotationcenter->y * temp10
125                     - rotationcenter->z * temp20;
126             out->u.m[3][1] = rotationcenter->y * (1.0f - temp11) - rotationcenter->x * temp01
127                     - rotationcenter->z * temp21;
128             out->u.m[3][2] = rotationcenter->z * (1.0f - temp22) - rotationcenter->x * temp02
129                     - rotationcenter->y * temp12;
130         }
131     }
132     else
133     {
134         out->u.m[0][0] = scaling;
135         out->u.m[1][1] = scaling;
136         out->u.m[2][2] = scaling;
137     }
138 
139     if (translation)
140     {
141         out->u.m[3][0] += translation->x;
142         out->u.m[3][1] += translation->y;
143         out->u.m[3][2] += translation->z;
144     }
145 
146     return out;
147 }
148 
149 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation2D(D3DXMATRIX *out, FLOAT scaling,
150         const D3DXVECTOR2 *rotationcenter, FLOAT rotation, const D3DXVECTOR2 *translation)
151 {
152     FLOAT tmp1, tmp2, s;
153 
154     TRACE("out %p, scaling %f, rotationcenter %p, rotation %f, translation %p\n",
155             out, scaling, rotationcenter, rotation, translation);
156 
157     s = sinf(rotation / 2.0f);
158     tmp1 = 1.0f - 2.0f * s * s;
159     tmp2 = 2.0f * s * cosf(rotation / 2.0f);
160 
161     D3DXMatrixIdentity(out);
162     out->u.m[0][0] = scaling * tmp1;
163     out->u.m[0][1] = scaling * tmp2;
164     out->u.m[1][0] = -scaling * tmp2;
165     out->u.m[1][1] = scaling * tmp1;
166 
167     if (rotationcenter)
168     {
169         FLOAT x, y;
170 
171         x = rotationcenter->x;
172         y = rotationcenter->y;
173 
174         out->u.m[3][0] = y * tmp2 - x * tmp1 + x;
175         out->u.m[3][1] = -x * tmp2 - y * tmp1 + y;
176     }
177 
178     if (translation)
179     {
180         out->u.m[3][0] += translation->x;
181         out->u.m[3][1] += translation->y;
182     }
183 
184     return out;
185 }
186 
187 HRESULT WINAPI D3DXMatrixDecompose(D3DXVECTOR3 *poutscale, D3DXQUATERNION *poutrotation, D3DXVECTOR3 *pouttranslation, const D3DXMATRIX *pm)
188 {
189     D3DXMATRIX normalized;
190     D3DXVECTOR3 vec;
191 
192     TRACE("poutscale %p, poutrotation %p, pouttranslation %p, pm %p\n", poutscale, poutrotation, pouttranslation, pm);
193 
194     /*Compute the scaling part.*/
195     vec.x=pm->u.m[0][0];
196     vec.y=pm->u.m[0][1];
197     vec.z=pm->u.m[0][2];
198     poutscale->x=D3DXVec3Length(&vec);
199 
200     vec.x=pm->u.m[1][0];
201     vec.y=pm->u.m[1][1];
202     vec.z=pm->u.m[1][2];
203     poutscale->y=D3DXVec3Length(&vec);
204 
205     vec.x=pm->u.m[2][0];
206     vec.y=pm->u.m[2][1];
207     vec.z=pm->u.m[2][2];
208     poutscale->z=D3DXVec3Length(&vec);
209 
210     /*Compute the translation part.*/
211     pouttranslation->x=pm->u.m[3][0];
212     pouttranslation->y=pm->u.m[3][1];
213     pouttranslation->z=pm->u.m[3][2];
214 
215     /*Let's calculate the rotation now*/
216     if ( (poutscale->x == 0.0f) || (poutscale->y == 0.0f) || (poutscale->z == 0.0f) ) return D3DERR_INVALIDCALL;
217 
218     normalized.u.m[0][0]=pm->u.m[0][0]/poutscale->x;
219     normalized.u.m[0][1]=pm->u.m[0][1]/poutscale->x;
220     normalized.u.m[0][2]=pm->u.m[0][2]/poutscale->x;
221     normalized.u.m[1][0]=pm->u.m[1][0]/poutscale->y;
222     normalized.u.m[1][1]=pm->u.m[1][1]/poutscale->y;
223     normalized.u.m[1][2]=pm->u.m[1][2]/poutscale->y;
224     normalized.u.m[2][0]=pm->u.m[2][0]/poutscale->z;
225     normalized.u.m[2][1]=pm->u.m[2][1]/poutscale->z;
226     normalized.u.m[2][2]=pm->u.m[2][2]/poutscale->z;
227 
228     D3DXQuaternionRotationMatrix(poutrotation,&normalized);
229     return S_OK;
230 }
231 
232 FLOAT WINAPI D3DXMatrixDeterminant(const D3DXMATRIX *pm)
233 {
234     FLOAT t[3], v[4];
235 
236     TRACE("pm %p\n", pm);
237 
238     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
239     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
240     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
241     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
242     v[1] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
243 
244     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
245     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
246     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
247     v[2] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
248     v[3] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
249 
250     return pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[1] +
251         pm->u.m[0][2] * v[2] + pm->u.m[0][3] * v[3];
252 }
253 
254 D3DXMATRIX* WINAPI D3DXMatrixInverse(D3DXMATRIX *pout, FLOAT *pdeterminant, const D3DXMATRIX *pm)
255 {
256     FLOAT det, t[3], v[16];
257     UINT i, j;
258 
259     TRACE("pout %p, pdeterminant %p, pm %p\n", pout, pdeterminant, pm);
260 
261     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
262     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
263     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
264     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
265     v[4] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
266 
267     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
268     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
269     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
270     v[8] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
271     v[12] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
272 
273     det = pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[4] +
274         pm->u.m[0][2] * v[8] + pm->u.m[0][3] * v[12];
275     if (det == 0.0f)
276         return NULL;
277     if (pdeterminant)
278         *pdeterminant = det;
279 
280     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
281     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
282     t[2] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
283     v[1] = -pm->u.m[0][1] * t[0] + pm->u.m[2][1] * t[1] - pm->u.m[3][1] * t[2];
284     v[5] = pm->u.m[0][0] * t[0] - pm->u.m[2][0] * t[1] + pm->u.m[3][0] * t[2];
285 
286     t[0] = pm->u.m[0][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[0][1];
287     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
288     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
289     v[9] = -pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1]- pm->u.m[0][3] * t[2];
290     v[13] = pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] + pm->u.m[0][2] * t[2];
291 
292     t[0] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
293     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
294     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
295     v[2] = pm->u.m[0][1] * t[0] - pm->u.m[1][1] * t[1] + pm->u.m[3][1] * t[2];
296     v[6] = -pm->u.m[0][0] * t[0] + pm->u.m[1][0] * t[1] - pm->u.m[3][0] * t[2];
297 
298     t[0] = pm->u.m[0][0] * pm->u.m[1][1] - pm->u.m[1][0] * pm->u.m[0][1];
299     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
300     t[2] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
301     v[10] = pm->u.m[3][3] * t[0] + pm->u.m[1][3] * t[1] + pm->u.m[0][3] * t[2];
302     v[14] = -pm->u.m[3][2] * t[0] - pm->u.m[1][2] * t[1] - pm->u.m[0][2] * t[2];
303 
304     t[0] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
305     t[1] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
306     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
307     v[3] = -pm->u.m[0][1] * t[0] + pm->u.m[1][1] * t[1] - pm->u.m[2][1] * t[2];
308     v[7] = pm->u.m[0][0] * t[0] - pm->u.m[1][0] * t[1] + pm->u.m[2][0] * t[2];
309 
310     v[11] = -pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][1]) +
311         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][1]) -
312         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][1]);
313 
314     v[15] = pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][2] - pm->u.m[1][2] * pm->u.m[2][1]) -
315         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][2] - pm->u.m[0][2] * pm->u.m[2][1]) +
316         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][2] - pm->u.m[0][2] * pm->u.m[1][1]);
317 
318     det = 1.0f / det;
319 
320     for (i = 0; i < 4; i++)
321         for (j = 0; j < 4; j++)
322             pout->u.m[i][j] = v[4 * i + j] * det;
323 
324     return pout;
325 }
326 
327 D3DXMATRIX * WINAPI D3DXMatrixLookAtLH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
328         const D3DXVECTOR3 *up)
329 {
330     D3DXVECTOR3 right, upn, vec;
331 
332     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
333 
334     D3DXVec3Subtract(&vec, at, eye);
335     D3DXVec3Normalize(&vec, &vec);
336     D3DXVec3Cross(&right, up, &vec);
337     D3DXVec3Cross(&upn, &vec, &right);
338     D3DXVec3Normalize(&right, &right);
339     D3DXVec3Normalize(&upn, &upn);
340     out->u.m[0][0] = right.x;
341     out->u.m[1][0] = right.y;
342     out->u.m[2][0] = right.z;
343     out->u.m[3][0] = -D3DXVec3Dot(&right, eye);
344     out->u.m[0][1] = upn.x;
345     out->u.m[1][1] = upn.y;
346     out->u.m[2][1] = upn.z;
347     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
348     out->u.m[0][2] = vec.x;
349     out->u.m[1][2] = vec.y;
350     out->u.m[2][2] = vec.z;
351     out->u.m[3][2] = -D3DXVec3Dot(&vec, eye);
352     out->u.m[0][3] = 0.0f;
353     out->u.m[1][3] = 0.0f;
354     out->u.m[2][3] = 0.0f;
355     out->u.m[3][3] = 1.0f;
356 
357     return out;
358 }
359 
360 D3DXMATRIX * WINAPI D3DXMatrixLookAtRH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
361         const D3DXVECTOR3 *up)
362 {
363     D3DXVECTOR3 right, upn, vec;
364 
365     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
366 
367     D3DXVec3Subtract(&vec, at, eye);
368     D3DXVec3Normalize(&vec, &vec);
369     D3DXVec3Cross(&right, up, &vec);
370     D3DXVec3Cross(&upn, &vec, &right);
371     D3DXVec3Normalize(&right, &right);
372     D3DXVec3Normalize(&upn, &upn);
373     out->u.m[0][0] = -right.x;
374     out->u.m[1][0] = -right.y;
375     out->u.m[2][0] = -right.z;
376     out->u.m[3][0] = D3DXVec3Dot(&right, eye);
377     out->u.m[0][1] = upn.x;
378     out->u.m[1][1] = upn.y;
379     out->u.m[2][1] = upn.z;
380     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
381     out->u.m[0][2] = -vec.x;
382     out->u.m[1][2] = -vec.y;
383     out->u.m[2][2] = -vec.z;
384     out->u.m[3][2] = D3DXVec3Dot(&vec, eye);
385     out->u.m[0][3] = 0.0f;
386     out->u.m[1][3] = 0.0f;
387     out->u.m[2][3] = 0.0f;
388     out->u.m[3][3] = 1.0f;
389 
390     return out;
391 }
392 
393 D3DXMATRIX* WINAPI D3DXMatrixMultiply(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
394 {
395     D3DXMATRIX out;
396     int i,j;
397 
398     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
399 
400     for (i=0; i<4; i++)
401     {
402         for (j=0; j<4; j++)
403         {
404             out.u.m[i][j] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
405         }
406     }
407 
408     *pout = out;
409     return pout;
410 }
411 
412 D3DXMATRIX* WINAPI D3DXMatrixMultiplyTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
413 {
414     D3DXMATRIX temp;
415     int i, j;
416 
417     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
418 
419     for (i = 0; i < 4; i++)
420         for (j = 0; j < 4; j++)
421             temp.u.m[j][i] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
422 
423     *pout = temp;
424     return pout;
425 }
426 
427 D3DXMATRIX* WINAPI D3DXMatrixOrthoLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
428 {
429     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
430 
431     D3DXMatrixIdentity(pout);
432     pout->u.m[0][0] = 2.0f / w;
433     pout->u.m[1][1] = 2.0f / h;
434     pout->u.m[2][2] = 1.0f / (zf - zn);
435     pout->u.m[3][2] = zn / (zn - zf);
436     return pout;
437 }
438 
439 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
440 {
441     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
442 
443     D3DXMatrixIdentity(pout);
444     pout->u.m[0][0] = 2.0f / (r - l);
445     pout->u.m[1][1] = 2.0f / (t - b);
446     pout->u.m[2][2] = 1.0f / (zf -zn);
447     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
448     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
449     pout->u.m[3][2] = zn / (zn -zf);
450     return pout;
451 }
452 
453 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
454 {
455     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
456 
457     D3DXMatrixIdentity(pout);
458     pout->u.m[0][0] = 2.0f / (r - l);
459     pout->u.m[1][1] = 2.0f / (t - b);
460     pout->u.m[2][2] = 1.0f / (zn -zf);
461     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
462     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
463     pout->u.m[3][2] = zn / (zn -zf);
464     return pout;
465 }
466 
467 D3DXMATRIX* WINAPI D3DXMatrixOrthoRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
468 {
469     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
470 
471     D3DXMatrixIdentity(pout);
472     pout->u.m[0][0] = 2.0f / w;
473     pout->u.m[1][1] = 2.0f / h;
474     pout->u.m[2][2] = 1.0f / (zn - zf);
475     pout->u.m[3][2] = zn / (zn - zf);
476     return pout;
477 }
478 
479 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovLH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
480 {
481     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
482 
483     D3DXMatrixIdentity(pout);
484     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
485     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
486     pout->u.m[2][2] = zf / (zf - zn);
487     pout->u.m[2][3] = 1.0f;
488     pout->u.m[3][2] = (zf * zn) / (zn - zf);
489     pout->u.m[3][3] = 0.0f;
490     return pout;
491 }
492 
493 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovRH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
494 {
495     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
496 
497     D3DXMatrixIdentity(pout);
498     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
499     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
500     pout->u.m[2][2] = zf / (zn - zf);
501     pout->u.m[2][3] = -1.0f;
502     pout->u.m[3][2] = (zf * zn) / (zn - zf);
503     pout->u.m[3][3] = 0.0f;
504     return pout;
505 }
506 
507 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
508 {
509     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
510 
511     D3DXMatrixIdentity(pout);
512     pout->u.m[0][0] = 2.0f * zn / w;
513     pout->u.m[1][1] = 2.0f * zn / h;
514     pout->u.m[2][2] = zf / (zf - zn);
515     pout->u.m[3][2] = (zn * zf) / (zn - zf);
516     pout->u.m[2][3] = 1.0f;
517     pout->u.m[3][3] = 0.0f;
518     return pout;
519 }
520 
521 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
522 {
523     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
524 
525     D3DXMatrixIdentity(pout);
526     pout->u.m[0][0] = 2.0f * zn / (r - l);
527     pout->u.m[1][1] = -2.0f * zn / (b - t);
528     pout->u.m[2][0] = -1.0f - 2.0f * l / (r - l);
529     pout->u.m[2][1] = 1.0f + 2.0f * t / (b - t);
530     pout->u.m[2][2] = - zf / (zn - zf);
531     pout->u.m[3][2] = (zn * zf) / (zn -zf);
532     pout->u.m[2][3] = 1.0f;
533     pout->u.m[3][3] = 0.0f;
534     return pout;
535 }
536 
537 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
538 {
539     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
540 
541     D3DXMatrixIdentity(pout);
542     pout->u.m[0][0] = 2.0f * zn / (r - l);
543     pout->u.m[1][1] = -2.0f * zn / (b - t);
544     pout->u.m[2][0] = 1.0f + 2.0f * l / (r - l);
545     pout->u.m[2][1] = -1.0f -2.0f * t / (b - t);
546     pout->u.m[2][2] = zf / (zn - zf);
547     pout->u.m[3][2] = (zn * zf) / (zn -zf);
548     pout->u.m[2][3] = -1.0f;
549     pout->u.m[3][3] = 0.0f;
550     return pout;
551 }
552 
553 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
554 {
555     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
556 
557     D3DXMatrixIdentity(pout);
558     pout->u.m[0][0] = 2.0f * zn / w;
559     pout->u.m[1][1] = 2.0f * zn / h;
560     pout->u.m[2][2] = zf / (zn - zf);
561     pout->u.m[3][2] = (zn * zf) / (zn - zf);
562     pout->u.m[2][3] = -1.0f;
563     pout->u.m[3][3] = 0.0f;
564     return pout;
565 }
566 
567 D3DXMATRIX* WINAPI D3DXMatrixReflect(D3DXMATRIX *pout, const D3DXPLANE *pplane)
568 {
569     D3DXPLANE Nplane;
570 
571     TRACE("pout %p, pplane %p\n", pout, pplane);
572 
573     D3DXPlaneNormalize(&Nplane, pplane);
574     D3DXMatrixIdentity(pout);
575     pout->u.m[0][0] = 1.0f - 2.0f * Nplane.a * Nplane.a;
576     pout->u.m[0][1] = -2.0f * Nplane.a * Nplane.b;
577     pout->u.m[0][2] = -2.0f * Nplane.a * Nplane.c;
578     pout->u.m[1][0] = -2.0f * Nplane.a * Nplane.b;
579     pout->u.m[1][1] = 1.0f - 2.0f * Nplane.b * Nplane.b;
580     pout->u.m[1][2] = -2.0f * Nplane.b * Nplane.c;
581     pout->u.m[2][0] = -2.0f * Nplane.c * Nplane.a;
582     pout->u.m[2][1] = -2.0f * Nplane.c * Nplane.b;
583     pout->u.m[2][2] = 1.0f - 2.0f * Nplane.c * Nplane.c;
584     pout->u.m[3][0] = -2.0f * Nplane.d * Nplane.a;
585     pout->u.m[3][1] = -2.0f * Nplane.d * Nplane.b;
586     pout->u.m[3][2] = -2.0f * Nplane.d * Nplane.c;
587     return pout;
588 }
589 
590 D3DXMATRIX * WINAPI D3DXMatrixRotationAxis(D3DXMATRIX *out, const D3DXVECTOR3 *v, FLOAT angle)
591 {
592     D3DXVECTOR3 nv;
593     FLOAT sangle, cangle, cdiff;
594 
595     TRACE("out %p, v %p, angle %f\n", out, v, angle);
596 
597     D3DXVec3Normalize(&nv, v);
598     sangle = sinf(angle);
599     cangle = cosf(angle);
600     cdiff = 1.0f - cangle;
601 
602     out->u.m[0][0] = cdiff * nv.x * nv.x + cangle;
603     out->u.m[1][0] = cdiff * nv.x * nv.y - sangle * nv.z;
604     out->u.m[2][0] = cdiff * nv.x * nv.z + sangle * nv.y;
605     out->u.m[3][0] = 0.0f;
606     out->u.m[0][1] = cdiff * nv.y * nv.x + sangle * nv.z;
607     out->u.m[1][1] = cdiff * nv.y * nv.y + cangle;
608     out->u.m[2][1] = cdiff * nv.y * nv.z - sangle * nv.x;
609     out->u.m[3][1] = 0.0f;
610     out->u.m[0][2] = cdiff * nv.z * nv.x - sangle * nv.y;
611     out->u.m[1][2] = cdiff * nv.z * nv.y + sangle * nv.x;
612     out->u.m[2][2] = cdiff * nv.z * nv.z + cangle;
613     out->u.m[3][2] = 0.0f;
614     out->u.m[0][3] = 0.0f;
615     out->u.m[1][3] = 0.0f;
616     out->u.m[2][3] = 0.0f;
617     out->u.m[3][3] = 1.0f;
618 
619     return out;
620 }
621 
622 D3DXMATRIX* WINAPI D3DXMatrixRotationQuaternion(D3DXMATRIX *pout, const D3DXQUATERNION *pq)
623 {
624     TRACE("pout %p, pq %p\n", pout, pq);
625 
626     D3DXMatrixIdentity(pout);
627     pout->u.m[0][0] = 1.0f - 2.0f * (pq->y * pq->y + pq->z * pq->z);
628     pout->u.m[0][1] = 2.0f * (pq->x *pq->y + pq->z * pq->w);
629     pout->u.m[0][2] = 2.0f * (pq->x * pq->z - pq->y * pq->w);
630     pout->u.m[1][0] = 2.0f * (pq->x * pq->y - pq->z * pq->w);
631     pout->u.m[1][1] = 1.0f - 2.0f * (pq->x * pq->x + pq->z * pq->z);
632     pout->u.m[1][2] = 2.0f * (pq->y *pq->z + pq->x *pq->w);
633     pout->u.m[2][0] = 2.0f * (pq->x * pq->z + pq->y * pq->w);
634     pout->u.m[2][1] = 2.0f * (pq->y *pq->z - pq->x *pq->w);
635     pout->u.m[2][2] = 1.0f - 2.0f * (pq->x * pq->x + pq->y * pq->y);
636     return pout;
637 }
638 
639 D3DXMATRIX* WINAPI D3DXMatrixRotationX(D3DXMATRIX *pout, FLOAT angle)
640 {
641     TRACE("pout %p, angle %f\n", pout, angle);
642 
643     D3DXMatrixIdentity(pout);
644     pout->u.m[1][1] = cosf(angle);
645     pout->u.m[2][2] = cosf(angle);
646     pout->u.m[1][2] = sinf(angle);
647     pout->u.m[2][1] = -sinf(angle);
648     return pout;
649 }
650 
651 D3DXMATRIX* WINAPI D3DXMatrixRotationY(D3DXMATRIX *pout, FLOAT angle)
652 {
653     TRACE("pout %p, angle %f\n", pout, angle);
654 
655     D3DXMatrixIdentity(pout);
656     pout->u.m[0][0] = cosf(angle);
657     pout->u.m[2][2] = cosf(angle);
658     pout->u.m[0][2] = -sinf(angle);
659     pout->u.m[2][0] = sinf(angle);
660     return pout;
661 }
662 
663 D3DXMATRIX * WINAPI D3DXMatrixRotationYawPitchRoll(D3DXMATRIX *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
664 {
665     FLOAT sroll, croll, spitch, cpitch, syaw, cyaw;
666 
667     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
668 
669     sroll = sinf(roll);
670     croll = cosf(roll);
671     spitch = sinf(pitch);
672     cpitch = cosf(pitch);
673     syaw = sinf(yaw);
674     cyaw = cosf(yaw);
675 
676     out->u.m[0][0] = sroll * spitch * syaw + croll * cyaw;
677     out->u.m[0][1] = sroll * cpitch;
678     out->u.m[0][2] = sroll * spitch * cyaw - croll * syaw;
679     out->u.m[0][3] = 0.0f;
680     out->u.m[1][0] = croll * spitch * syaw - sroll * cyaw;
681     out->u.m[1][1] = croll * cpitch;
682     out->u.m[1][2] = croll * spitch * cyaw + sroll * syaw;
683     out->u.m[1][3] = 0.0f;
684     out->u.m[2][0] = cpitch * syaw;
685     out->u.m[2][1] = -spitch;
686     out->u.m[2][2] = cpitch * cyaw;
687     out->u.m[2][3] = 0.0f;
688     out->u.m[3][0] = 0.0f;
689     out->u.m[3][1] = 0.0f;
690     out->u.m[3][2] = 0.0f;
691     out->u.m[3][3] = 1.0f;
692 
693     return out;
694 }
695 
696 D3DXMATRIX* WINAPI D3DXMatrixRotationZ(D3DXMATRIX *pout, FLOAT angle)
697 {
698     TRACE("pout %p, angle %f\n", pout, angle);
699 
700     D3DXMatrixIdentity(pout);
701     pout->u.m[0][0] = cosf(angle);
702     pout->u.m[1][1] = cosf(angle);
703     pout->u.m[0][1] = sinf(angle);
704     pout->u.m[1][0] = -sinf(angle);
705     return pout;
706 }
707 
708 D3DXMATRIX* WINAPI D3DXMatrixScaling(D3DXMATRIX *pout, FLOAT sx, FLOAT sy, FLOAT sz)
709 {
710     TRACE("pout %p, sx %f, sy %f, sz %f\n", pout, sx, sy, sz);
711 
712     D3DXMatrixIdentity(pout);
713     pout->u.m[0][0] = sx;
714     pout->u.m[1][1] = sy;
715     pout->u.m[2][2] = sz;
716     return pout;
717 }
718 
719 D3DXMATRIX* WINAPI D3DXMatrixShadow(D3DXMATRIX *pout, const D3DXVECTOR4 *plight, const D3DXPLANE *pplane)
720 {
721     D3DXPLANE Nplane;
722     FLOAT dot;
723 
724     TRACE("pout %p, plight %p, pplane %p\n", pout, plight, pplane);
725 
726     D3DXPlaneNormalize(&Nplane, pplane);
727     dot = D3DXPlaneDot(&Nplane, plight);
728     pout->u.m[0][0] = dot - Nplane.a * plight->x;
729     pout->u.m[0][1] = -Nplane.a * plight->y;
730     pout->u.m[0][2] = -Nplane.a * plight->z;
731     pout->u.m[0][3] = -Nplane.a * plight->w;
732     pout->u.m[1][0] = -Nplane.b * plight->x;
733     pout->u.m[1][1] = dot - Nplane.b * plight->y;
734     pout->u.m[1][2] = -Nplane.b * plight->z;
735     pout->u.m[1][3] = -Nplane.b * plight->w;
736     pout->u.m[2][0] = -Nplane.c * plight->x;
737     pout->u.m[2][1] = -Nplane.c * plight->y;
738     pout->u.m[2][2] = dot - Nplane.c * plight->z;
739     pout->u.m[2][3] = -Nplane.c * plight->w;
740     pout->u.m[3][0] = -Nplane.d * plight->x;
741     pout->u.m[3][1] = -Nplane.d * plight->y;
742     pout->u.m[3][2] = -Nplane.d * plight->z;
743     pout->u.m[3][3] = dot - Nplane.d * plight->w;
744     return pout;
745 }
746 
747 D3DXMATRIX * WINAPI D3DXMatrixTransformation(D3DXMATRIX *out, const D3DXVECTOR3 *scaling_center,
748         const D3DXQUATERNION *scaling_rotation, const D3DXVECTOR3 *scaling,
749         const D3DXVECTOR3 *rotation_center, const D3DXQUATERNION *rotation,
750         const D3DXVECTOR3 *translation)
751 {
752     static const D3DXVECTOR3 zero_vector;
753     D3DXMATRIX m1, msr1, ms, msr, msc, mrc1, mr, mrc, mt;
754     D3DXVECTOR3 sc, rc;
755     D3DXQUATERNION q;
756 
757     TRACE("out %p, scaling_center %p, scaling_rotation %p, scaling %p, rotation_center %p,"
758             " rotation %p, translation %p.\n",
759             out, scaling_center, scaling_rotation, scaling, rotation_center, rotation, translation);
760 
761     if (scaling)
762     {
763         sc = scaling_center ? *scaling_center : zero_vector;
764         D3DXMatrixTranslation(&m1, -sc.x, -sc.y, -sc.z);
765         if (scaling_rotation)
766         {
767             q.x = -scaling_rotation->x;
768             q.y = -scaling_rotation->y;
769             q.z = -scaling_rotation->z;
770             q.w = scaling_rotation->w;
771             D3DXMatrixRotationQuaternion(&msr1, &q);
772             D3DXMatrixMultiply(&m1, &m1, &msr1);
773         }
774         D3DXMatrixScaling(&ms, scaling->x, scaling->y, scaling->z);
775         D3DXMatrixMultiply(&m1, &m1, &ms);
776         if (scaling_rotation)
777         {
778             D3DXMatrixRotationQuaternion(&msr, scaling_rotation);
779             D3DXMatrixMultiply(&m1, &m1, &msr);
780         }
781         D3DXMatrixTranslation(&msc, sc.x, sc.y, sc.z);
782         D3DXMatrixMultiply(&m1, &m1, &msc);
783     }
784     else
785     {
786         D3DXMatrixIdentity(&m1);
787     }
788 
789     if (rotation)
790     {
791         rc = rotation_center ? *rotation_center : zero_vector;
792         D3DXMatrixTranslation(&mrc1, -rc.x, -rc.y, -rc.z);
793         D3DXMatrixMultiply(&m1, &m1, &mrc1);
794         D3DXMatrixRotationQuaternion(&mr, rotation);
795         D3DXMatrixMultiply(&m1, &m1, &mr);
796         D3DXMatrixTranslation(&mrc, rc.x, rc.y, rc.z);
797         D3DXMatrixMultiply(&m1, &m1, &mrc);
798     }
799 
800     if (translation)
801     {
802         D3DXMatrixTranslation(&mt, translation->x, translation->y, translation->z);
803         D3DXMatrixMultiply(out, &m1, &mt);
804     }
805     else
806     {
807         *out = m1;
808     }
809 
810     return out;
811 }
812 
813 static void vec3_from_vec2(D3DXVECTOR3 *v3, const D3DXVECTOR2 *v2)
814 {
815     if (!v2)
816         return;
817 
818     v3->x = v2->x;
819     v3->y = v2->y;
820     v3->z = 0.0f;
821 }
822 
823 D3DXMATRIX * WINAPI D3DXMatrixTransformation2D(D3DXMATRIX *out, const D3DXVECTOR2 *scaling_center,
824         float scaling_rotation, const D3DXVECTOR2 *scaling, const D3DXVECTOR2 *rotation_center,
825         float rotation, const D3DXVECTOR2 *translation)
826 {
827     D3DXVECTOR3 r_c, s, s_c, t;
828     D3DXQUATERNION r, s_r;
829 
830     TRACE("out %p, scaling_center %p, scaling_rotation %.8e, scaling %p, rotation_center %p, "
831             "rotation %.8e, translation %p.\n",
832             out, scaling_center, scaling_rotation, scaling, rotation_center, rotation, translation);
833 
834     vec3_from_vec2(&s_c, scaling_center);
835     vec3_from_vec2(&s, scaling);
836     if (scaling)
837         s.z = 1.0f;
838     vec3_from_vec2(&r_c, rotation_center);
839     vec3_from_vec2(&t, translation);
840 
841     if (rotation)
842     {
843         r.w = cosf(rotation / 2.0f);
844         r.x = 0.0f;
845         r.y = 0.0f;
846         r.z = sinf(rotation / 2.0f);
847     }
848 
849     if (scaling_rotation)
850     {
851         s_r.w = cosf(scaling_rotation / 2.0f);
852         s_r.x = 0.0f;
853         s_r.y = 0.0f;
854         s_r.z = sinf(scaling_rotation / 2.0f);
855     }
856 
857     return D3DXMatrixTransformation(out, scaling_center ? &s_c : NULL,
858             scaling_rotation ? &s_r : NULL, scaling ? &s : NULL, rotation_center ? &r_c: NULL,
859             rotation ? &r : NULL, translation ? &t : NULL);
860 }
861 
862 D3DXMATRIX* WINAPI D3DXMatrixTranslation(D3DXMATRIX *pout, FLOAT x, FLOAT y, FLOAT z)
863 {
864     TRACE("pout %p, x %f, y %f, z %f\n", pout, x, y, z);
865 
866     D3DXMatrixIdentity(pout);
867     pout->u.m[3][0] = x;
868     pout->u.m[3][1] = y;
869     pout->u.m[3][2] = z;
870     return pout;
871 }
872 
873 D3DXMATRIX* WINAPI D3DXMatrixTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm)
874 {
875     const D3DXMATRIX m = *pm;
876     int i,j;
877 
878     TRACE("pout %p, pm %p\n", pout, pm);
879 
880     for (i=0; i<4; i++)
881         for (j=0; j<4; j++) pout->u.m[i][j] = m.u.m[j][i];
882 
883     return pout;
884 }
885 
886 /*_________________D3DXMatrixStack____________________*/
887 
888 
889 static inline struct ID3DXMatrixStackImpl *impl_from_ID3DXMatrixStack(ID3DXMatrixStack *iface)
890 {
891   return CONTAINING_RECORD(iface, struct ID3DXMatrixStackImpl, ID3DXMatrixStack_iface);
892 }
893 
894 static HRESULT WINAPI ID3DXMatrixStackImpl_QueryInterface(ID3DXMatrixStack *iface, REFIID riid, void **out)
895 {
896     TRACE("iface %p, riid %s, out %p.\n", iface, debugstr_guid(riid), out);
897 
898     if (IsEqualGUID(riid, &IID_ID3DXMatrixStack)
899             || IsEqualGUID(riid, &IID_IUnknown))
900     {
901         ID3DXMatrixStack_AddRef(iface);
902         *out = iface;
903         return S_OK;
904     }
905 
906     WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));
907 
908     *out = NULL;
909     return E_NOINTERFACE;
910 }
911 
912 static ULONG WINAPI ID3DXMatrixStackImpl_AddRef(ID3DXMatrixStack *iface)
913 {
914     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
915     ULONG ref = InterlockedIncrement(&This->ref);
916     TRACE("(%p) : AddRef from %d\n", This, ref - 1);
917     return ref;
918 }
919 
920 static ULONG WINAPI ID3DXMatrixStackImpl_Release(ID3DXMatrixStack *iface)
921 {
922     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
923     ULONG ref = InterlockedDecrement(&This->ref);
924     if (!ref)
925     {
926         HeapFree(GetProcessHeap(), 0, This->stack);
927         HeapFree(GetProcessHeap(), 0, This);
928     }
929     TRACE("(%p) : ReleaseRef to %d\n", This, ref);
930     return ref;
931 }
932 
933 static D3DXMATRIX* WINAPI ID3DXMatrixStackImpl_GetTop(ID3DXMatrixStack *iface)
934 {
935     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
936 
937     TRACE("iface %p\n", iface);
938 
939     return &This->stack[This->current];
940 }
941 
942 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadIdentity(ID3DXMatrixStack *iface)
943 {
944     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
945 
946     TRACE("iface %p\n", iface);
947 
948     D3DXMatrixIdentity(&This->stack[This->current]);
949 
950     return D3D_OK;
951 }
952 
953 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
954 {
955     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
956 
957     TRACE("iface %p, pm %p\n", iface, pm);
958 
959     This->stack[This->current] = *pm;
960 
961     return D3D_OK;
962 }
963 
964 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
965 {
966     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
967 
968     TRACE("iface %p, pm %p\n", iface, pm);
969 
970     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], pm);
971 
972     return D3D_OK;
973 }
974 
975 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrixLocal(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
976 {
977     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
978 
979     TRACE("iface %p, pm %p\n", iface, pm);
980 
981     D3DXMatrixMultiply(&This->stack[This->current], pm, &This->stack[This->current]);
982 
983     return D3D_OK;
984 }
985 
986 static HRESULT WINAPI ID3DXMatrixStackImpl_Pop(ID3DXMatrixStack *iface)
987 {
988     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
989 
990     TRACE("iface %p\n", iface);
991 
992     /* Popping the last element on the stack returns D3D_OK, but does nothing. */
993     if (!This->current) return D3D_OK;
994 
995     if (This->current <= This->stack_size / 4 && This->stack_size >= INITIAL_STACK_SIZE * 2)
996     {
997         unsigned int new_size;
998         D3DXMATRIX *new_stack;
999 
1000         new_size = This->stack_size / 2;
1001         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1002         if (new_stack)
1003         {
1004             This->stack_size = new_size;
1005             This->stack = new_stack;
1006         }
1007     }
1008 
1009     --This->current;
1010 
1011     return D3D_OK;
1012 }
1013 
1014 static HRESULT WINAPI ID3DXMatrixStackImpl_Push(ID3DXMatrixStack *iface)
1015 {
1016     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1017 
1018     TRACE("iface %p\n", iface);
1019 
1020     if (This->current == This->stack_size - 1)
1021     {
1022         unsigned int new_size;
1023         D3DXMATRIX *new_stack;
1024 
1025         if (This->stack_size > UINT_MAX / 2) return E_OUTOFMEMORY;
1026 
1027         new_size = This->stack_size * 2;
1028         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1029         if (!new_stack) return E_OUTOFMEMORY;
1030 
1031         This->stack_size = new_size;
1032         This->stack = new_stack;
1033     }
1034 
1035     ++This->current;
1036     This->stack[This->current] = This->stack[This->current - 1];
1037 
1038     return D3D_OK;
1039 }
1040 
1041 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxis(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1042 {
1043     D3DXMATRIX temp;
1044     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1045 
1046     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1047 
1048     D3DXMatrixRotationAxis(&temp, pv, angle);
1049     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1050 
1051     return D3D_OK;
1052 }
1053 
1054 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxisLocal(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1055 {
1056     D3DXMATRIX temp;
1057     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1058 
1059     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1060 
1061     D3DXMatrixRotationAxis(&temp, pv, angle);
1062     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1063 
1064     return D3D_OK;
1065 }
1066 
1067 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRoll(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1068 {
1069     D3DXMATRIX temp;
1070     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1071 
1072     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1073 
1074     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1075     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1076 
1077     return D3D_OK;
1078 }
1079 
1080 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRollLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1081 {
1082     D3DXMATRIX temp;
1083     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1084 
1085     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1086 
1087     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1088     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1089 
1090     return D3D_OK;
1091 }
1092 
1093 static HRESULT WINAPI ID3DXMatrixStackImpl_Scale(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1094 {
1095     D3DXMATRIX temp;
1096     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1097 
1098     TRACE("iface %p,x %f, y %f, z %f\n", iface, x, y, z);
1099 
1100     D3DXMatrixScaling(&temp, x, y, z);
1101     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1102 
1103     return D3D_OK;
1104 }
1105 
1106 static HRESULT WINAPI ID3DXMatrixStackImpl_ScaleLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1107 {
1108     D3DXMATRIX temp;
1109     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1110 
1111     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1112 
1113     D3DXMatrixScaling(&temp, x, y, z);
1114     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1115 
1116     return D3D_OK;
1117 }
1118 
1119 static HRESULT WINAPI ID3DXMatrixStackImpl_Translate(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1120 {
1121     D3DXMATRIX temp;
1122     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1123 
1124     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1125 
1126     D3DXMatrixTranslation(&temp, x, y, z);
1127     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1128 
1129     return D3D_OK;
1130 }
1131 
1132 static HRESULT WINAPI ID3DXMatrixStackImpl_TranslateLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1133 {
1134     D3DXMATRIX temp;
1135     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1136 
1137     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1138 
1139     D3DXMatrixTranslation(&temp, x, y, z);
1140     D3DXMatrixMultiply(&This->stack[This->current], &temp,&This->stack[This->current]);
1141 
1142     return D3D_OK;
1143 }
1144 
1145 static const ID3DXMatrixStackVtbl ID3DXMatrixStack_Vtbl =
1146 {
1147     ID3DXMatrixStackImpl_QueryInterface,
1148     ID3DXMatrixStackImpl_AddRef,
1149     ID3DXMatrixStackImpl_Release,
1150     ID3DXMatrixStackImpl_Pop,
1151     ID3DXMatrixStackImpl_Push,
1152     ID3DXMatrixStackImpl_LoadIdentity,
1153     ID3DXMatrixStackImpl_LoadMatrix,
1154     ID3DXMatrixStackImpl_MultMatrix,
1155     ID3DXMatrixStackImpl_MultMatrixLocal,
1156     ID3DXMatrixStackImpl_RotateAxis,
1157     ID3DXMatrixStackImpl_RotateAxisLocal,
1158     ID3DXMatrixStackImpl_RotateYawPitchRoll,
1159     ID3DXMatrixStackImpl_RotateYawPitchRollLocal,
1160     ID3DXMatrixStackImpl_Scale,
1161     ID3DXMatrixStackImpl_ScaleLocal,
1162     ID3DXMatrixStackImpl_Translate,
1163     ID3DXMatrixStackImpl_TranslateLocal,
1164     ID3DXMatrixStackImpl_GetTop
1165 };
1166 
1167 HRESULT WINAPI D3DXCreateMatrixStack(DWORD flags, ID3DXMatrixStack **stack)
1168 {
1169     struct ID3DXMatrixStackImpl *object;
1170 
1171     TRACE("flags %#x, stack %p.\n", flags, stack);
1172 
1173     if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
1174     {
1175         *stack = NULL;
1176         return E_OUTOFMEMORY;
1177     }
1178     object->ID3DXMatrixStack_iface.lpVtbl = &ID3DXMatrixStack_Vtbl;
1179     object->ref = 1;
1180 
1181     if (!(object->stack = HeapAlloc(GetProcessHeap(), 0, INITIAL_STACK_SIZE * sizeof(*object->stack))))
1182     {
1183         HeapFree(GetProcessHeap(), 0, object);
1184         *stack = NULL;
1185         return E_OUTOFMEMORY;
1186     }
1187 
1188     object->current = 0;
1189     object->stack_size = INITIAL_STACK_SIZE;
1190     D3DXMatrixIdentity(&object->stack[0]);
1191 
1192     TRACE("Created matrix stack %p.\n", object);
1193 
1194     *stack = &object->ID3DXMatrixStack_iface;
1195     return D3D_OK;
1196 }
1197 
1198 /*_________________D3DXPLANE________________*/
1199 
1200 D3DXPLANE* WINAPI D3DXPlaneFromPointNormal(D3DXPLANE *pout, const D3DXVECTOR3 *pvpoint, const D3DXVECTOR3 *pvnormal)
1201 {
1202     TRACE("pout %p, pvpoint %p, pvnormal %p\n", pout, pvpoint, pvnormal);
1203 
1204     pout->a = pvnormal->x;
1205     pout->b = pvnormal->y;
1206     pout->c = pvnormal->z;
1207     pout->d = -D3DXVec3Dot(pvpoint, pvnormal);
1208     return pout;
1209 }
1210 
1211 D3DXPLANE* WINAPI D3DXPlaneFromPoints(D3DXPLANE *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3)
1212 {
1213     D3DXVECTOR3 edge1, edge2, normal, Nnormal;
1214 
1215     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
1216 
1217     edge1.x = 0.0f; edge1.y = 0.0f; edge1.z = 0.0f;
1218     edge2.x = 0.0f; edge2.y = 0.0f; edge2.z = 0.0f;
1219     D3DXVec3Subtract(&edge1, pv2, pv1);
1220     D3DXVec3Subtract(&edge2, pv3, pv1);
1221     D3DXVec3Cross(&normal, &edge1, &edge2);
1222     D3DXVec3Normalize(&Nnormal, &normal);
1223     D3DXPlaneFromPointNormal(pout, pv1, &Nnormal);
1224     return pout;
1225 }
1226 
1227 D3DXVECTOR3* WINAPI D3DXPlaneIntersectLine(D3DXVECTOR3 *pout, const D3DXPLANE *pp, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2)
1228 {
1229     D3DXVECTOR3 direction, normal;
1230     FLOAT dot, temp;
1231 
1232     TRACE("pout %p, pp %p, pv1 %p, pv2 %p\n", pout, pp, pv1, pv2);
1233 
1234     normal.x = pp->a;
1235     normal.y = pp->b;
1236     normal.z = pp->c;
1237     direction.x = pv2->x - pv1->x;
1238     direction.y = pv2->y - pv1->y;
1239     direction.z = pv2->z - pv1->z;
1240     dot = D3DXVec3Dot(&normal, &direction);
1241     if ( !dot ) return NULL;
1242     temp = ( pp->d + D3DXVec3Dot(&normal, pv1) ) / dot;
1243     pout->x = pv1->x - temp * direction.x;
1244     pout->y = pv1->y - temp * direction.y;
1245     pout->z = pv1->z - temp * direction.z;
1246     return pout;
1247 }
1248 
1249 D3DXPLANE * WINAPI D3DXPlaneNormalize(D3DXPLANE *out, const D3DXPLANE *p)
1250 {
1251     FLOAT norm;
1252 
1253     TRACE("out %p, p %p\n", out, p);
1254 
1255     norm = sqrtf(p->a * p->a + p->b * p->b + p->c * p->c);
1256     if (norm)
1257     {
1258         out->a = p->a / norm;
1259         out->b = p->b / norm;
1260         out->c = p->c / norm;
1261         out->d = p->d / norm;
1262     }
1263     else
1264     {
1265         out->a = 0.0f;
1266         out->b = 0.0f;
1267         out->c = 0.0f;
1268         out->d = 0.0f;
1269     }
1270 
1271     return out;
1272 }
1273 
1274 D3DXPLANE* WINAPI D3DXPlaneTransform(D3DXPLANE *pout, const D3DXPLANE *pplane, const D3DXMATRIX *pm)
1275 {
1276     const D3DXPLANE plane = *pplane;
1277 
1278     TRACE("pout %p, pplane %p, pm %p\n", pout, pplane, pm);
1279 
1280     pout->a = pm->u.m[0][0] * plane.a + pm->u.m[1][0] * plane.b + pm->u.m[2][0] * plane.c + pm->u.m[3][0] * plane.d;
1281     pout->b = pm->u.m[0][1] * plane.a + pm->u.m[1][1] * plane.b + pm->u.m[2][1] * plane.c + pm->u.m[3][1] * plane.d;
1282     pout->c = pm->u.m[0][2] * plane.a + pm->u.m[1][2] * plane.b + pm->u.m[2][2] * plane.c + pm->u.m[3][2] * plane.d;
1283     pout->d = pm->u.m[0][3] * plane.a + pm->u.m[1][3] * plane.b + pm->u.m[2][3] * plane.c + pm->u.m[3][3] * plane.d;
1284     return pout;
1285 }
1286 
1287 D3DXPLANE* WINAPI D3DXPlaneTransformArray(D3DXPLANE* out, UINT outstride, const D3DXPLANE* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1288 {
1289     UINT i;
1290 
1291     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1292 
1293     for (i = 0; i < elements; ++i) {
1294         D3DXPlaneTransform(
1295             (D3DXPLANE*)((char*)out + outstride * i),
1296             (const D3DXPLANE*)((const char*)in + instride * i),
1297             matrix);
1298     }
1299     return out;
1300 }
1301 
1302 /*_________________D3DXQUATERNION________________*/
1303 
1304 D3DXQUATERNION* WINAPI D3DXQuaternionBaryCentric(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, FLOAT f, FLOAT g)
1305 {
1306     D3DXQUATERNION temp1, temp2;
1307 
1308      TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, f %f, g %f\n", pout, pq1, pq2, pq3, f, g);
1309 
1310     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq2, f + g), D3DXQuaternionSlerp(&temp2, pq1, pq3, f+g), g / (f + g));
1311     return pout;
1312 }
1313 
1314 D3DXQUATERNION * WINAPI D3DXQuaternionExp(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1315 {
1316     FLOAT norm;
1317 
1318     TRACE("out %p, q %p\n", out, q);
1319 
1320     norm = sqrtf(q->x * q->x + q->y * q->y + q->z * q->z);
1321     if (norm)
1322     {
1323         out->x = sinf(norm) * q->x / norm;
1324         out->y = sinf(norm) * q->y / norm;
1325         out->z = sinf(norm) * q->z / norm;
1326         out->w = cosf(norm);
1327     }
1328     else
1329     {
1330         out->x = 0.0f;
1331         out->y = 0.0f;
1332         out->z = 0.0f;
1333         out->w = 1.0f;
1334     }
1335 
1336     return out;
1337 }
1338 
1339 D3DXQUATERNION* WINAPI D3DXQuaternionInverse(D3DXQUATERNION *pout, const D3DXQUATERNION *pq)
1340 {
1341     FLOAT norm;
1342 
1343     TRACE("pout %p, pq %p\n", pout, pq);
1344 
1345     norm = D3DXQuaternionLengthSq(pq);
1346 
1347     pout->x = -pq->x / norm;
1348     pout->y = -pq->y / norm;
1349     pout->z = -pq->z / norm;
1350     pout->w = pq->w / norm;
1351     return pout;
1352 }
1353 
1354 D3DXQUATERNION * WINAPI D3DXQuaternionLn(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1355 {
1356     FLOAT t;
1357 
1358     TRACE("out %p, q %p\n", out, q);
1359 
1360     if ((q->w >= 1.0f) || (q->w == -1.0f))
1361         t = 1.0f;
1362     else
1363         t = acosf(q->w) / sqrtf(1.0f - q->w * q->w);
1364 
1365     out->x = t * q->x;
1366     out->y = t * q->y;
1367     out->z = t * q->z;
1368     out->w = 0.0f;
1369 
1370     return out;
1371 }
1372 
1373 D3DXQUATERNION* WINAPI D3DXQuaternionMultiply(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2)
1374 {
1375     D3DXQUATERNION out;
1376 
1377     TRACE("pout %p, pq1 %p, pq2 %p\n", pout, pq1, pq2);
1378 
1379     out.x = pq2->w * pq1->x + pq2->x * pq1->w + pq2->y * pq1->z - pq2->z * pq1->y;
1380     out.y = pq2->w * pq1->y - pq2->x * pq1->z + pq2->y * pq1->w + pq2->z * pq1->x;
1381     out.z = pq2->w * pq1->z + pq2->x * pq1->y - pq2->y * pq1->x + pq2->z * pq1->w;
1382     out.w = pq2->w * pq1->w - pq2->x * pq1->x - pq2->y * pq1->y - pq2->z * pq1->z;
1383     *pout = out;
1384     return pout;
1385 }
1386 
1387 D3DXQUATERNION * WINAPI D3DXQuaternionNormalize(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1388 {
1389     FLOAT norm;
1390 
1391     TRACE("out %p, q %p\n", out, q);
1392 
1393     norm = D3DXQuaternionLength(q);
1394 
1395     out->x = q->x / norm;
1396     out->y = q->y / norm;
1397     out->z = q->z / norm;
1398     out->w = q->w / norm;
1399 
1400     return out;
1401 }
1402 
1403 D3DXQUATERNION * WINAPI D3DXQuaternionRotationAxis(D3DXQUATERNION *out, const D3DXVECTOR3 *v, FLOAT angle)
1404 {
1405     D3DXVECTOR3 temp;
1406 
1407     TRACE("out %p, v %p, angle %f\n", out, v, angle);
1408 
1409     D3DXVec3Normalize(&temp, v);
1410 
1411     out->x = sinf(angle / 2.0f) * temp.x;
1412     out->y = sinf(angle / 2.0f) * temp.y;
1413     out->z = sinf(angle / 2.0f) * temp.z;
1414     out->w = cosf(angle / 2.0f);
1415 
1416     return out;
1417 }
1418 
1419 D3DXQUATERNION * WINAPI D3DXQuaternionRotationMatrix(D3DXQUATERNION *out, const D3DXMATRIX *m)
1420 {
1421     FLOAT s, trace;
1422 
1423     TRACE("out %p, m %p\n", out, m);
1424 
1425     trace = m->u.m[0][0] + m->u.m[1][1] + m->u.m[2][2] + 1.0f;
1426     if (trace > 1.0f)
1427     {
1428         s = 2.0f * sqrtf(trace);
1429         out->x = (m->u.m[1][2] - m->u.m[2][1]) / s;
1430         out->y = (m->u.m[2][0] - m->u.m[0][2]) / s;
1431         out->z = (m->u.m[0][1] - m->u.m[1][0]) / s;
1432         out->w = 0.25f * s;
1433     }
1434     else
1435     {
1436         int i, maxi = 0;
1437 
1438         for (i = 1; i < 3; i++)
1439         {
1440             if (m->u.m[i][i] > m->u.m[maxi][maxi])
1441                 maxi = i;
1442         }
1443 
1444         switch (maxi)
1445         {
1446             case 0:
1447                 s = 2.0f * sqrtf(1.0f + m->u.m[0][0] - m->u.m[1][1] - m->u.m[2][2]);
1448                 out->x = 0.25f * s;
1449                 out->y = (m->u.m[0][1] + m->u.m[1][0]) / s;
1450                 out->z = (m->u.m[0][2] + m->u.m[2][0]) / s;
1451                 out->w = (m->u.m[1][2] - m->u.m[2][1]) / s;
1452                 break;
1453 
1454             case 1:
1455                 s = 2.0f * sqrtf(1.0f + m->u.m[1][1] - m->u.m[0][0] - m->u.m[2][2]);
1456                 out->x = (m->u.m[0][1] + m->u.m[1][0]) / s;
1457                 out->y = 0.25f * s;
1458                 out->z = (m->u.m[1][2] + m->u.m[2][1]) / s;
1459                 out->w = (m->u.m[2][0] - m->u.m[0][2]) / s;
1460                 break;
1461 
1462             case 2:
1463                 s = 2.0f * sqrtf(1.0f + m->u.m[2][2] - m->u.m[0][0] - m->u.m[1][1]);
1464                 out->x = (m->u.m[0][2] + m->u.m[2][0]) / s;
1465                 out->y = (m->u.m[1][2] + m->u.m[2][1]) / s;
1466                 out->z = 0.25f * s;
1467                 out->w = (m->u.m[0][1] - m->u.m[1][0]) / s;
1468                 break;
1469         }
1470     }
1471 
1472     return out;
1473 }
1474 
1475 D3DXQUATERNION * WINAPI D3DXQuaternionRotationYawPitchRoll(D3DXQUATERNION *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
1476 {
1477     FLOAT syaw, cyaw, spitch, cpitch, sroll, croll;
1478 
1479     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
1480 
1481     syaw = sinf(yaw / 2.0f);
1482     cyaw = cosf(yaw / 2.0f);
1483     spitch = sinf(pitch / 2.0f);
1484     cpitch = cosf(pitch / 2.0f);
1485     sroll = sinf(roll / 2.0f);
1486     croll = cosf(roll / 2.0f);
1487 
1488     out->x = syaw * cpitch * sroll + cyaw * spitch * croll;
1489     out->y = syaw * cpitch * croll - cyaw * spitch * sroll;
1490     out->z = cyaw * cpitch * sroll - syaw * spitch * croll;
1491     out->w = cyaw * cpitch * croll + syaw * spitch * sroll;
1492 
1493     return out;
1494 }
1495 
1496 D3DXQUATERNION * WINAPI D3DXQuaternionSlerp(D3DXQUATERNION *out, const D3DXQUATERNION *q1,
1497         const D3DXQUATERNION *q2, FLOAT t)
1498 {
1499     FLOAT dot, temp;
1500 
1501     TRACE("out %p, q1 %p, q2 %p, t %f\n", out, q1, q2, t);
1502 
1503     temp = 1.0f - t;
1504     dot = D3DXQuaternionDot(q1, q2);
1505     if (dot < 0.0f)
1506     {
1507         t = -t;
1508         dot = -dot;
1509     }
1510 
1511     if (1.0f - dot > 0.001f)
1512     {
1513         FLOAT theta = acosf(dot);
1514 
1515         temp = sinf(theta * temp) / sinf(theta);
1516         t = sinf(theta * t) / sinf(theta);
1517     }
1518 
1519     out->x = temp * q1->x + t * q2->x;
1520     out->y = temp * q1->y + t * q2->y;
1521     out->z = temp * q1->z + t * q2->z;
1522     out->w = temp * q1->w + t * q2->w;
1523 
1524     return out;
1525 }
1526 
1527 D3DXQUATERNION* WINAPI D3DXQuaternionSquad(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, const D3DXQUATERNION *pq4, FLOAT t)
1528 {
1529     D3DXQUATERNION temp1, temp2;
1530 
1531     TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, pq4 %p, t %f\n", pout, pq1, pq2, pq3, pq4, t);
1532 
1533     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq4, t), D3DXQuaternionSlerp(&temp2, pq2, pq3, t), 2.0f * t * (1.0f - t));
1534     return pout;
1535 }
1536 
1537 static D3DXQUATERNION add_diff(const D3DXQUATERNION *q1, const D3DXQUATERNION *q2, const FLOAT add)
1538 {
1539     D3DXQUATERNION temp;
1540 
1541     temp.x = q1->x + add * q2->x;
1542     temp.y = q1->y + add * q2->y;
1543     temp.z = q1->z + add * q2->z;
1544     temp.w = q1->w + add * q2->w;
1545 
1546     return temp;
1547 }
1548 
1549 void WINAPI D3DXQuaternionSquadSetup(D3DXQUATERNION *paout, D3DXQUATERNION *pbout, D3DXQUATERNION *pcout, const D3DXQUATERNION *pq0, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3)
1550 {
1551     D3DXQUATERNION q, temp1, temp2, temp3, zero;
1552     D3DXQUATERNION aout, cout;
1553 
1554     TRACE("paout %p, pbout %p, pcout %p, pq0 %p, pq1 %p, pq2 %p, pq3 %p\n", paout, pbout, pcout, pq0, pq1, pq2, pq3);
1555 
1556     zero.x = 0.0f;
1557     zero.y = 0.0f;
1558     zero.z = 0.0f;
1559     zero.w = 0.0f;
1560 
1561     if (D3DXQuaternionDot(pq0, pq1) < 0.0f)
1562         temp2 = add_diff(&zero, pq0, -1.0f);
1563     else
1564         temp2 = *pq0;
1565 
1566     if (D3DXQuaternionDot(pq1, pq2) < 0.0f)
1567         cout = add_diff(&zero, pq2, -1.0f);
1568     else
1569         cout = *pq2;
1570 
1571     if (D3DXQuaternionDot(&cout, pq3) < 0.0f)
1572         temp3 = add_diff(&zero, pq3, -1.0f);
1573     else
1574         temp3 = *pq3;
1575 
1576     D3DXQuaternionInverse(&temp1, pq1);
1577     D3DXQuaternionMultiply(&temp2, &temp1, &temp2);
1578     D3DXQuaternionLn(&temp2, &temp2);
1579     D3DXQuaternionMultiply(&q, &temp1, &cout);
1580     D3DXQuaternionLn(&q, &q);
1581     temp1 = add_diff(&temp2, &q, 1.0f);
1582     temp1.x *= -0.25f;
1583     temp1.y *= -0.25f;
1584     temp1.z *= -0.25f;
1585     temp1.w *= -0.25f;
1586     D3DXQuaternionExp(&temp1, &temp1);
1587     D3DXQuaternionMultiply(&aout, pq1, &temp1);
1588 
1589     D3DXQuaternionInverse(&temp1, &cout);
1590     D3DXQuaternionMultiply(&temp2, &temp1, pq1);
1591     D3DXQuaternionLn(&temp2, &temp2);
1592     D3DXQuaternionMultiply(&q, &temp1, &temp3);
1593     D3DXQuaternionLn(&q, &q);
1594     temp1 = add_diff(&temp2, &q, 1.0f);
1595     temp1.x *= -0.25f;
1596     temp1.y *= -0.25f;
1597     temp1.z *= -0.25f;
1598     temp1.w *= -0.25f;
1599     D3DXQuaternionExp(&temp1, &temp1);
1600     D3DXQuaternionMultiply(pbout, &cout, &temp1);
1601     *paout = aout;
1602     *pcout = cout;
1603 }
1604 
1605 void WINAPI D3DXQuaternionToAxisAngle(const D3DXQUATERNION *pq, D3DXVECTOR3 *paxis, FLOAT *pangle)
1606 {
1607     TRACE("pq %p, paxis %p, pangle %p\n", pq, paxis, pangle);
1608 
1609     if (paxis)
1610     {
1611         paxis->x = pq->x;
1612         paxis->y = pq->y;
1613         paxis->z = pq->z;
1614     }
1615     if (pangle)
1616         *pangle = 2.0f * acosf(pq->w);
1617 }
1618 
1619 /*_________________D3DXVec2_____________________*/
1620 
1621 D3DXVECTOR2* WINAPI D3DXVec2BaryCentric(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT f, FLOAT g)
1622 {
1623     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1624 
1625     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1626     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1627     return pout;
1628 }
1629 
1630 D3DXVECTOR2* WINAPI D3DXVec2CatmullRom(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv0, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT s)
1631 {
1632     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1633 
1634     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1635     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1636     return pout;
1637 }
1638 
1639 D3DXVECTOR2* WINAPI D3DXVec2Hermite(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pt1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pt2, FLOAT s)
1640 {
1641     FLOAT h1, h2, h3, h4;
1642 
1643     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1644 
1645     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1646     h2 = s * s * s - 2.0f * s * s + s;
1647     h3 = -2.0f * s * s * s + 3.0f * s * s;
1648     h4 = s * s * s - s * s;
1649 
1650     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1651     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1652     return pout;
1653 }
1654 
1655 D3DXVECTOR2* WINAPI D3DXVec2Normalize(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv)
1656 {
1657     FLOAT norm;
1658 
1659     TRACE("pout %p, pv %p\n", pout, pv);
1660 
1661     norm = D3DXVec2Length(pv);
1662     if ( !norm )
1663     {
1664         pout->x = 0.0f;
1665         pout->y = 0.0f;
1666     }
1667     else
1668     {
1669         pout->x = pv->x / norm;
1670         pout->y = pv->y / norm;
1671     }
1672 
1673     return pout;
1674 }
1675 
1676 D3DXVECTOR4* WINAPI D3DXVec2Transform(D3DXVECTOR4 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1677 {
1678     D3DXVECTOR4 out;
1679 
1680     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1681 
1682     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y  + pm->u.m[3][0];
1683     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y  + pm->u.m[3][1];
1684     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y  + pm->u.m[3][2];
1685     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y  + pm->u.m[3][3];
1686     *pout = out;
1687     return pout;
1688 }
1689 
1690 D3DXVECTOR4* WINAPI D3DXVec2TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1691 {
1692     UINT i;
1693 
1694     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1695 
1696     for (i = 0; i < elements; ++i) {
1697         D3DXVec2Transform(
1698             (D3DXVECTOR4*)((char*)out + outstride * i),
1699             (const D3DXVECTOR2*)((const char*)in + instride * i),
1700             matrix);
1701     }
1702     return out;
1703 }
1704 
1705 D3DXVECTOR2* WINAPI D3DXVec2TransformCoord(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1706 {
1707     D3DXVECTOR2 v;
1708     FLOAT norm;
1709 
1710     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1711 
1712     v = *pv;
1713     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[3][3];
1714 
1715     pout->x = (pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[3][0]) / norm;
1716     pout->y = (pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[3][1]) / norm;
1717 
1718     return pout;
1719 }
1720 
1721 D3DXVECTOR2* WINAPI D3DXVec2TransformCoordArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1722 {
1723     UINT i;
1724 
1725     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1726 
1727     for (i = 0; i < elements; ++i) {
1728         D3DXVec2TransformCoord(
1729             (D3DXVECTOR2*)((char*)out + outstride * i),
1730             (const D3DXVECTOR2*)((const char*)in + instride * i),
1731             matrix);
1732     }
1733     return out;
1734 }
1735 
1736 D3DXVECTOR2* WINAPI D3DXVec2TransformNormal(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1737 {
1738     const D3DXVECTOR2 v = *pv;
1739 
1740     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1741 
1742     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y;
1743     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y;
1744     return pout;
1745 }
1746 
1747 D3DXVECTOR2* WINAPI D3DXVec2TransformNormalArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2 *in, UINT instride, const D3DXMATRIX *matrix, UINT elements)
1748 {
1749     UINT i;
1750 
1751     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1752 
1753     for (i = 0; i < elements; ++i) {
1754         D3DXVec2TransformNormal(
1755             (D3DXVECTOR2*)((char*)out + outstride * i),
1756             (const D3DXVECTOR2*)((const char*)in + instride * i),
1757             matrix);
1758     }
1759     return out;
1760 }
1761 
1762 /*_________________D3DXVec3_____________________*/
1763 
1764 D3DXVECTOR3* WINAPI D3DXVec3BaryCentric(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT f, FLOAT g)
1765 {
1766     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1767 
1768     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1769     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1770     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
1771     return pout;
1772 }
1773 
1774 D3DXVECTOR3* WINAPI D3DXVec3CatmullRom( D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv0, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT s)
1775 {
1776     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1777 
1778     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1779     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1780     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
1781     return pout;
1782 }
1783 
1784 D3DXVECTOR3* WINAPI D3DXVec3Hermite(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pt1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pt2, FLOAT s)
1785 {
1786     FLOAT h1, h2, h3, h4;
1787 
1788     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1789 
1790     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1791     h2 = s * s * s - 2.0f * s * s + s;
1792     h3 = -2.0f * s * s * s + 3.0f * s * s;
1793     h4 = s * s * s - s * s;
1794 
1795     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1796     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1797     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
1798     return pout;
1799 }
1800 
1801 D3DXVECTOR3* WINAPI D3DXVec3Normalize(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv)
1802 {
1803     FLOAT norm;
1804 
1805     TRACE("pout %p, pv %p\n", pout, pv);
1806 
1807     norm = D3DXVec3Length(pv);
1808     if ( !norm )
1809     {
1810         pout->x = 0.0f;
1811         pout->y = 0.0f;
1812         pout->z = 0.0f;
1813     }
1814     else
1815     {
1816         pout->x = pv->x / norm;
1817         pout->y = pv->y / norm;
1818         pout->z = pv->z / norm;
1819     }
1820 
1821     return pout;
1822 }
1823 
1824 D3DXVECTOR3* WINAPI D3DXVec3Project(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1825 {
1826     D3DXMATRIX m;
1827 
1828     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworld %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1829 
1830     D3DXMatrixIdentity(&m);
1831     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1832     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1833     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
1834 
1835     D3DXVec3TransformCoord(pout, pv, &m);
1836 
1837     if (pviewport)
1838     {
1839         pout->x = pviewport->X +  ( 1.0f + pout->x ) * pviewport->Width / 2.0f;
1840         pout->y = pviewport->Y +  ( 1.0f - pout->y ) * pviewport->Height / 2.0f;
1841         pout->z = pviewport->MinZ + pout->z * ( pviewport->MaxZ - pviewport->MinZ );
1842     }
1843     return pout;
1844 }
1845 
1846 D3DXVECTOR3* WINAPI D3DXVec3ProjectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1847 {
1848     UINT i;
1849 
1850     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1851         out, outstride, in, instride, viewport, projection, view, world, elements);
1852 
1853     for (i = 0; i < elements; ++i) {
1854         D3DXVec3Project(
1855             (D3DXVECTOR3*)((char*)out + outstride * i),
1856             (const D3DXVECTOR3*)((const char*)in + instride * i),
1857             viewport, projection, view, world);
1858     }
1859     return out;
1860 }
1861 
1862 D3DXVECTOR4* WINAPI D3DXVec3Transform(D3DXVECTOR4 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1863 {
1864     D3DXVECTOR4 out;
1865 
1866     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1867 
1868     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0];
1869     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1];
1870     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2];
1871     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3];
1872     *pout = out;
1873     return pout;
1874 }
1875 
1876 D3DXVECTOR4* WINAPI D3DXVec3TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1877 {
1878     UINT i;
1879 
1880     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1881 
1882     for (i = 0; i < elements; ++i) {
1883         D3DXVec3Transform(
1884             (D3DXVECTOR4*)((char*)out + outstride * i),
1885             (const D3DXVECTOR3*)((const char*)in + instride * i),
1886             matrix);
1887     }
1888     return out;
1889 }
1890 
1891 D3DXVECTOR3* WINAPI D3DXVec3TransformCoord(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1892 {
1893     D3DXVECTOR3 out;
1894     FLOAT norm;
1895 
1896     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1897 
1898     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] *pv->z + pm->u.m[3][3];
1899 
1900     out.x = (pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0]) / norm;
1901     out.y = (pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1]) / norm;
1902     out.z = (pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2]) / norm;
1903 
1904     *pout = out;
1905 
1906     return pout;
1907 }
1908 
1909 D3DXVECTOR3* WINAPI D3DXVec3TransformCoordArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1910 {
1911     UINT i;
1912 
1913     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1914 
1915     for (i = 0; i < elements; ++i) {
1916         D3DXVec3TransformCoord(
1917             (D3DXVECTOR3*)((char*)out + outstride * i),
1918             (const D3DXVECTOR3*)((const char*)in + instride * i),
1919             matrix);
1920     }
1921     return out;
1922 }
1923 
1924 D3DXVECTOR3* WINAPI D3DXVec3TransformNormal(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1925 {
1926     const D3DXVECTOR3 v = *pv;
1927 
1928     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1929 
1930     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[2][0] * v.z;
1931     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[2][1] * v.z;
1932     pout->z = pm->u.m[0][2] * v.x + pm->u.m[1][2] * v.y + pm->u.m[2][2] * v.z;
1933     return pout;
1934 
1935 }
1936 
1937 D3DXVECTOR3* WINAPI D3DXVec3TransformNormalArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1938 {
1939     UINT i;
1940 
1941     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1942 
1943     for (i = 0; i < elements; ++i) {
1944         D3DXVec3TransformNormal(
1945             (D3DXVECTOR3*)((char*)out + outstride * i),
1946             (const D3DXVECTOR3*)((const char*)in + instride * i),
1947             matrix);
1948     }
1949     return out;
1950 }
1951 
1952 D3DXVECTOR3 * WINAPI D3DXVec3Unproject(D3DXVECTOR3 *out, const D3DXVECTOR3 *v,
1953         const D3DVIEWPORT9 *viewport, const D3DXMATRIX *projection, const D3DXMATRIX *view,
1954         const D3DXMATRIX *world)
1955 {
1956     D3DXMATRIX m;
1957 
1958     TRACE("out %p, v %p, viewport %p, projection %p, view %p, world %p.\n",
1959             out, v, viewport, projection, view, world);
1960 
1961     D3DXMatrixIdentity(&m);
1962     if (world)
1963         D3DXMatrixMultiply(&m, &m, world);
1964     if (view)
1965         D3DXMatrixMultiply(&m, &m, view);
1966     if (projection)
1967         D3DXMatrixMultiply(&m, &m, projection);
1968     D3DXMatrixInverse(&m, NULL, &m);
1969 
1970     *out = *v;
1971     if (viewport)
1972     {
1973         out->x = 2.0f * (out->x - viewport->X) / viewport->Width - 1.0f;
1974         out->y = 1.0f - 2.0f * (out->y - viewport->Y) / viewport->Height;
1975         out->z = (out->z - viewport->MinZ) / (viewport->MaxZ - viewport->MinZ);
1976     }
1977     D3DXVec3TransformCoord(out, out, &m);
1978     return out;
1979 }
1980 
1981 D3DXVECTOR3* WINAPI D3DXVec3UnprojectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1982 {
1983     UINT i;
1984 
1985     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1986         out, outstride, in, instride, viewport, projection, view, world, elements);
1987 
1988     for (i = 0; i < elements; ++i) {
1989         D3DXVec3Unproject(
1990             (D3DXVECTOR3*)((char*)out + outstride * i),
1991             (const D3DXVECTOR3*)((const char*)in + instride * i),
1992             viewport, projection, view, world);
1993     }
1994     return out;
1995 }
1996 
1997 /*_________________D3DXVec4_____________________*/
1998 
1999 D3DXVECTOR4* WINAPI D3DXVec4BaryCentric(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT f, FLOAT g)
2000 {
2001     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
2002 
2003     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
2004     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
2005     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
2006     pout->w = (1.0f-f-g) * (pv1->w) + f * (pv2->w) + g * (pv3->w);
2007     return pout;
2008 }
2009 
2010 D3DXVECTOR4* WINAPI D3DXVec4CatmullRom(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv0, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT s)
2011 {
2012     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
2013 
2014     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
2015     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
2016     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
2017     pout->w = 0.5f * (2.0f * pv1->w + (pv2->w - pv0->w) *s + (2.0f *pv0->w - 5.0f * pv1->w + 4.0f * pv2->w - pv3->w) * s * s + (pv3->w -3.0f * pv2->w + 3.0f * pv1->w - pv0->w) * s * s * s);
2018     return pout;
2019 }
2020 
2021 D3DXVECTOR4* WINAPI D3DXVec4Cross(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3)
2022 {
2023     D3DXVECTOR4 out;
2024 
2025     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
2026 
2027     out.x = pv1->y * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->y * pv3->w - pv3->y * pv2->w) + pv1->w * (pv2->y * pv3->z - pv2->z *pv3->y);
2028     out.y = -(pv1->x * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->x * pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->z - pv3->x * pv2->z));
2029     out.z = pv1->x * (pv2->y * pv3->w - pv3->y * pv2->w) - pv1->y * (pv2->x *pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->y - pv3->x * pv2->y);
2030     out.w = -(pv1->x * (pv2->y * pv3->z - pv3->y * pv2->z) - pv1->y * (pv2->x * pv3->z - pv3->x *pv2->z) + pv1->z * (pv2->x * pv3->y - pv3->x * pv2->y));
2031     *pout = out;
2032     return pout;
2033 }
2034 
2035 D3DXVECTOR4* WINAPI D3DXVec4Hermite(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pt1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pt2, FLOAT s)
2036 {
2037     FLOAT h1, h2, h3, h4;
2038 
2039     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
2040 
2041     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
2042     h2 = s * s * s - 2.0f * s * s + s;
2043     h3 = -2.0f * s * s * s + 3.0f * s * s;
2044     h4 = s * s * s - s * s;
2045 
2046     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
2047     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
2048     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
2049     pout->w = h1 * (pv1->w) + h2 * (pt1->w) + h3 * (pv2->w) + h4 * (pt2->w);
2050     return pout;
2051 }
2052 
2053 D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv)
2054 {
2055     FLOAT norm;
2056 
2057     TRACE("pout %p, pv %p\n", pout, pv);
2058 
2059     norm = D3DXVec4Length(pv);
2060 
2061     pout->x = pv->x / norm;
2062     pout->y = pv->y / norm;
2063     pout->z = pv->z / norm;
2064     pout->w = pv->w / norm;
2065 
2066     return pout;
2067 }
2068 
2069 D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv, const D3DXMATRIX *pm)
2070 {
2071     D3DXVECTOR4 out;
2072 
2073     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
2074 
2075     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0] * pv->w;
2076     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1] * pv->w;
2077     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2] * pv->w;
2078     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3] * pv->w;
2079     *pout = out;
2080     return pout;
2081 }
2082 
2083 D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR4* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
2084 {
2085     UINT i;
2086 
2087     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
2088 
2089     for (i = 0; i < elements; ++i) {
2090         D3DXVec4Transform(
2091             (D3DXVECTOR4*)((char*)out + outstride * i),
2092             (const D3DXVECTOR4*)((const char*)in + instride * i),
2093             matrix);
2094     }
2095     return out;
2096 }
2097 
/*
 * Converts a single-precision float to a 16-bit half-float bit pattern
 * (1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits).
 *
 * Special cases handled here:
 *   - infinities and NaNs are returned as 0x7fff (positive) / 0xffff (negative);
 *   - +0.0 / -0.0 are returned as 0x0000 / 0x8000;
 *   - values whose biased exponent exceeds 31 are returned as 0x7fff plus sign;
 *   - values too small even for a denormal half are flushed to signed zero.
 * Everything else is rounded to nearest, ties to even.
 */
unsigned short float_32_to_16(const float in)
{
    const float mant_min = (float)(1u << 10);
    const float mant_max = (float)(1u << 11);
    const int negative = (copysignf(1, in) < 0);
    float scaled = fabsf(in);
    float frac;
    int exponent = 0, exponent_before_rounding;
    unsigned int mant;
    unsigned short half;

    /* Deal with special numbers */
    if (isinf(in) || isnan(in))
        return negative ? 0xffff : 0x7fff;
    if (in == 0.0f)
        return negative ? 0x8000 : 0x0000;

    /* Scale |in| into [1024, 2048) so its integer part is an 11-bit
     * mantissa with the leading one explicit; track the power of two. */
    while (scaled < mant_min)
    {
        scaled *= 2.0f;
        --exponent;
    }
    while (scaled >= mant_max)
    {
        scaled /= 2.0f;
        ++exponent;
    }

    /* +10 to normalize the mantissa, +15 for the excess-15 exponent encoding */
    exponent += 10 + 15;
    exponent_before_rounding = exponent;

    mant = (unsigned int)scaled;
    frac = scaled - mant;
    if (frac > 0.5f || (frac == 0.5f && (mant & 1u)))
    {
        /* round to nearest; an exact half only rounds up from an odd mantissa */
        ++mant;
    }
    if (mant == 2048)
    {
        /* rounding carried out of the mantissa: renormalize */
        mant = 1024;
        ++exponent;
    }

    if (exponent > 31)
    {
        /* too big */
        half = 0x7fff;
    }
    else if (exponent > 0)
    {
        /* normalized half: drop the implicit leading one of the mantissa */
        half = (exponent << 10) | (mant & 0x3ff);
    }
    else
    {
        unsigned int round_bit;

        /* Denormalized half float */

        /* numbers too small to represent in half floats become signed zero */
        if (exponent < -11)
            return negative ? 0x8000 : 0x0000;

        /* redo the rounding from the unrounded value and exponent */
        exponent = exponent_before_rounding;

        /* the 13 extra bits from single precision are used for rounding */
        mant = (unsigned int)(scaled * (1u << 13));
        mant >>= 1 - exponent; /* denormalize */

        mant -= ~(mant >> 13) & 1; /* round half to even */
        /* remove 13 least significant bits to get half float precision */
        mant >>= 12;
        round_bit = mant & 1;
        mant >>= 1;

        half = mant + round_bit;
    }

    if (negative)
        half |= 0x8000; /* Add the sign */
    return half;
}
2182 
2183 D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, const FLOAT *pin, UINT n)
2184 {
2185     unsigned int i;
2186 
2187     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2188 
2189     for (i = 0; i < n; ++i)
2190     {
2191         pout[i].value = float_32_to_16(pin[i]);
2192     }
2193 
2194     return pout;
2195 }
2196 
2197 /* Native d3dx9's D3DXFloat16to32Array lacks support for NaN and Inf. Specifically, e = 16 is treated as a
2198  * regular number - e.g., 0x7fff is converted to 131008.0 and 0xffff to -131008.0. */
2199 float float_16_to_32(const unsigned short in)
2200 {
2201     const unsigned short s = (in & 0x8000);
2202     const unsigned short e = (in & 0x7C00) >> 10;
2203     const unsigned short m = in & 0x3FF;
2204     const float sgn = (s ? -1.0f : 1.0f);
2205 
2206     if (e == 0)
2207     {
2208         if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */
2209         else return sgn * powf(2, -14.0f) * (m / 1024.0f);
2210     }
2211     else
2212     {
2213         return sgn * powf(2, e - 15.0f) * (1.0f + (m / 1024.0f));
2214     }
2215 }
2216 
2217 FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, const D3DXFLOAT16 *pin, UINT n)
2218 {
2219     unsigned int i;
2220 
2221     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2222 
2223     for (i = 0; i < n; ++i)
2224     {
2225         pout[i] = float_16_to_32(pin[i].value);
2226     }
2227 
2228     return pout;
2229 }
2230 
2231 /*_________________D3DXSH________________*/
2232 
2233 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)
2234 {
2235     UINT i;
2236 
2237     TRACE("out %p, order %u, a %p, b %p\n", out, order, a, b);
2238 
2239     for (i = 0; i < order * order; i++)
2240         out[i] = a[i] + b[i];
2241 
2242     return out;
2243 }
2244 
2245 FLOAT WINAPI D3DXSHDot(UINT order, const FLOAT *a, const FLOAT *b)
2246 {
2247     FLOAT s;
2248     UINT i;
2249 
2250     TRACE("order %u, a %p, b %p\n", order, a, b);
2251 
2252     s = a[0] * b[0];
2253     for (i = 1; i < order * order; i++)
2254         s += a[i] * b[i];
2255 
2256     return s;
2257 }
2258 
2259 static void weightedcapintegrale(FLOAT *out, UINT order, FLOAT angle)
2260 {
2261     FLOAT coeff[3];
2262 
2263     coeff[0] = cosf(angle);
2264 
2265     out[0] = 2.0f * D3DX_PI * (1.0f - coeff[0]);
2266     out[1] = D3DX_PI * sinf(angle) * sinf(angle);
2267     if (order <= 2)
2268         return;
2269 
2270     out[2] = coeff[0] * out[1];
2271     if (order == 3)
2272         return;
2273 
2274     coeff[1] = coeff[0] * coeff[0];
2275     coeff[2] = coeff[1] * coeff[1];
2276 
2277     out[3] = D3DX_PI * (-1.25f * coeff[2] + 1.5f * coeff[1] - 0.25f);
2278     if (order == 4)
2279         return;
2280 
2281     out[4] = -0.25f * D3DX_PI * coeff[0] * (7.0f * coeff[2] - 10.0f * coeff[1] + 3.0f);
2282     if (order == 5)
2283         return;
2284 
2285     out[5] = D3DX_PI * (-2.625f * coeff[2] * coeff[1] + 4.375f * coeff[2] - 1.875f * coeff[1] + 0.125f);
2286 }
2287 
2288 HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2289     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2290 {
2291     FLOAT cap[6], clamped_angle, norm, scale, temp;
2292     UINT i, index, j;
2293 
2294     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2295         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2296 
2297     if (radius <= 0.0f)
2298         return D3DXSHEvalDirectionalLight(order, dir, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2299 
2300     clamped_angle = (radius > D3DX_PI / 2.0f) ? (D3DX_PI / 2.0f) : radius;
2301     norm = sinf(clamped_angle) * sinf(clamped_angle);
2302 
2303     if (order > D3DXSH_MAXORDER)
2304     {
2305         WARN("Order clamped at D3DXSH_MAXORDER\n");
2306         order = D3DXSH_MAXORDER;
2307     }
2308 
2309     weightedcapintegrale(cap, order, radius);
2310     D3DXSHEvalDirection(rout, order, dir);
2311 
2312     for (i = 0; i < order; i++)
2313     {
2314         scale = cap[i] / norm;
2315 
2316         for (j = 0; j < 2 * i + 1; j++)
2317         {
2318             index = i * i + j;
2319             temp = rout[index] * scale;
2320 
2321             rout[index] = temp * Rintensity;
2322             if (gout)
2323                 gout[index] = temp * Gintensity;
2324             if (bout)
2325                 bout[index] = temp * Bintensity;
2326         }
2327     }
2328 
2329     return D3D_OK;
2330 }
2331 
2332 FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
2333 {
2334     const FLOAT dirxx = dir->x * dir->x;
2335     const FLOAT dirxy = dir->x * dir->y;
2336     const FLOAT dirxz = dir->x * dir->z;
2337     const FLOAT diryy = dir->y * dir->y;
2338     const FLOAT diryz = dir->y * dir->z;
2339     const FLOAT dirzz = dir->z * dir->z;
2340     const FLOAT dirxxxx = dirxx * dirxx;
2341     const FLOAT diryyyy = diryy * diryy;
2342     const FLOAT dirzzzz = dirzz * dirzz;
2343     const FLOAT dirxyxy = dirxy * dirxy;
2344 
2345     TRACE("out %p, order %u, dir %p\n", out, order, dir);
2346 
2347     if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
2348         return out;
2349 
2350     out[0] = 0.5f / sqrtf(D3DX_PI);
2351     out[1] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->y;
2352     out[2] = 0.5f / sqrtf(D3DX_PI / 3.0f) * dir->z;
2353     out[3] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->x;
2354     if (order == 2)
2355         return out;
2356 
2357     out[4] = 0.5f / sqrtf(D3DX_PI / 15.0f) * dirxy;
2358     out[5] = -0.5f / sqrtf(D3DX_PI / 15.0f) * diryz;
2359     out[6] = 0.25f / sqrtf(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
2360     out[7] = -0.5f / sqrtf(D3DX_PI / 15.0f) * dirxz;
2361     out[8] = 0.25f / sqrtf(D3DX_PI / 15.0f) * (dirxx - diryy);
2362     if (order == 3)
2363         return out;
2364 
2365     out[9] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
2366     out[10] = sqrtf(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
2367     out[11] = -sqrtf(42.0f / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
2368     out[12] = sqrtf(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
2369     out[13] = sqrtf(42.0f / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
2370     out[14] = sqrtf(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
2371     out[15] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
2372     if (order == 4)
2373         return out;
2374 
2375     out[16] = 0.75f * sqrtf(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
2376     out[17] = 3.0f * dir->z * out[9];
2377     out[18] = 0.75f * sqrtf(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
2378     out[19] = 0.375f * sqrtf(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
2379     out[20] = 3.0f / (16.0f * sqrtf(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
2380     out[21] = 0.375f * sqrtf(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
2381     out[22] = 0.375f * sqrtf(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
2382     out[23] = 3.0f * dir->z * out[15];
2383     out[24] = 3.0f / 16.0f * sqrtf(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2384     if (order == 5)
2385         return out;
2386 
2387     out[25] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
2388     out[26] = 0.75f * sqrtf(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
2389     out[27] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
2390     out[28] = sqrtf(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
2391     out[29] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2392     out[30] = sqrtf(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
2393     out[31] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2394     out[32] = sqrtf(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
2395     out[33] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
2396     out[34] = 3.0f / 16.0f * sqrtf(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2397     out[35] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);
2398 
2399     return out;
2400 }
2401 
2402 HRESULT WINAPI D3DXSHEvalDirectionalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *Rout, FLOAT *Gout, FLOAT *Bout)
2403 {
2404     FLOAT s, temp;
2405     UINT j;
2406 
2407     TRACE("Order %u, Vector %p, Red %f, Green %f, Blue %f, Rout %p, Gout %p, Bout %p\n", order, dir, Rintensity, Gintensity, Bintensity, Rout, Gout, Bout);
2408 
2409     s = 0.75f;
2410     if ( order > 2 )
2411         s += 5.0f / 16.0f;
2412     if ( order > 4 )
2413         s -= 3.0f / 32.0f;
2414     s /= D3DX_PI;
2415 
2416     D3DXSHEvalDirection(Rout, order, dir);
2417     for (j = 0; j < order * order; j++)
2418     {
2419         temp = Rout[j] / s;
2420 
2421         Rout[j] = Rintensity * temp;
2422         if ( Gout )
2423             Gout[j] = Gintensity * temp;
2424         if ( Bout )
2425             Bout[j] = Bintensity * temp;
2426     }
2427 
2428     return D3D_OK;
2429 }
2430 
2431 HRESULT WINAPI D3DXSHEvalHemisphereLight(UINT order, const D3DXVECTOR3 *dir, D3DXCOLOR top, D3DXCOLOR bottom,
2432     FLOAT *rout, FLOAT *gout, FLOAT *bout)
2433 {
2434     FLOAT a[2], temp[4];
2435     UINT i, j;
2436 
2437     TRACE("order %u, dir %p, rout %p, gout %p, bout %p\n", order, dir, rout, gout, bout);
2438 
2439     D3DXSHEvalDirection(temp, 2, dir);
2440 
2441     a[0] = (top.r + bottom.r) * 3.0f * D3DX_PI;
2442     a[1] = (top.r - bottom.r) * D3DX_PI;
2443     for (i = 0; i < order; i++)
2444         for (j = 0; j < 2 * i + 1; j++)
2445             if (i < 2)
2446                 rout[i * i + j] = temp[i * i + j] * a[i];
2447             else
2448                 rout[i * i + j] = 0.0f;
2449 
2450     if (gout)
2451     {
2452         a[0] = (top.g + bottom.g) * 3.0f * D3DX_PI;
2453         a[1] = (top.g - bottom.g) * D3DX_PI;
2454         for (i = 0; i < order; i++)
2455             for (j = 0; j < 2 * i + 1; j++)
2456                 if (i < 2)
2457                     gout[i * i + j] = temp[i * i + j] * a[i];
2458                 else
2459                     gout[i * i + j] = 0.0f;
2460     }
2461 
2462     if (bout)
2463     {
2464         a[0] = (top.b + bottom.b) * 3.0f * D3DX_PI;
2465         a[1] = (top.b - bottom.b) * D3DX_PI;
2466         for (i = 0; i < order; i++)
2467             for (j = 0; j < 2 * i + 1; j++)
2468                 if (i < 2)
2469                     bout[i * i + j] = temp[i * i + j] * a[i];
2470                 else
2471                     bout[i * i + j] = 0.0f;
2472     }
2473 
2474     return D3D_OK;
2475 }
2476 
2477 HRESULT WINAPI D3DXSHEvalSphericalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2478     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2479 {
2480     D3DXVECTOR3 normal;
2481     FLOAT cap[6], clamped_angle, dist, temp;
2482     UINT i, index, j;
2483 
2484     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2485         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2486 
2487     if (order > D3DXSH_MAXORDER)
2488     {
2489         WARN("Order clamped at D3DXSH_MAXORDER\n");
2490         order = D3DXSH_MAXORDER;
2491     }
2492 
2493     if (radius < 0.0f)
2494         radius = -radius;
2495 
2496     dist = D3DXVec3Length(dir);
2497     clamped_angle = (dist <= radius) ? D3DX_PI / 2.0f : asinf(radius / dist);
2498 
2499     weightedcapintegrale(cap, order, clamped_angle);
2500     D3DXVec3Normalize(&normal, dir);
2501     D3DXSHEvalDirection(rout, order, &normal);
2502 
2503     for (i = 0; i < order; i++)
2504         for (j = 0; j < 2 * i + 1; j++)
2505         {
2506             index = i * i + j;
2507             temp = rout[index] * cap[i];
2508 
2509             rout[index] = temp * Rintensity;
2510             if (gout)
2511                 gout[index] = temp * Gintensity;
2512             if (bout)
2513                 bout[index] = temp * Bintensity;
2514         }
2515 
2516     return D3D_OK;
2517 }
2518 
2519 FLOAT * WINAPI D3DXSHMultiply2(FLOAT *out, const FLOAT *a, const FLOAT *b)
2520 {
2521     FLOAT ta, tb;
2522 
2523     TRACE("out %p, a %p, b %p\n", out, a, b);
2524 
2525     ta = 0.28209479f * a[0];
2526     tb = 0.28209479f * b[0];
2527 
2528     out[0] = 0.28209479f * D3DXSHDot(2, a, b);
2529     out[1] = ta * b[1] + tb * a[1];
2530     out[2] = ta * b[2] + tb * a[2];
2531     out[3] = ta * b[3] + tb * a[3];
2532 
2533     return out;
2534 }
2535 
2536 FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
2537 {
2538     FLOAT t, ta, tb;
2539 
2540     TRACE("out %p, a %p, b %p\n", out, a, b);
2541 
2542     out[0] = 0.28209479f * a[0] * b[0];
2543 
2544     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2545     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2546     out[1] = ta * b[1] + tb * a[1];
2547     t = a[1] * b[1];
2548     out[0] += 0.28209479f * t;
2549     out[6] = -0.12615663f * t;
2550     out[8] = -0.21850969f * t;
2551 
2552     ta = 0.21850969f * a[5];
2553     tb = 0.21850969f * b[5];
2554     out[1] += ta * b[2] + tb * a[2];
2555     out[2] = ta * b[1] + tb * a[1];
2556     t = a[1] * b[2] +a[2] * b[1];
2557     out[5] = 0.21850969f * t;
2558 
2559     ta = 0.21850969f * a[4];
2560     tb = 0.21850969f * b[4];
2561     out[1] += ta * b[3] + tb * a[3];
2562     out[3]  = ta * b[1] + tb * a[1];
2563     t = a[1] * b[3] + a[3] * b[1];
2564     out[4] = 0.21850969f * t;
2565 
2566     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2567     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2568     out[2] += ta * b[2] + tb * a[2];
2569     t = a[2] * b[2];
2570     out[0] += 0.28209480f * t;
2571     out[6] += 0.25231326f * t;
2572 
2573     ta = 0.21850969f * a[7];
2574     tb = 0.21850969f * b[7];
2575     out[2] += ta * b[3] + tb * a[3];
2576     out[3] += ta * b[2] + tb * a[2];
2577     t = a[2] * b[3] + a[3] * b[2];
2578     out[7] = 0.21850969f * t;
2579 
2580     ta = 0.28209479f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2581     tb = 0.28209479f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2582     out[3] += ta * b[3] + tb * a[3];
2583     t = a[3] * b[3];
2584     out[0] += 0.28209479f * t;
2585     out[6] -= 0.12615663f * t;
2586     out[8] += 0.21850969f * t;
2587 
2588     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2589     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2590     out[4] += ta * b[4] + tb * a[4];
2591     t = a[4] * b[4];
2592     out[0] += 0.28209479f * t;
2593     out[6] -= 0.18022375f * t;
2594 
2595     ta = 0.15607835f * a[7];
2596     tb = 0.15607835f * b[7];
2597     out[4] += ta * b[5] + tb * a[5];
2598     out[5] += ta * b[4] + tb * a[4];
2599     t = a[4] * b[5] + a[5] * b[4];
2600     out[7] += 0.15607835f * t;
2601 
2602     ta = 0.28209479f * a[0] + 0.09011188f * a[6] - 0.15607835f * a[8];
2603     tb = 0.28209479f * b[0] + 0.09011188f * b[6] - 0.15607835f * b[8];
2604     out[5] += ta * b[5] + tb * a[5];
2605     t = a[5] * b[5];
2606     out[0] += 0.28209479f * t;
2607     out[6] += 0.09011188f * t;
2608     out[8] -= 0.15607835f * t;
2609 
2610     ta = 0.28209480f * a[0];
2611     tb = 0.28209480f * b[0];
2612     out[6] += ta * b[6] + tb * a[6];
2613     t = a[6] * b[6];
2614     out[0] += 0.28209480f * t;
2615     out[6] += 0.18022376f * t;
2616 
2617     ta = 0.28209479f * a[0] + 0.09011188f * a[6] + 0.15607835f * a[8];
2618     tb = 0.28209479f * b[0] + 0.09011188f * b[6] + 0.15607835f * b[8];
2619     out[7] += ta * b[7] + tb * a[7];
2620     t = a[7] * b[7];
2621     out[0] += 0.28209479f * t;
2622     out[6] += 0.09011188f * t;
2623     out[8] += 0.15607835f * t;
2624 
2625     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2626     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2627     out[8] += ta * b[8] + tb * a[8];
2628     t = a[8] * b[8];
2629     out[0] += 0.28209479f * t;
2630     out[6] -= 0.18022375f * t;
2631 
2632     return out;
2633 }
2634 
2635 FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b)
2636 {
2637     FLOAT ta, tb, t;
2638 
2639     TRACE("out %p, a %p, b %p\n", out, a, b);
2640 
2641     out[0] = 0.28209479f * a[0] * b[0];
2642 
2643     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2644     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2645     out[1] = ta * b[1] + tb * a[1];
2646     t = a[1] * b[1];
2647     out[0] += 0.28209479f * t;
2648     out[6] = -0.12615663f * t;
2649     out[8] = -0.21850969f * t;
2650 
2651     ta = 0.21850969f * a[3] - 0.05839917f * a[13] - 0.22617901f * a[15];
2652     tb = 0.21850969f * b[3] - 0.05839917f * b[13] - 0.22617901f * b[15];
2653     out[1] += ta * b[4] + tb * a[4];
2654     out[4] = ta * b[1] + tb * a[1];
2655     t = a[1] * b[4] + a[4] * b[1];
2656     out[3] = 0.21850969f * t;
2657     out[13] = -0.05839917f * t;
2658     out[15] = -0.22617901f * t;
2659 
2660     ta = 0.21850969f * a[2] - 0.14304817f * a[12] - 0.18467439f * a[14];
2661     tb = 0.21850969f * b[2] - 0.14304817f * b[12] - 0.18467439f * b[14];
2662     out[1] += ta * b[5] + tb * a[5];
2663     out[5] = ta * b[1] + tb * a[1];
2664     t = a[1] * b[5] + a[5] * b[1];
2665     out[2] = 0.21850969f * t;
2666     out[12] = -0.14304817f * t;
2667     out[14] = -0.18467439f * t;
2668 
2669     ta = 0.20230066f * a[11];
2670     tb = 0.20230066f * b[11];
2671     out[1] += ta * b[6] + tb * a[6];
2672     out[6] += ta * b[1] + tb * a[1];
2673     t = a[1] * b[6] + a[6] * b[1];
2674     out[11] = 0.20230066f * t;
2675 
2676     ta = 0.22617901f * a[9] + 0.05839917f * a[11];
2677     tb = 0.22617901f * b[9] + 0.05839917f * b[11];
2678     out[1] += ta * b[8] + tb * a[8];
2679     out[8] += ta * b[1] + tb * a[1];
2680     t = a[1] * b[8] + a[8] * b[1];
2681     out[9] = 0.22617901f * t;
2682     out[11] += 0.05839917f * t;
2683 
2684     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2685     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2686     out[2] += ta * b[2] + tb * a[2];
2687     t = a[2] * b[2];
2688     out[0] += 0.28209480f * t;
2689     out[6] += 0.25231326f * t;
2690 
2691     ta = 0.24776671f * a[12];
2692     tb = 0.24776671f * b[12];
2693     out[2] += ta * b[6] + tb * a[6];
2694     out[6] += ta * b[2] + tb * a[2];
2695     t = a[2] * b[6] + a[6] * b[2];
2696     out[12] += 0.24776671f * t;
2697 
2698     ta = 0.28209480f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2699     tb = 0.28209480f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2700     out[3] += ta * b[3] + tb * a[3];
2701     t = a[3] * b[3];
2702     out[0] += 0.28209480f * t;
2703     out[6] -= 0.12615663f * t;
2704     out[8] += 0.21850969f * t;
2705 
2706     ta = 0.20230066f * a[13];
2707     tb = 0.20230066f * b[13];
2708     out[3] += ta * b[6] + tb * a[6];
2709     out[6] += ta * b[3] + tb * a[3];
2710     t = a[3] * b[6] + a[6] * b[3];
2711     out[13] += 0.20230066f * t;
2712 
2713     ta = 0.21850969f * a[2] - 0.14304817f * a[12] + 0.18467439f * a[14];
2714     tb = 0.21850969f * b[2] - 0.14304817f * b[12] + 0.18467439f * b[14];
2715     out[3] += ta * b[7] + tb * a[7];
2716     out[7] = ta * b[3] + tb * a[3];
2717     t = a[3] * b[7] + a[7] * b[3];
2718     out[2] += 0.21850969f * t;
2719     out[12] -= 0.14304817f * t;
2720     out[14] += 0.18467439f * t;
2721 
2722     ta = -0.05839917f * a[13] + 0.22617901f * a[15];
2723     tb = -0.05839917f * b[13] + 0.22617901f * b[15];
2724     out[3] += ta * b[8] + tb * a[8];
2725     out[8] += ta * b[3] + tb * a[3];
2726     t = a[3] * b[8] + a[8] * b[3];
2727     out[13] -= 0.05839917f * t;
2728     out[15] += 0.22617901f * t;
2729 
2730     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2731     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2732     out[4] += ta * b[4] + tb * a[4];
2733     t = a[4] * b[4];
2734     out[0] += 0.28209479f * t;
2735     out[6] -= 0.18022375f * t;
2736 
2737     ta = 0.15607835f * a[7];
2738     tb = 0.15607835f * b[7];
2739     out[4] += ta * b[5] + tb * a[5];
2740     out[5] += ta * b[4] + tb * a[4];
2741     t = a[4] * b[5] + a[5] * b[4];
2742     out[7] += 0.15607835f * t;
2743 
2744     ta = 0.22617901f * a[3] - 0.09403160f * a[13];
2745     tb = 0.22617901f * b[3] - 0.09403160f * b[13];
2746     out[4] += ta * b[9] + tb * a[9];
2747     out[9] += ta * b[4] + tb * a[4];
2748     t = a[4] * b[9] + a[9] * b[4];
2749     out[3] += 0.22617901f * t;
2750     out[13] -= 0.09403160f * t;
2751 
2752     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2753     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2754     out[4] += ta * b[10] + tb * a [10];
2755     out[10] = ta * b[4] + tb * a[4];
2756     t = a[4] * b[10] + a[10] * b[4];
2757     out[2] += 0.18467439f * t;
2758     out[12] -= 0.18806319f * t;
2759 
2760     ta = -0.05839917f * a[3] + 0.14567312f * a[13] + 0.09403160f * a[15];
2761     tb = -0.05839917f * b[3] + 0.14567312f * b[13] + 0.09403160f * b[15];
2762     out[4] += ta * b[11] + tb * a[11];
2763     out[11] += ta * b[4] + tb * a[4];
2764     t = a[4] * b[11] + a[11] * b[4];
2765     out[3] -= 0.05839917f * t;
2766     out[13] += 0.14567312f * t;
2767     out[15] += 0.09403160f * t;
2768 
2769     ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
2770     tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
2771     out[5] += ta * b[5] + tb * a[5];
2772     t = a[5] * b[5];
2773     out[0] += 0.28209479f * t;
2774     out[6] += 0.09011186f * t;
2775     out[8] -= 0.15607835f * t;
2776 
2777     ta = 0.14867701f * a[14];
2778     tb = 0.14867701f * b[14];
2779     out[5] += ta * b[9] + tb * a[9];
2780     out[9] += ta * b[5] + tb * a[5];
2781     t = a[5] * b[9] + a[9] * b[5];
2782     out[14] += 0.14867701f * t;
2783 
2784     ta = 0.18467439f * a[3] + 0.11516472f * a[13] - 0.14867701f * a[15];
2785     tb = 0.18467439f * b[3] + 0.11516472f * b[13] - 0.14867701f * b[15];
2786     out[5] += ta * b[10] + tb * a[10];
2787     out[10] += ta * b[5] + tb * a[5];
2788     t = a[5] * b[10] + a[10] * b[5];
2789     out[3] += 0.18467439f * t;
2790     out[13] += 0.11516472f * t;
2791     out[15] -= 0.14867701f * t;
2792 
2793     ta = 0.23359668f * a[2] + 0.05947080f * a[12] - 0.11516472f * a[14];
2794     tb = 0.23359668f * b[2] + 0.05947080f * b[12] - 0.11516472f * b[14];
2795     out[5] += ta * b[11] + tb * a[11];
2796     out[11] += ta * b[5] + tb * a[5];
2797     t = a[5] * b[11] + a[11] * b[5];
2798     out[2] += 0.23359668f * t;
2799     out[12] += 0.05947080f * t;
2800     out[14] -= 0.11516472f * t;
2801 
2802     ta = 0.28209479f * a[0];
2803     tb = 0.28209479f * b[0];
2804     out[6] += ta * b[6] + tb * a[6];
2805     t = a[6] * b[6];
2806     out[0] += 0.28209479f * t;
2807     out[6] += 0.18022376f * t;
2808 
2809     ta = 0.09011186f * a[6] + 0.28209479f * a[0] + 0.15607835f * a[8];
2810     tb = 0.09011186f * b[6] + 0.28209479f * b[0] + 0.15607835f * b[8];
2811     out[7] += ta * b[7] + tb * a[7];
2812     t = a[7] * b[7];
2813     out[6] += 0.09011186f * t;
2814     out[0] += 0.28209479f * t;
2815     out[8] += 0.15607835f * t;
2816 
2817     ta = 0.14867701f * a[9] + 0.18467439f * a[1] + 0.11516472f * a[11];
2818     tb = 0.14867701f * b[9] + 0.18467439f * b[1] + 0.11516472f * b[11];
2819     out[7] += ta * b[10] + tb * a[10];
2820     out[10] += ta * b[7] + tb * a[7];
2821     t = a[7] * b[10] + a[10] * b[7];
2822     out[9] += 0.14867701f * t;
2823     out[1] += 0.18467439f * t;
2824     out[11] += 0.11516472f * t;
2825 
2826     ta = 0.05947080f * a[12] + 0.23359668f * a[2] + 0.11516472f * a[14];
2827     tb = 0.05947080f * b[12] + 0.23359668f * b[2] + 0.11516472f * b[14];
2828     out[7] += ta * b[13] + tb * a[13];
2829     out[13] += ta * b[7]+ tb * a[7];
2830     t = a[7] * b[13] + a[13] * b[7];
2831     out[12] += 0.05947080f * t;
2832     out[2] += 0.23359668f * t;
2833     out[14] += 0.11516472f * t;
2834 
2835     ta = 0.14867701f * a[15];
2836     tb = 0.14867701f * b[15];
2837     out[7] += ta * b[14] + tb * a[14];
2838     out[14] += ta * b[7] + tb * a[7];
2839     t = a[7] * b[14] + a[14] * b[7];
2840     out[15] += 0.14867701f * t;
2841 
2842     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2843     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2844     out[8] += ta * b[8] + tb * a[8];
2845     t = a[8] * b[8];
2846     out[0] += 0.28209479f * t;
2847     out[6] -= 0.18022375f * t;
2848 
2849     ta = -0.09403160f * a[11];
2850     tb = -0.09403160f * b[11];
2851     out[8] += ta * b[9] + tb * a[9];
2852     out[9] += ta * b[8] + tb * a[8];
2853     t = a[8] * b[9] + a[9] * b[8];
2854     out[11] -= 0.09403160f * t;
2855 
2856     ta = -0.09403160f * a[15];
2857     tb = -0.09403160f * b[15];
2858     out[8] += ta * b[13] + tb * a[13];
2859     out[13] += ta * b[8] + tb * a[8];
2860     t = a[8] * b[13] + a[13] * b[8];
2861     out[15] -= 0.09403160f * t;
2862 
2863     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2864     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2865     out[8] += ta * b[14] + tb * a[14];
2866     out[14] += ta * b[8] + tb * a[8];
2867     t = a[8] * b[14] + a[14] * b[8];
2868     out[2] += 0.18467439f * t;
2869     out[12] -= 0.18806319f * t;
2870 
2871     ta = -0.21026104f * a[6] + 0.28209479f * a[0];
2872     tb = -0.21026104f * b[6] + 0.28209479f * b[0];
2873     out[9] += ta * b[9] + tb * a[9];
2874     t = a[9] * b[9];
2875     out[6] -= 0.21026104f * t;
2876     out[0] += 0.28209479f * t;
2877 
2878     ta = 0.28209479f * a[0];
2879     tb = 0.28209479f * b[0];
2880     out[10] += ta * b[10] + tb * a[10];
2881     t = a[10] * b[10];
2882     out[0] += 0.28209479f * t;
2883 
2884     ta = 0.28209479f * a[0] + 0.12615663f * a[6] - 0.14567312f * a[8];
2885     tb = 0.28209479f * b[0] + 0.12615663f * b[6] - 0.14567312f * b[8];
2886     out[11] += ta * b[11] + tb * a[11];
2887     t = a[11] * b[11];
2888     out[0] += 0.28209479f * t;
2889     out[6] += 0.12615663f * t;
2890     out[8] -= 0.14567312f * t;
2891 
2892     ta = 0.28209479f * a[0] + 0.16820885f * a[6];
2893     tb = 0.28209479f * b[0] + 0.16820885f * b[6];
2894     out[12] += ta * b[12] + tb * a[12];
2895     t = a[12] * b[12];
2896     out[0] += 0.28209479f * t;
2897     out[6] += 0.16820885f * t;
2898 
2899     ta =0.28209479f * a[0] + 0.14567312f * a[8] + 0.12615663f * a[6];
2900     tb =0.28209479f * b[0] + 0.14567312f * b[8] + 0.12615663f * b[6];
2901     out[13] += ta * b[13] + tb * a[13];
2902     t = a[13] * b[13];
2903     out[0] += 0.28209479f * t;
2904     out[8] += 0.14567312f * t;
2905     out[6] += 0.12615663f * t;
2906 
2907     ta = 0.28209479f * a[0];
2908     tb = 0.28209479f * b[0];
2909     out[14] += ta * b[14] + tb * a[14];
2910     t = a[14] * b[14];
2911     out[0] += 0.28209479f * t;
2912 
2913     ta = 0.28209479f * a[0] - 0.21026104f * a[6];
2914     tb = 0.28209479f * b[0] - 0.21026104f * b[6];
2915     out[15] += ta * b[15] + tb * a[15];
2916     t = a[15] * b[15];
2917     out[0] += 0.28209479f * t;
2918     out[6] -= 0.21026104f * t;
2919 
2920     return out;
2921 }
2922 
2923 static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in)
2924 {
2925     out[0] = in[0];
2926 
2927     out[1] = a * in[2];
2928     out[2] = -a * in[1];
2929     out[3] = in[3];
2930 
2931     out[4] = a * in[7];
2932     out[5] = -in[5];
2933     out[6] = -0.5f * in[6] - 0.8660253882f * in[8];
2934     out[7] = -a * in[4];
2935     out[8] = -0.8660253882f * in[6] + 0.5f * in[8];
2936     out[9] = -a * 0.7905694842f * in[12] + a * 0.6123724580f * in[14];
2937 
2938     out[10] = -in[10];
2939     out[11] = -a * 0.6123724580f * in[12] - a * 0.7905694842f * in[14];
2940     out[12] = a * 0.7905694842f * in[9] + a * 0.6123724580f * in[11];
2941     out[13] = -0.25f * in[13] - 0.9682458639f * in[15];
2942     out[14] = -a * 0.6123724580f * in[9] + a * 0.7905694842f * in[11];
2943     out[15] = -0.9682458639f * in[13] + 0.25f * in[15];
2944     if (order == 4)
2945         return;
2946 
2947     out[16] = -a * 0.9354143739f * in[21] + a * 0.3535533845f * in[23];
2948     out[17] = -0.75f * in[17] + 0.6614378095f * in[19];
2949     out[18] = -a * 0.3535533845f * in[21] - a * 0.9354143739f * in[23];
2950     out[19] = 0.6614378095f * in[17] + 0.75f * in[19];
2951     out[20] = 0.375f * in[20] + 0.5590170026f * in[22] + 0.7395099998f * in[24];
2952     out[21] = a * 0.9354143739f * in[16] + a * 0.3535533845f * in[18];
2953     out[22] = 0.5590170026f * in[20] + 0.5f * in[22] - 0.6614378691f * in[24];
2954     out[23] = -a * 0.3535533845f * in[16] + a * 0.9354143739f * in[18];
2955     out[24] = 0.7395099998f * in[20] - 0.6614378691f * in[22] + 0.125f * in[24];
2956     if (order == 5)
2957         return;
2958 
2959     out[25] = a * 0.7015607357f * in[30] - a * 0.6846531630f * in[32] + a * 0.1976423711f * in[34];
2960     out[26] = -0.5f * in[26] + 0.8660253882f * in[28];
2961     out[27] = a * 0.5229125023f * in[30] + a * 0.3061861992f * in[32] - a * 0.7954951525f * in[34];
2962     out[28] = 0.8660253882f * in[26] + 0.5f * in[28];
2963     out[29] = a * 0.4841229022f * in[30] + a * 0.6614378691f * in[32] + a * 0.5728219748f * in[34];
2964     out[30] = -a * 0.7015607357f * in[25] - a * 0.5229125023f * in[27] - a * 0.4841229022f * in[29];
2965     out[31] = 0.125f * in[31] + 0.4050463140f * in[33] + 0.9057110548f * in[35];
2966     out[32] = a * 0.6846531630f * in[25] - a * 0.3061861992f * in[27] - a * 0.6614378691f * in[29];
2967     out[33] = 0.4050463140f * in[31] + 0.8125f * in[33] - 0.4192627370f * in[35];
2968     out[34] = -a * 0.1976423711f * in[25] + a * 0.7954951525f * in[27] - a * 0.5728219748f * in[29];
2969     out[35] = 0.9057110548f * in[31] - 0.4192627370f * in[33] + 0.0624999329f * in[35];
2970 }
2971 
2972 FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, const D3DXMATRIX *matrix, const FLOAT *in)
2973 {
2974     FLOAT alpha, beta, gamma, sinb, temp[36], temp1[36];
2975 
2976     TRACE("out %p, order %u, matrix %p, in %p\n", out, order, matrix, in);
2977 
2978     out[0] = in[0];
2979 
2980     if ((order > D3DXSH_MAXORDER) || (order < D3DXSH_MINORDER))
2981         return out;
2982 
2983     if (order <= 3)
2984     {
2985         out[1] = matrix->u.m[1][1] * in[1] - matrix->u.m[2][1] * in[2] + matrix->u.m[0][1] * in[3];
2986         out[2] = -matrix->u.m[1][2] * in[1] + matrix->u.m[2][2] * in[2] - matrix->u.m[0][2] * in[3];
2987         out[3] = matrix->u.m[1][0] * in[1] - matrix->u.m[2][0] * in[2] + matrix->u.m[0][0] * in[3];
2988 
2989         if (order == 3)
2990         {
2991             FLOAT coeff[]={
2992                 matrix->u.m[1][0] * matrix->u.m[0][0], matrix->u.m[1][1] * matrix->u.m[0][1],
2993                 matrix->u.m[1][1] * matrix->u.m[2][1], matrix->u.m[1][0] * matrix->u.m[2][0],
2994                 matrix->u.m[2][0] * matrix->u.m[2][0], matrix->u.m[2][1] * matrix->u.m[2][1],
2995                 matrix->u.m[0][0] * matrix->u.m[2][0], matrix->u.m[0][1] * matrix->u.m[2][1],
2996                 matrix->u.m[0][1] * matrix->u.m[0][1], matrix->u.m[1][0] * matrix->u.m[1][0],
2997                 matrix->u.m[1][1] * matrix->u.m[1][1], matrix->u.m[0][0] * matrix->u.m[0][0], };
2998 
2999             out[4] = (matrix->u.m[1][1] * matrix->u.m[0][0] + matrix->u.m[0][1] * matrix->u.m[1][0]) * in[4];
3000             out[4] -= (matrix->u.m[1][0] * matrix->u.m[2][1] + matrix->u.m[1][1] * matrix->u.m[2][0]) * in[5];
3001             out[4] += 1.7320508076f * matrix->u.m[2][0] * matrix->u.m[2][1] * in[6];
3002             out[4] -= (matrix->u.m[0][1] * matrix->u.m[2][0] + matrix->u.m[0][0] * matrix->u.m[2][1]) * in[7];
3003             out[4] += (matrix->u.m[0][0] * matrix->u.m[0][1] - matrix->u.m[1][0] * matrix->u.m[1][1]) * in[8];
3004 
3005             out[5] = (matrix->u.m[1][1] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][1]) * in[5];
3006             out[5] -= (matrix->u.m[1][1] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][1]) * in[4];
3007             out[5] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][1] * in[6];
3008             out[5] += (matrix->u.m[0][2] * matrix->u.m[2][1] + matrix->u.m[0][1] * matrix->u.m[2][2]) * in[7];
3009             out[5] -= (matrix->u.m[0][1] * matrix->u.m[0][2] - matrix->u.m[1][1] * matrix->u.m[1][2]) * in[8];
3010 
3011             out[6] = (matrix->u.m[2][2] * matrix->u.m[2][2] - 0.5f * (coeff[4] + coeff[5])) * in[6];
3012             out[6] -= (0.5773502692f * (coeff[0] + coeff[1]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[0][2]) * in[4];
3013             out[6] += (0.5773502692f * (coeff[2] + coeff[3]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[2][2]) * in[5];
3014             out[6] += (0.5773502692f * (coeff[6] + coeff[7]) - 1.1547005384f * matrix->u.m[0][2] * matrix->u.m[2][2]) * in[7];
3015             out[6] += (0.2886751347f * (coeff[9] - coeff[8] + coeff[10] - coeff[11]) - 0.5773502692f *
3016                   (matrix->u.m[1][2] * matrix->u.m[1][2] - matrix->u.m[0][2] * matrix->u.m[0][2])) * in[8];
3017 
3018             out[7] = (matrix->u.m[0][0] * matrix->u.m[2][2] + matrix->u.m[0][2] * matrix->u.m[2][0]) * in[7];
3019             out[7] -= (matrix->u.m[1][0] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][0]) * in[4];
3020             out[7] += (matrix->u.m[1][0] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][0]) * in[5];
3021             out[7] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][0] * in[6];
3022             out[7] -= (matrix->u.m[0][0] * matrix->u.m[0][2] - matrix->u.m[1][0] * matrix->u.m[1][2]) * in[8];
3023 
3024             out[8] = 0.5f * (coeff[11] - coeff[8] - coeff[9] + coeff[10]) * in[8];
3025             out[8] += (coeff[0] - coeff[1]) * in[4];
3026             out[8] += (coeff[2] - coeff[3]) * in[5];
3027             out[8] += 0.86602540f * (coeff[4] - coeff[5]) * in[6];
3028             out[8] += (coeff[7] - coeff[6]) * in[7];
3029         }
3030 
3031         return out;
3032     }
3033 
3034     if (fabsf(matrix->u.m[2][2]) != 1.0f)
3035     {
3036         sinb = sqrtf(1.0f - matrix->u.m[2][2] * matrix->u.m[2][2]);
3037         alpha = atan2f(matrix->u.m[2][1] / sinb, matrix->u.m[2][0] / sinb);
3038         beta = atan2f(sinb, matrix->u.m[2][2]);
3039         gamma = atan2f(matrix->u.m[1][2] / sinb, -matrix->u.m[0][2] / sinb);
3040     }
3041     else
3042     {
3043         alpha = atan2f(matrix->u.m[0][1], matrix->u.m[0][0]);
3044         beta = 0.0f;
3045         gamma = 0.0f;
3046     }
3047 
3048     D3DXSHRotateZ(temp, order, gamma, in);
3049     rotate_X(temp1, order, 1.0f, temp);
3050     D3DXSHRotateZ(temp, order, beta, temp1);
3051     rotate_X(temp1, order, -1.0f, temp);
3052     D3DXSHRotateZ(out, order, alpha, temp1);
3053 
3054     return out;
3055 }
3056 
3057 FLOAT * WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, const FLOAT *in)
3058 {
3059     UINT i, sum = 0;
3060     FLOAT c[5], s[5];
3061 
3062     TRACE("out %p, order %u, angle %f, in %p\n", out, order, angle, in);
3063 
3064     order = min(max(order, D3DXSH_MINORDER), D3DXSH_MAXORDER);
3065 
3066     out[0] = in[0];
3067 
3068     for (i = 1; i < order; i++)
3069     {
3070         UINT j;
3071 
3072         c[i - 1] = cosf(i * angle);
3073         s[i - 1] = sinf(i * angle);
3074         sum += i * 2;
3075 
3076         out[sum - i] = c[i - 1] * in[sum - i];
3077         out[sum - i] += s[i - 1] * in[sum + i];
3078         for (j = i - 1; j > 0; j--)
3079         {
3080             out[sum - j] = 0.0f;
3081             out[sum - j] = c[j - 1] * in[sum - j];
3082             out[sum - j] += s[j - 1] * in[sum + j];
3083         }
3084 
3085         if (in == out)
3086             out[sum] = 0.0f;
3087         else
3088             out[sum] = in[sum];
3089 
3090         for (j = 1; j < i; j++)
3091         {
3092             out[sum + j] = 0.0f;
3093             out[sum + j] = -s[j - 1] * in[sum - j];
3094             out[sum + j] += c[j - 1] * in[sum + j];
3095         }
3096         out[sum + i] = -s[i - 1] * in[sum - i];
3097         out[sum + i] += c[i - 1] * in[sum + i];
3098     }
3099 
3100     return out;
3101 }
3102 
3103 FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, const FLOAT *a, const FLOAT scale)
3104 {
3105     UINT i;
3106 
3107     TRACE("out %p, order %u, a %p, scale %f\n", out, order, a, scale);
3108 
3109     for (i = 0; i < order * order; i++)
3110         out[i] = a[i] * scale;
3111 
3112     return out;
3113 }
3114