xref: /reactos/dll/directx/wine/d3dx9_36/math.c (revision 48cc7814)
1 /*
2  * Mathematical operations specific to D3DX9.
3  *
4  * Copyright (C) 2008 David Adam
5  * Copyright (C) 2008 Luis Busquets
6  * Copyright (C) 2008 Jérôme Gardou
7  * Copyright (C) 2008 Philip Nilsson
8  * Copyright (C) 2008 Henri Verbeet
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #include "config.h"
26 #include "wine/port.h"
27 
28 #include "d3dx9_private.h"
29 
30 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
31 
/* Implementation object behind the ID3DXMatrixStack interface. */
struct ID3DXMatrixStackImpl
{
  ID3DXMatrixStack ID3DXMatrixStack_iface; /* embedded interface, kept as the first member */
  LONG ref; /* NOTE(review): presumably the COM reference count - its users are outside this view */

  unsigned int current; /* NOTE(review): looks like the index of the top entry in stack - confirm */
  unsigned int stack_size; /* NOTE(review): presumably the number of entries allocated in stack - confirm */
  D3DXMATRIX *stack; /* storage for the stacked matrices */
};
41 
/* NOTE(review): presumably the number of matrices a new matrix stack is
 * allocated with - the code that uses it is outside this view. */
static const unsigned int INITIAL_STACK_SIZE = 32;
43 
44 /*_________________D3DXColor____________________*/
45 
46 D3DXCOLOR* WINAPI D3DXColorAdjustContrast(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
47 {
48     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
49 
50     pout->r = 0.5f + s * (pc->r - 0.5f);
51     pout->g = 0.5f + s * (pc->g - 0.5f);
52     pout->b = 0.5f + s * (pc->b - 0.5f);
53     pout->a = pc->a;
54     return pout;
55 }
56 
57 D3DXCOLOR* WINAPI D3DXColorAdjustSaturation(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
58 {
59     FLOAT grey;
60 
61     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
62 
63     grey = pc->r * 0.2125f + pc->g * 0.7154f + pc->b * 0.0721f;
64     pout->r = grey + s * (pc->r - grey);
65     pout->g = grey + s * (pc->g - grey);
66     pout->b = grey + s * (pc->b - grey);
67     pout->a = pc->a;
68     return pout;
69 }
70 
71 /*_________________Misc__________________________*/
72 
73 FLOAT WINAPI D3DXFresnelTerm(FLOAT costheta, FLOAT refractionindex)
74 {
75     FLOAT a, d, g, result;
76 
77     TRACE("costheta %f, refractionindex %f\n", costheta, refractionindex);
78 
79     g = sqrtf(refractionindex * refractionindex + costheta * costheta - 1.0f);
80     a = g + costheta;
81     d = g - costheta;
82     result = (costheta * a - 1.0f) * (costheta * a - 1.0f) / ((costheta * d + 1.0f) * (costheta * d + 1.0f)) + 1.0f;
83     result *= 0.5f * d * d / (a * a);
84 
85     return result;
86 }
87 
88 /*_________________D3DXMatrix____________________*/
89 
90 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation(D3DXMATRIX *out, FLOAT scaling, const D3DXVECTOR3 *rotationcenter,
91         const D3DXQUATERNION *rotation, const D3DXVECTOR3 *translation)
92 {
93     TRACE("out %p, scaling %f, rotationcenter %p, rotation %p, translation %p\n",
94             out, scaling, rotationcenter, rotation, translation);
95 
96     D3DXMatrixIdentity(out);
97 
98     if (rotation)
99     {
100         FLOAT temp00, temp01, temp02, temp10, temp11, temp12, temp20, temp21, temp22;
101 
102         temp00 = 1.0f - 2.0f * (rotation->y * rotation->y + rotation->z * rotation->z);
103         temp01 = 2.0f * (rotation->x * rotation->y + rotation->z * rotation->w);
104         temp02 = 2.0f * (rotation->x * rotation->z - rotation->y * rotation->w);
105         temp10 = 2.0f * (rotation->x * rotation->y - rotation->z * rotation->w);
106         temp11 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->z * rotation->z);
107         temp12 = 2.0f * (rotation->y * rotation->z + rotation->x * rotation->w);
108         temp20 = 2.0f * (rotation->x * rotation->z + rotation->y * rotation->w);
109         temp21 = 2.0f * (rotation->y * rotation->z - rotation->x * rotation->w);
110         temp22 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->y * rotation->y);
111 
112         out->u.m[0][0] = scaling * temp00;
113         out->u.m[0][1] = scaling * temp01;
114         out->u.m[0][2] = scaling * temp02;
115         out->u.m[1][0] = scaling * temp10;
116         out->u.m[1][1] = scaling * temp11;
117         out->u.m[1][2] = scaling * temp12;
118         out->u.m[2][0] = scaling * temp20;
119         out->u.m[2][1] = scaling * temp21;
120         out->u.m[2][2] = scaling * temp22;
121 
122         if (rotationcenter)
123         {
124             out->u.m[3][0] = rotationcenter->x * (1.0f - temp00) - rotationcenter->y * temp10
125                     - rotationcenter->z * temp20;
126             out->u.m[3][1] = rotationcenter->y * (1.0f - temp11) - rotationcenter->x * temp01
127                     - rotationcenter->z * temp21;
128             out->u.m[3][2] = rotationcenter->z * (1.0f - temp22) - rotationcenter->x * temp02
129                     - rotationcenter->y * temp12;
130         }
131     }
132     else
133     {
134         out->u.m[0][0] = scaling;
135         out->u.m[1][1] = scaling;
136         out->u.m[2][2] = scaling;
137     }
138 
139     if (translation)
140     {
141         out->u.m[3][0] += translation->x;
142         out->u.m[3][1] += translation->y;
143         out->u.m[3][2] += translation->z;
144     }
145 
146     return out;
147 }
148 
149 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation2D(D3DXMATRIX *out, FLOAT scaling,
150         const D3DXVECTOR2 *rotationcenter, FLOAT rotation, const D3DXVECTOR2 *translation)
151 {
152     FLOAT tmp1, tmp2, s;
153 
154     TRACE("out %p, scaling %f, rotationcenter %p, rotation %f, translation %p\n",
155             out, scaling, rotationcenter, rotation, translation);
156 
157     s = sinf(rotation / 2.0f);
158     tmp1 = 1.0f - 2.0f * s * s;
159     tmp2 = 2.0f * s * cosf(rotation / 2.0f);
160 
161     D3DXMatrixIdentity(out);
162     out->u.m[0][0] = scaling * tmp1;
163     out->u.m[0][1] = scaling * tmp2;
164     out->u.m[1][0] = -scaling * tmp2;
165     out->u.m[1][1] = scaling * tmp1;
166 
167     if (rotationcenter)
168     {
169         FLOAT x, y;
170 
171         x = rotationcenter->x;
172         y = rotationcenter->y;
173 
174         out->u.m[3][0] = y * tmp2 - x * tmp1 + x;
175         out->u.m[3][1] = -x * tmp2 - y * tmp1 + y;
176     }
177 
178     if (translation)
179     {
180         out->u.m[3][0] += translation->x;
181         out->u.m[3][1] += translation->y;
182     }
183 
184     return out;
185 }
186 
187 HRESULT WINAPI D3DXMatrixDecompose(D3DXVECTOR3 *poutscale, D3DXQUATERNION *poutrotation, D3DXVECTOR3 *pouttranslation, const D3DXMATRIX *pm)
188 {
189     D3DXMATRIX normalized;
190     D3DXVECTOR3 vec;
191 
192     TRACE("poutscale %p, poutrotation %p, pouttranslation %p, pm %p\n", poutscale, poutrotation, pouttranslation, pm);
193 
194     /*Compute the scaling part.*/
195     vec.x=pm->u.m[0][0];
196     vec.y=pm->u.m[0][1];
197     vec.z=pm->u.m[0][2];
198     poutscale->x=D3DXVec3Length(&vec);
199 
200     vec.x=pm->u.m[1][0];
201     vec.y=pm->u.m[1][1];
202     vec.z=pm->u.m[1][2];
203     poutscale->y=D3DXVec3Length(&vec);
204 
205     vec.x=pm->u.m[2][0];
206     vec.y=pm->u.m[2][1];
207     vec.z=pm->u.m[2][2];
208     poutscale->z=D3DXVec3Length(&vec);
209 
210     /*Compute the translation part.*/
211     pouttranslation->x=pm->u.m[3][0];
212     pouttranslation->y=pm->u.m[3][1];
213     pouttranslation->z=pm->u.m[3][2];
214 
215     /*Let's calculate the rotation now*/
216     if ( (poutscale->x == 0.0f) || (poutscale->y == 0.0f) || (poutscale->z == 0.0f) ) return D3DERR_INVALIDCALL;
217 
218     normalized.u.m[0][0]=pm->u.m[0][0]/poutscale->x;
219     normalized.u.m[0][1]=pm->u.m[0][1]/poutscale->x;
220     normalized.u.m[0][2]=pm->u.m[0][2]/poutscale->x;
221     normalized.u.m[1][0]=pm->u.m[1][0]/poutscale->y;
222     normalized.u.m[1][1]=pm->u.m[1][1]/poutscale->y;
223     normalized.u.m[1][2]=pm->u.m[1][2]/poutscale->y;
224     normalized.u.m[2][0]=pm->u.m[2][0]/poutscale->z;
225     normalized.u.m[2][1]=pm->u.m[2][1]/poutscale->z;
226     normalized.u.m[2][2]=pm->u.m[2][2]/poutscale->z;
227 
228     D3DXQuaternionRotationMatrix(poutrotation,&normalized);
229     return S_OK;
230 }
231 
232 FLOAT WINAPI D3DXMatrixDeterminant(const D3DXMATRIX *pm)
233 {
234     FLOAT t[3], v[4];
235 
236     TRACE("pm %p\n", pm);
237 
238     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
239     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
240     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
241     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
242     v[1] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
243 
244     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
245     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
246     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
247     v[2] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
248     v[3] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
249 
250     return pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[1] +
251         pm->u.m[0][2] * v[2] + pm->u.m[0][3] * v[3];
252 }
253 
254 D3DXMATRIX* WINAPI D3DXMatrixInverse(D3DXMATRIX *pout, FLOAT *pdeterminant, const D3DXMATRIX *pm)
255 {
256     FLOAT det, t[3], v[16];
257     UINT i, j;
258 
259     TRACE("pout %p, pdeterminant %p, pm %p\n", pout, pdeterminant, pm);
260 
261     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
262     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
263     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
264     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
265     v[4] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
266 
267     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
268     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
269     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
270     v[8] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
271     v[12] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
272 
273     det = pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[4] +
274         pm->u.m[0][2] * v[8] + pm->u.m[0][3] * v[12];
275     if (det == 0.0f)
276         return NULL;
277     if (pdeterminant)
278         *pdeterminant = det;
279 
280     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
281     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
282     t[2] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
283     v[1] = -pm->u.m[0][1] * t[0] + pm->u.m[2][1] * t[1] - pm->u.m[3][1] * t[2];
284     v[5] = pm->u.m[0][0] * t[0] - pm->u.m[2][0] * t[1] + pm->u.m[3][0] * t[2];
285 
286     t[0] = pm->u.m[0][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[0][1];
287     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
288     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
289     v[9] = -pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1]- pm->u.m[0][3] * t[2];
290     v[13] = pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] + pm->u.m[0][2] * t[2];
291 
292     t[0] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
293     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
294     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
295     v[2] = pm->u.m[0][1] * t[0] - pm->u.m[1][1] * t[1] + pm->u.m[3][1] * t[2];
296     v[6] = -pm->u.m[0][0] * t[0] + pm->u.m[1][0] * t[1] - pm->u.m[3][0] * t[2];
297 
298     t[0] = pm->u.m[0][0] * pm->u.m[1][1] - pm->u.m[1][0] * pm->u.m[0][1];
299     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
300     t[2] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
301     v[10] = pm->u.m[3][3] * t[0] + pm->u.m[1][3] * t[1] + pm->u.m[0][3] * t[2];
302     v[14] = -pm->u.m[3][2] * t[0] - pm->u.m[1][2] * t[1] - pm->u.m[0][2] * t[2];
303 
304     t[0] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
305     t[1] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
306     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
307     v[3] = -pm->u.m[0][1] * t[0] + pm->u.m[1][1] * t[1] - pm->u.m[2][1] * t[2];
308     v[7] = pm->u.m[0][0] * t[0] - pm->u.m[1][0] * t[1] + pm->u.m[2][0] * t[2];
309 
310     v[11] = -pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][1]) +
311         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][1]) -
312         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][1]);
313 
314     v[15] = pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][2] - pm->u.m[1][2] * pm->u.m[2][1]) -
315         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][2] - pm->u.m[0][2] * pm->u.m[2][1]) +
316         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][2] - pm->u.m[0][2] * pm->u.m[1][1]);
317 
318     det = 1.0f / det;
319 
320     for (i = 0; i < 4; i++)
321         for (j = 0; j < 4; j++)
322             pout->u.m[i][j] = v[4 * i + j] * det;
323 
324     return pout;
325 }
326 
327 D3DXMATRIX * WINAPI D3DXMatrixLookAtLH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
328         const D3DXVECTOR3 *up)
329 {
330     D3DXVECTOR3 right, upn, vec;
331 
332     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
333 
334     D3DXVec3Subtract(&vec, at, eye);
335     D3DXVec3Normalize(&vec, &vec);
336     D3DXVec3Cross(&right, up, &vec);
337     D3DXVec3Cross(&upn, &vec, &right);
338     D3DXVec3Normalize(&right, &right);
339     D3DXVec3Normalize(&upn, &upn);
340     out->u.m[0][0] = right.x;
341     out->u.m[1][0] = right.y;
342     out->u.m[2][0] = right.z;
343     out->u.m[3][0] = -D3DXVec3Dot(&right, eye);
344     out->u.m[0][1] = upn.x;
345     out->u.m[1][1] = upn.y;
346     out->u.m[2][1] = upn.z;
347     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
348     out->u.m[0][2] = vec.x;
349     out->u.m[1][2] = vec.y;
350     out->u.m[2][2] = vec.z;
351     out->u.m[3][2] = -D3DXVec3Dot(&vec, eye);
352     out->u.m[0][3] = 0.0f;
353     out->u.m[1][3] = 0.0f;
354     out->u.m[2][3] = 0.0f;
355     out->u.m[3][3] = 1.0f;
356 
357     return out;
358 }
359 
360 D3DXMATRIX * WINAPI D3DXMatrixLookAtRH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
361         const D3DXVECTOR3 *up)
362 {
363     D3DXVECTOR3 right, upn, vec;
364 
365     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
366 
367     D3DXVec3Subtract(&vec, at, eye);
368     D3DXVec3Normalize(&vec, &vec);
369     D3DXVec3Cross(&right, up, &vec);
370     D3DXVec3Cross(&upn, &vec, &right);
371     D3DXVec3Normalize(&right, &right);
372     D3DXVec3Normalize(&upn, &upn);
373     out->u.m[0][0] = -right.x;
374     out->u.m[1][0] = -right.y;
375     out->u.m[2][0] = -right.z;
376     out->u.m[3][0] = D3DXVec3Dot(&right, eye);
377     out->u.m[0][1] = upn.x;
378     out->u.m[1][1] = upn.y;
379     out->u.m[2][1] = upn.z;
380     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
381     out->u.m[0][2] = -vec.x;
382     out->u.m[1][2] = -vec.y;
383     out->u.m[2][2] = -vec.z;
384     out->u.m[3][2] = D3DXVec3Dot(&vec, eye);
385     out->u.m[0][3] = 0.0f;
386     out->u.m[1][3] = 0.0f;
387     out->u.m[2][3] = 0.0f;
388     out->u.m[3][3] = 1.0f;
389 
390     return out;
391 }
392 
393 D3DXMATRIX* WINAPI D3DXMatrixMultiply(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
394 {
395     D3DXMATRIX out;
396     int i,j;
397 
398     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
399 
400     for (i=0; i<4; i++)
401     {
402         for (j=0; j<4; j++)
403         {
404             out.u.m[i][j] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
405         }
406     }
407 
408     *pout = out;
409     return pout;
410 }
411 
412 D3DXMATRIX* WINAPI D3DXMatrixMultiplyTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
413 {
414     D3DXMATRIX temp;
415     int i, j;
416 
417     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
418 
419     for (i = 0; i < 4; i++)
420         for (j = 0; j < 4; j++)
421             temp.u.m[j][i] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
422 
423     *pout = temp;
424     return pout;
425 }
426 
427 D3DXMATRIX* WINAPI D3DXMatrixOrthoLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
428 {
429     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
430 
431     D3DXMatrixIdentity(pout);
432     pout->u.m[0][0] = 2.0f / w;
433     pout->u.m[1][1] = 2.0f / h;
434     pout->u.m[2][2] = 1.0f / (zf - zn);
435     pout->u.m[3][2] = zn / (zn - zf);
436     return pout;
437 }
438 
439 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
440 {
441     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
442 
443     D3DXMatrixIdentity(pout);
444     pout->u.m[0][0] = 2.0f / (r - l);
445     pout->u.m[1][1] = 2.0f / (t - b);
446     pout->u.m[2][2] = 1.0f / (zf -zn);
447     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
448     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
449     pout->u.m[3][2] = zn / (zn -zf);
450     return pout;
451 }
452 
453 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
454 {
455     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
456 
457     D3DXMatrixIdentity(pout);
458     pout->u.m[0][0] = 2.0f / (r - l);
459     pout->u.m[1][1] = 2.0f / (t - b);
460     pout->u.m[2][2] = 1.0f / (zn -zf);
461     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
462     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
463     pout->u.m[3][2] = zn / (zn -zf);
464     return pout;
465 }
466 
467 D3DXMATRIX* WINAPI D3DXMatrixOrthoRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
468 {
469     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
470 
471     D3DXMatrixIdentity(pout);
472     pout->u.m[0][0] = 2.0f / w;
473     pout->u.m[1][1] = 2.0f / h;
474     pout->u.m[2][2] = 1.0f / (zn - zf);
475     pout->u.m[3][2] = zn / (zn - zf);
476     return pout;
477 }
478 
479 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovLH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
480 {
481     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
482 
483     D3DXMatrixIdentity(pout);
484     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
485     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
486     pout->u.m[2][2] = zf / (zf - zn);
487     pout->u.m[2][3] = 1.0f;
488     pout->u.m[3][2] = (zf * zn) / (zn - zf);
489     pout->u.m[3][3] = 0.0f;
490     return pout;
491 }
492 
493 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovRH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
494 {
495     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
496 
497     D3DXMatrixIdentity(pout);
498     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
499     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
500     pout->u.m[2][2] = zf / (zn - zf);
501     pout->u.m[2][3] = -1.0f;
502     pout->u.m[3][2] = (zf * zn) / (zn - zf);
503     pout->u.m[3][3] = 0.0f;
504     return pout;
505 }
506 
507 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
508 {
509     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
510 
511     D3DXMatrixIdentity(pout);
512     pout->u.m[0][0] = 2.0f * zn / w;
513     pout->u.m[1][1] = 2.0f * zn / h;
514     pout->u.m[2][2] = zf / (zf - zn);
515     pout->u.m[3][2] = (zn * zf) / (zn - zf);
516     pout->u.m[2][3] = 1.0f;
517     pout->u.m[3][3] = 0.0f;
518     return pout;
519 }
520 
521 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
522 {
523     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
524 
525     D3DXMatrixIdentity(pout);
526     pout->u.m[0][0] = 2.0f * zn / (r - l);
527     pout->u.m[1][1] = -2.0f * zn / (b - t);
528     pout->u.m[2][0] = -1.0f - 2.0f * l / (r - l);
529     pout->u.m[2][1] = 1.0f + 2.0f * t / (b - t);
530     pout->u.m[2][2] = - zf / (zn - zf);
531     pout->u.m[3][2] = (zn * zf) / (zn -zf);
532     pout->u.m[2][3] = 1.0f;
533     pout->u.m[3][3] = 0.0f;
534     return pout;
535 }
536 
537 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
538 {
539     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
540 
541     D3DXMatrixIdentity(pout);
542     pout->u.m[0][0] = 2.0f * zn / (r - l);
543     pout->u.m[1][1] = -2.0f * zn / (b - t);
544     pout->u.m[2][0] = 1.0f + 2.0f * l / (r - l);
545     pout->u.m[2][1] = -1.0f -2.0f * t / (b - t);
546     pout->u.m[2][2] = zf / (zn - zf);
547     pout->u.m[3][2] = (zn * zf) / (zn -zf);
548     pout->u.m[2][3] = -1.0f;
549     pout->u.m[3][3] = 0.0f;
550     return pout;
551 }
552 
553 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
554 {
555     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
556 
557     D3DXMatrixIdentity(pout);
558     pout->u.m[0][0] = 2.0f * zn / w;
559     pout->u.m[1][1] = 2.0f * zn / h;
560     pout->u.m[2][2] = zf / (zn - zf);
561     pout->u.m[3][2] = (zn * zf) / (zn - zf);
562     pout->u.m[2][3] = -1.0f;
563     pout->u.m[3][3] = 0.0f;
564     return pout;
565 }
566 
567 D3DXMATRIX* WINAPI D3DXMatrixReflect(D3DXMATRIX *pout, const D3DXPLANE *pplane)
568 {
569     D3DXPLANE Nplane;
570 
571     TRACE("pout %p, pplane %p\n", pout, pplane);
572 
573     D3DXPlaneNormalize(&Nplane, pplane);
574     D3DXMatrixIdentity(pout);
575     pout->u.m[0][0] = 1.0f - 2.0f * Nplane.a * Nplane.a;
576     pout->u.m[0][1] = -2.0f * Nplane.a * Nplane.b;
577     pout->u.m[0][2] = -2.0f * Nplane.a * Nplane.c;
578     pout->u.m[1][0] = -2.0f * Nplane.a * Nplane.b;
579     pout->u.m[1][1] = 1.0f - 2.0f * Nplane.b * Nplane.b;
580     pout->u.m[1][2] = -2.0f * Nplane.b * Nplane.c;
581     pout->u.m[2][0] = -2.0f * Nplane.c * Nplane.a;
582     pout->u.m[2][1] = -2.0f * Nplane.c * Nplane.b;
583     pout->u.m[2][2] = 1.0f - 2.0f * Nplane.c * Nplane.c;
584     pout->u.m[3][0] = -2.0f * Nplane.d * Nplane.a;
585     pout->u.m[3][1] = -2.0f * Nplane.d * Nplane.b;
586     pout->u.m[3][2] = -2.0f * Nplane.d * Nplane.c;
587     return pout;
588 }
589 
590 D3DXMATRIX * WINAPI D3DXMatrixRotationAxis(D3DXMATRIX *out, const D3DXVECTOR3 *v, FLOAT angle)
591 {
592     D3DXVECTOR3 nv;
593     FLOAT sangle, cangle, cdiff;
594 
595     TRACE("out %p, v %p, angle %f\n", out, v, angle);
596 
597     D3DXVec3Normalize(&nv, v);
598     sangle = sinf(angle);
599     cangle = cosf(angle);
600     cdiff = 1.0f - cangle;
601 
602     out->u.m[0][0] = cdiff * nv.x * nv.x + cangle;
603     out->u.m[1][0] = cdiff * nv.x * nv.y - sangle * nv.z;
604     out->u.m[2][0] = cdiff * nv.x * nv.z + sangle * nv.y;
605     out->u.m[3][0] = 0.0f;
606     out->u.m[0][1] = cdiff * nv.y * nv.x + sangle * nv.z;
607     out->u.m[1][1] = cdiff * nv.y * nv.y + cangle;
608     out->u.m[2][1] = cdiff * nv.y * nv.z - sangle * nv.x;
609     out->u.m[3][1] = 0.0f;
610     out->u.m[0][2] = cdiff * nv.z * nv.x - sangle * nv.y;
611     out->u.m[1][2] = cdiff * nv.z * nv.y + sangle * nv.x;
612     out->u.m[2][2] = cdiff * nv.z * nv.z + cangle;
613     out->u.m[3][2] = 0.0f;
614     out->u.m[0][3] = 0.0f;
615     out->u.m[1][3] = 0.0f;
616     out->u.m[2][3] = 0.0f;
617     out->u.m[3][3] = 1.0f;
618 
619     return out;
620 }
621 
622 D3DXMATRIX* WINAPI D3DXMatrixRotationQuaternion(D3DXMATRIX *pout, const D3DXQUATERNION *pq)
623 {
624     TRACE("pout %p, pq %p\n", pout, pq);
625 
626     D3DXMatrixIdentity(pout);
627     pout->u.m[0][0] = 1.0f - 2.0f * (pq->y * pq->y + pq->z * pq->z);
628     pout->u.m[0][1] = 2.0f * (pq->x *pq->y + pq->z * pq->w);
629     pout->u.m[0][2] = 2.0f * (pq->x * pq->z - pq->y * pq->w);
630     pout->u.m[1][0] = 2.0f * (pq->x * pq->y - pq->z * pq->w);
631     pout->u.m[1][1] = 1.0f - 2.0f * (pq->x * pq->x + pq->z * pq->z);
632     pout->u.m[1][2] = 2.0f * (pq->y *pq->z + pq->x *pq->w);
633     pout->u.m[2][0] = 2.0f * (pq->x * pq->z + pq->y * pq->w);
634     pout->u.m[2][1] = 2.0f * (pq->y *pq->z - pq->x *pq->w);
635     pout->u.m[2][2] = 1.0f - 2.0f * (pq->x * pq->x + pq->y * pq->y);
636     return pout;
637 }
638 
639 D3DXMATRIX* WINAPI D3DXMatrixRotationX(D3DXMATRIX *pout, FLOAT angle)
640 {
641     TRACE("pout %p, angle %f\n", pout, angle);
642 
643     D3DXMatrixIdentity(pout);
644     pout->u.m[1][1] = cosf(angle);
645     pout->u.m[2][2] = cosf(angle);
646     pout->u.m[1][2] = sinf(angle);
647     pout->u.m[2][1] = -sinf(angle);
648     return pout;
649 }
650 
651 D3DXMATRIX* WINAPI D3DXMatrixRotationY(D3DXMATRIX *pout, FLOAT angle)
652 {
653     TRACE("pout %p, angle %f\n", pout, angle);
654 
655     D3DXMatrixIdentity(pout);
656     pout->u.m[0][0] = cosf(angle);
657     pout->u.m[2][2] = cosf(angle);
658     pout->u.m[0][2] = -sinf(angle);
659     pout->u.m[2][0] = sinf(angle);
660     return pout;
661 }
662 
663 D3DXMATRIX * WINAPI D3DXMatrixRotationYawPitchRoll(D3DXMATRIX *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
664 {
665     FLOAT sroll, croll, spitch, cpitch, syaw, cyaw;
666 
667     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
668 
669     sroll = sinf(roll);
670     croll = cosf(roll);
671     spitch = sinf(pitch);
672     cpitch = cosf(pitch);
673     syaw = sinf(yaw);
674     cyaw = cosf(yaw);
675 
676     out->u.m[0][0] = sroll * spitch * syaw + croll * cyaw;
677     out->u.m[0][1] = sroll * cpitch;
678     out->u.m[0][2] = sroll * spitch * cyaw - croll * syaw;
679     out->u.m[0][3] = 0.0f;
680     out->u.m[1][0] = croll * spitch * syaw - sroll * cyaw;
681     out->u.m[1][1] = croll * cpitch;
682     out->u.m[1][2] = croll * spitch * cyaw + sroll * syaw;
683     out->u.m[1][3] = 0.0f;
684     out->u.m[2][0] = cpitch * syaw;
685     out->u.m[2][1] = -spitch;
686     out->u.m[2][2] = cpitch * cyaw;
687     out->u.m[2][3] = 0.0f;
688     out->u.m[3][0] = 0.0f;
689     out->u.m[3][1] = 0.0f;
690     out->u.m[3][2] = 0.0f;
691     out->u.m[3][3] = 1.0f;
692 
693     return out;
694 }
695 
696 D3DXMATRIX* WINAPI D3DXMatrixRotationZ(D3DXMATRIX *pout, FLOAT angle)
697 {
698     TRACE("pout %p, angle %f\n", pout, angle);
699 
700     D3DXMatrixIdentity(pout);
701     pout->u.m[0][0] = cosf(angle);
702     pout->u.m[1][1] = cosf(angle);
703     pout->u.m[0][1] = sinf(angle);
704     pout->u.m[1][0] = -sinf(angle);
705     return pout;
706 }
707 
708 D3DXMATRIX* WINAPI D3DXMatrixScaling(D3DXMATRIX *pout, FLOAT sx, FLOAT sy, FLOAT sz)
709 {
710     TRACE("pout %p, sx %f, sy %f, sz %f\n", pout, sx, sy, sz);
711 
712     D3DXMatrixIdentity(pout);
713     pout->u.m[0][0] = sx;
714     pout->u.m[1][1] = sy;
715     pout->u.m[2][2] = sz;
716     return pout;
717 }
718 
719 D3DXMATRIX* WINAPI D3DXMatrixShadow(D3DXMATRIX *pout, const D3DXVECTOR4 *plight, const D3DXPLANE *pplane)
720 {
721     D3DXPLANE Nplane;
722     FLOAT dot;
723 
724     TRACE("pout %p, plight %p, pplane %p\n", pout, plight, pplane);
725 
726     D3DXPlaneNormalize(&Nplane, pplane);
727     dot = D3DXPlaneDot(&Nplane, plight);
728     pout->u.m[0][0] = dot - Nplane.a * plight->x;
729     pout->u.m[0][1] = -Nplane.a * plight->y;
730     pout->u.m[0][2] = -Nplane.a * plight->z;
731     pout->u.m[0][3] = -Nplane.a * plight->w;
732     pout->u.m[1][0] = -Nplane.b * plight->x;
733     pout->u.m[1][1] = dot - Nplane.b * plight->y;
734     pout->u.m[1][2] = -Nplane.b * plight->z;
735     pout->u.m[1][3] = -Nplane.b * plight->w;
736     pout->u.m[2][0] = -Nplane.c * plight->x;
737     pout->u.m[2][1] = -Nplane.c * plight->y;
738     pout->u.m[2][2] = dot - Nplane.c * plight->z;
739     pout->u.m[2][3] = -Nplane.c * plight->w;
740     pout->u.m[3][0] = -Nplane.d * plight->x;
741     pout->u.m[3][1] = -Nplane.d * plight->y;
742     pout->u.m[3][2] = -Nplane.d * plight->z;
743     pout->u.m[3][3] = dot - Nplane.d * plight->w;
744     return pout;
745 }
746 
747 D3DXMATRIX* WINAPI D3DXMatrixTransformation(D3DXMATRIX *pout, const D3DXVECTOR3 *pscalingcenter, const D3DXQUATERNION *pscalingrotation, const D3DXVECTOR3 *pscaling, const D3DXVECTOR3 *protationcenter, const D3DXQUATERNION *protation, const D3DXVECTOR3 *ptranslation)
748 {
749     D3DXMATRIX m1, m2, m3, m4, m5, m6, m7;
750     D3DXQUATERNION prc;
751     D3DXVECTOR3 psc, pt;
752 
753     TRACE("pout %p, pscalingcenter %p, pscalingrotation %p, pscaling %p, protationcentr %p, protation %p, ptranslation %p\n",
754         pout, pscalingcenter, pscalingrotation, pscaling, protationcenter, protation, ptranslation);
755 
756     if ( !pscalingcenter )
757     {
758         psc.x = 0.0f;
759         psc.y = 0.0f;
760         psc.z = 0.0f;
761     }
762     else
763     {
764         psc.x = pscalingcenter->x;
765         psc.y = pscalingcenter->y;
766         psc.z = pscalingcenter->z;
767     }
768 
769     if ( !protationcenter )
770     {
771         prc.x = 0.0f;
772         prc.y = 0.0f;
773         prc.z = 0.0f;
774     }
775     else
776     {
777         prc.x = protationcenter->x;
778         prc.y = protationcenter->y;
779         prc.z = protationcenter->z;
780     }
781 
782     if ( !ptranslation )
783     {
784         pt.x = 0.0f;
785         pt.y = 0.0f;
786         pt.z = 0.0f;
787     }
788     else
789     {
790         pt.x = ptranslation->x;
791         pt.y = ptranslation->y;
792         pt.z = ptranslation->z;
793     }
794 
795     D3DXMatrixTranslation(&m1, -psc.x, -psc.y, -psc.z);
796 
797     if ( !pscalingrotation )
798     {
799         D3DXMatrixIdentity(&m2);
800         D3DXMatrixIdentity(&m4);
801     }
802     else
803     {
804         D3DXMatrixRotationQuaternion(&m4, pscalingrotation);
805         D3DXMatrixInverse(&m2, NULL, &m4);
806     }
807 
808     if ( !pscaling ) D3DXMatrixIdentity(&m3);
809     else D3DXMatrixScaling(&m3, pscaling->x, pscaling->y, pscaling->z);
810 
811     if ( !protation ) D3DXMatrixIdentity(&m6);
812     else D3DXMatrixRotationQuaternion(&m6, protation);
813 
814     D3DXMatrixTranslation(&m5, psc.x - prc.x,  psc.y - prc.y,  psc.z - prc.z);
815     D3DXMatrixTranslation(&m7, prc.x + pt.x, prc.y + pt.y, prc.z + pt.z);
816     D3DXMatrixMultiply(&m1, &m1, &m2);
817     D3DXMatrixMultiply(&m1, &m1, &m3);
818     D3DXMatrixMultiply(&m1, &m1, &m4);
819     D3DXMatrixMultiply(&m1, &m1, &m5);
820     D3DXMatrixMultiply(&m1, &m1, &m6);
821     D3DXMatrixMultiply(pout, &m1, &m7);
822     return pout;
823 }
824 
825 D3DXMATRIX* WINAPI D3DXMatrixTransformation2D(D3DXMATRIX *pout, const D3DXVECTOR2 *pscalingcenter, FLOAT scalingrotation, const D3DXVECTOR2 *pscaling, const D3DXVECTOR2 *protationcenter, FLOAT rotation, const D3DXVECTOR2 *ptranslation)
826 {
827     D3DXQUATERNION rot, sca_rot;
828     D3DXVECTOR3 rot_center, sca, sca_center, trans;
829 
830     TRACE("pout %p, pscalingcenter %p, scalingrotation %f, pscaling %p, protztioncenter %p, rotation %f, ptranslation %p\n",
831         pout, pscalingcenter, scalingrotation, pscaling, protationcenter, rotation, ptranslation);
832 
833     if ( pscalingcenter )
834     {
835         sca_center.x=pscalingcenter->x;
836         sca_center.y=pscalingcenter->y;
837         sca_center.z=0.0f;
838     }
839     else
840     {
841         sca_center.x=0.0f;
842         sca_center.y=0.0f;
843         sca_center.z=0.0f;
844     }
845 
846     if ( pscaling )
847     {
848         sca.x=pscaling->x;
849         sca.y=pscaling->y;
850         sca.z=1.0f;
851     }
852     else
853     {
854         sca.x=1.0f;
855         sca.y=1.0f;
856         sca.z=1.0f;
857     }
858 
859     if ( protationcenter )
860     {
861         rot_center.x=protationcenter->x;
862         rot_center.y=protationcenter->y;
863         rot_center.z=0.0f;
864     }
865     else
866     {
867         rot_center.x=0.0f;
868         rot_center.y=0.0f;
869         rot_center.z=0.0f;
870     }
871 
872     if ( ptranslation )
873     {
874         trans.x=ptranslation->x;
875         trans.y=ptranslation->y;
876         trans.z=0.0f;
877     }
878     else
879     {
880         trans.x=0.0f;
881         trans.y=0.0f;
882         trans.z=0.0f;
883     }
884 
885     rot.w=cosf(rotation/2.0f);
886     rot.x=0.0f;
887     rot.y=0.0f;
888     rot.z=sinf(rotation/2.0f);
889 
890     sca_rot.w=cosf(scalingrotation/2.0f);
891     sca_rot.x=0.0f;
892     sca_rot.y=0.0f;
893     sca_rot.z=sinf(scalingrotation/2.0f);
894 
895     D3DXMatrixTransformation(pout, &sca_center, &sca_rot, &sca, &rot_center, &rot, &trans);
896 
897     return pout;
898 }
899 
900 D3DXMATRIX* WINAPI D3DXMatrixTranslation(D3DXMATRIX *pout, FLOAT x, FLOAT y, FLOAT z)
901 {
902     TRACE("pout %p, x %f, y %f, z %f\n", pout, x, y, z);
903 
904     D3DXMatrixIdentity(pout);
905     pout->u.m[3][0] = x;
906     pout->u.m[3][1] = y;
907     pout->u.m[3][2] = z;
908     return pout;
909 }
910 
911 D3DXMATRIX* WINAPI D3DXMatrixTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm)
912 {
913     const D3DXMATRIX m = *pm;
914     int i,j;
915 
916     TRACE("pout %p, pm %p\n", pout, pm);
917 
918     for (i=0; i<4; i++)
919         for (j=0; j<4; j++) pout->u.m[i][j] = m.u.m[j][i];
920 
921     return pout;
922 }
923 
924 /*_________________D3DXMatrixStack____________________*/
925 
926 
927 static inline struct ID3DXMatrixStackImpl *impl_from_ID3DXMatrixStack(ID3DXMatrixStack *iface)
928 {
929   return CONTAINING_RECORD(iface, struct ID3DXMatrixStackImpl, ID3DXMatrixStack_iface);
930 }
931 
932 static HRESULT WINAPI ID3DXMatrixStackImpl_QueryInterface(ID3DXMatrixStack *iface, REFIID riid, void **out)
933 {
934     TRACE("iface %p, riid %s, out %p.\n", iface, debugstr_guid(riid), out);
935 
936     if (IsEqualGUID(riid, &IID_ID3DXMatrixStack)
937             || IsEqualGUID(riid, &IID_IUnknown))
938     {
939         ID3DXMatrixStack_AddRef(iface);
940         *out = iface;
941         return S_OK;
942     }
943 
944     WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));
945 
946     *out = NULL;
947     return E_NOINTERFACE;
948 }
949 
950 static ULONG WINAPI ID3DXMatrixStackImpl_AddRef(ID3DXMatrixStack *iface)
951 {
952     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
953     ULONG ref = InterlockedIncrement(&This->ref);
954     TRACE("(%p) : AddRef from %d\n", This, ref - 1);
955     return ref;
956 }
957 
958 static ULONG WINAPI ID3DXMatrixStackImpl_Release(ID3DXMatrixStack *iface)
959 {
960     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
961     ULONG ref = InterlockedDecrement(&This->ref);
962     if (!ref)
963     {
964         HeapFree(GetProcessHeap(), 0, This->stack);
965         HeapFree(GetProcessHeap(), 0, This);
966     }
967     TRACE("(%p) : ReleaseRef to %d\n", This, ref);
968     return ref;
969 }
970 
971 static D3DXMATRIX* WINAPI ID3DXMatrixStackImpl_GetTop(ID3DXMatrixStack *iface)
972 {
973     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
974 
975     TRACE("iface %p\n", iface);
976 
977     return &This->stack[This->current];
978 }
979 
980 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadIdentity(ID3DXMatrixStack *iface)
981 {
982     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
983 
984     TRACE("iface %p\n", iface);
985 
986     D3DXMatrixIdentity(&This->stack[This->current]);
987 
988     return D3D_OK;
989 }
990 
991 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
992 {
993     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
994 
995     TRACE("iface %p, pm %p\n", iface, pm);
996 
997     This->stack[This->current] = *pm;
998 
999     return D3D_OK;
1000 }
1001 
1002 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
1003 {
1004     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1005 
1006     TRACE("iface %p, pm %p\n", iface, pm);
1007 
1008     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], pm);
1009 
1010     return D3D_OK;
1011 }
1012 
1013 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrixLocal(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
1014 {
1015     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1016 
1017     TRACE("iface %p, pm %p\n", iface, pm);
1018 
1019     D3DXMatrixMultiply(&This->stack[This->current], pm, &This->stack[This->current]);
1020 
1021     return D3D_OK;
1022 }
1023 
1024 static HRESULT WINAPI ID3DXMatrixStackImpl_Pop(ID3DXMatrixStack *iface)
1025 {
1026     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1027 
1028     TRACE("iface %p\n", iface);
1029 
1030     /* Popping the last element on the stack returns D3D_OK, but does nothing. */
1031     if (!This->current) return D3D_OK;
1032 
1033     if (This->current <= This->stack_size / 4 && This->stack_size >= INITIAL_STACK_SIZE * 2)
1034     {
1035         unsigned int new_size;
1036         D3DXMATRIX *new_stack;
1037 
1038         new_size = This->stack_size / 2;
1039         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1040         if (new_stack)
1041         {
1042             This->stack_size = new_size;
1043             This->stack = new_stack;
1044         }
1045     }
1046 
1047     --This->current;
1048 
1049     return D3D_OK;
1050 }
1051 
1052 static HRESULT WINAPI ID3DXMatrixStackImpl_Push(ID3DXMatrixStack *iface)
1053 {
1054     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1055 
1056     TRACE("iface %p\n", iface);
1057 
1058     if (This->current == This->stack_size - 1)
1059     {
1060         unsigned int new_size;
1061         D3DXMATRIX *new_stack;
1062 
1063         if (This->stack_size > UINT_MAX / 2) return E_OUTOFMEMORY;
1064 
1065         new_size = This->stack_size * 2;
1066         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1067         if (!new_stack) return E_OUTOFMEMORY;
1068 
1069         This->stack_size = new_size;
1070         This->stack = new_stack;
1071     }
1072 
1073     ++This->current;
1074     This->stack[This->current] = This->stack[This->current - 1];
1075 
1076     return D3D_OK;
1077 }
1078 
1079 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxis(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1080 {
1081     D3DXMATRIX temp;
1082     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1083 
1084     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1085 
1086     D3DXMatrixRotationAxis(&temp, pv, angle);
1087     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1088 
1089     return D3D_OK;
1090 }
1091 
1092 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxisLocal(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1093 {
1094     D3DXMATRIX temp;
1095     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1096 
1097     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1098 
1099     D3DXMatrixRotationAxis(&temp, pv, angle);
1100     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1101 
1102     return D3D_OK;
1103 }
1104 
1105 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRoll(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1106 {
1107     D3DXMATRIX temp;
1108     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1109 
1110     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1111 
1112     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1113     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1114 
1115     return D3D_OK;
1116 }
1117 
1118 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRollLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1119 {
1120     D3DXMATRIX temp;
1121     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1122 
1123     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1124 
1125     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1126     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1127 
1128     return D3D_OK;
1129 }
1130 
1131 static HRESULT WINAPI ID3DXMatrixStackImpl_Scale(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1132 {
1133     D3DXMATRIX temp;
1134     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1135 
1136     TRACE("iface %p,x %f, y %f, z %f\n", iface, x, y, z);
1137 
1138     D3DXMatrixScaling(&temp, x, y, z);
1139     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1140 
1141     return D3D_OK;
1142 }
1143 
1144 static HRESULT WINAPI ID3DXMatrixStackImpl_ScaleLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1145 {
1146     D3DXMATRIX temp;
1147     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1148 
1149     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1150 
1151     D3DXMatrixScaling(&temp, x, y, z);
1152     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1153 
1154     return D3D_OK;
1155 }
1156 
1157 static HRESULT WINAPI ID3DXMatrixStackImpl_Translate(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1158 {
1159     D3DXMATRIX temp;
1160     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1161 
1162     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1163 
1164     D3DXMatrixTranslation(&temp, x, y, z);
1165     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1166 
1167     return D3D_OK;
1168 }
1169 
1170 static HRESULT WINAPI ID3DXMatrixStackImpl_TranslateLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1171 {
1172     D3DXMATRIX temp;
1173     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1174 
1175     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1176 
1177     D3DXMatrixTranslation(&temp, x, y, z);
1178     D3DXMatrixMultiply(&This->stack[This->current], &temp,&This->stack[This->current]);
1179 
1180     return D3D_OK;
1181 }
1182 
1183 static const ID3DXMatrixStackVtbl ID3DXMatrixStack_Vtbl =
1184 {
1185     ID3DXMatrixStackImpl_QueryInterface,
1186     ID3DXMatrixStackImpl_AddRef,
1187     ID3DXMatrixStackImpl_Release,
1188     ID3DXMatrixStackImpl_Pop,
1189     ID3DXMatrixStackImpl_Push,
1190     ID3DXMatrixStackImpl_LoadIdentity,
1191     ID3DXMatrixStackImpl_LoadMatrix,
1192     ID3DXMatrixStackImpl_MultMatrix,
1193     ID3DXMatrixStackImpl_MultMatrixLocal,
1194     ID3DXMatrixStackImpl_RotateAxis,
1195     ID3DXMatrixStackImpl_RotateAxisLocal,
1196     ID3DXMatrixStackImpl_RotateYawPitchRoll,
1197     ID3DXMatrixStackImpl_RotateYawPitchRollLocal,
1198     ID3DXMatrixStackImpl_Scale,
1199     ID3DXMatrixStackImpl_ScaleLocal,
1200     ID3DXMatrixStackImpl_Translate,
1201     ID3DXMatrixStackImpl_TranslateLocal,
1202     ID3DXMatrixStackImpl_GetTop
1203 };
1204 
1205 HRESULT WINAPI D3DXCreateMatrixStack(DWORD flags, ID3DXMatrixStack **stack)
1206 {
1207     struct ID3DXMatrixStackImpl *object;
1208 
1209     TRACE("flags %#x, stack %p.\n", flags, stack);
1210 
1211     if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
1212     {
1213         *stack = NULL;
1214         return E_OUTOFMEMORY;
1215     }
1216     object->ID3DXMatrixStack_iface.lpVtbl = &ID3DXMatrixStack_Vtbl;
1217     object->ref = 1;
1218 
1219     if (!(object->stack = HeapAlloc(GetProcessHeap(), 0, INITIAL_STACK_SIZE * sizeof(*object->stack))))
1220     {
1221         HeapFree(GetProcessHeap(), 0, object);
1222         *stack = NULL;
1223         return E_OUTOFMEMORY;
1224     }
1225 
1226     object->current = 0;
1227     object->stack_size = INITIAL_STACK_SIZE;
1228     D3DXMatrixIdentity(&object->stack[0]);
1229 
1230     TRACE("Created matrix stack %p.\n", object);
1231 
1232     *stack = &object->ID3DXMatrixStack_iface;
1233     return D3D_OK;
1234 }
1235 
1236 /*_________________D3DXPLANE________________*/
1237 
1238 D3DXPLANE* WINAPI D3DXPlaneFromPointNormal(D3DXPLANE *pout, const D3DXVECTOR3 *pvpoint, const D3DXVECTOR3 *pvnormal)
1239 {
1240     TRACE("pout %p, pvpoint %p, pvnormal %p\n", pout, pvpoint, pvnormal);
1241 
1242     pout->a = pvnormal->x;
1243     pout->b = pvnormal->y;
1244     pout->c = pvnormal->z;
1245     pout->d = -D3DXVec3Dot(pvpoint, pvnormal);
1246     return pout;
1247 }
1248 
1249 D3DXPLANE* WINAPI D3DXPlaneFromPoints(D3DXPLANE *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3)
1250 {
1251     D3DXVECTOR3 edge1, edge2, normal, Nnormal;
1252 
1253     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
1254 
1255     edge1.x = 0.0f; edge1.y = 0.0f; edge1.z = 0.0f;
1256     edge2.x = 0.0f; edge2.y = 0.0f; edge2.z = 0.0f;
1257     D3DXVec3Subtract(&edge1, pv2, pv1);
1258     D3DXVec3Subtract(&edge2, pv3, pv1);
1259     D3DXVec3Cross(&normal, &edge1, &edge2);
1260     D3DXVec3Normalize(&Nnormal, &normal);
1261     D3DXPlaneFromPointNormal(pout, pv1, &Nnormal);
1262     return pout;
1263 }
1264 
1265 D3DXVECTOR3* WINAPI D3DXPlaneIntersectLine(D3DXVECTOR3 *pout, const D3DXPLANE *pp, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2)
1266 {
1267     D3DXVECTOR3 direction, normal;
1268     FLOAT dot, temp;
1269 
1270     TRACE("pout %p, pp %p, pv1 %p, pv2 %p\n", pout, pp, pv1, pv2);
1271 
1272     normal.x = pp->a;
1273     normal.y = pp->b;
1274     normal.z = pp->c;
1275     direction.x = pv2->x - pv1->x;
1276     direction.y = pv2->y - pv1->y;
1277     direction.z = pv2->z - pv1->z;
1278     dot = D3DXVec3Dot(&normal, &direction);
1279     if ( !dot ) return NULL;
1280     temp = ( pp->d + D3DXVec3Dot(&normal, pv1) ) / dot;
1281     pout->x = pv1->x - temp * direction.x;
1282     pout->y = pv1->y - temp * direction.y;
1283     pout->z = pv1->z - temp * direction.z;
1284     return pout;
1285 }
1286 
1287 D3DXPLANE * WINAPI D3DXPlaneNormalize(D3DXPLANE *out, const D3DXPLANE *p)
1288 {
1289     FLOAT norm;
1290 
1291     TRACE("out %p, p %p\n", out, p);
1292 
1293     norm = sqrtf(p->a * p->a + p->b * p->b + p->c * p->c);
1294     if (norm)
1295     {
1296         out->a = p->a / norm;
1297         out->b = p->b / norm;
1298         out->c = p->c / norm;
1299         out->d = p->d / norm;
1300     }
1301     else
1302     {
1303         out->a = 0.0f;
1304         out->b = 0.0f;
1305         out->c = 0.0f;
1306         out->d = 0.0f;
1307     }
1308 
1309     return out;
1310 }
1311 
1312 D3DXPLANE* WINAPI D3DXPlaneTransform(D3DXPLANE *pout, const D3DXPLANE *pplane, const D3DXMATRIX *pm)
1313 {
1314     const D3DXPLANE plane = *pplane;
1315 
1316     TRACE("pout %p, pplane %p, pm %p\n", pout, pplane, pm);
1317 
1318     pout->a = pm->u.m[0][0] * plane.a + pm->u.m[1][0] * plane.b + pm->u.m[2][0] * plane.c + pm->u.m[3][0] * plane.d;
1319     pout->b = pm->u.m[0][1] * plane.a + pm->u.m[1][1] * plane.b + pm->u.m[2][1] * plane.c + pm->u.m[3][1] * plane.d;
1320     pout->c = pm->u.m[0][2] * plane.a + pm->u.m[1][2] * plane.b + pm->u.m[2][2] * plane.c + pm->u.m[3][2] * plane.d;
1321     pout->d = pm->u.m[0][3] * plane.a + pm->u.m[1][3] * plane.b + pm->u.m[2][3] * plane.c + pm->u.m[3][3] * plane.d;
1322     return pout;
1323 }
1324 
1325 D3DXPLANE* WINAPI D3DXPlaneTransformArray(D3DXPLANE* out, UINT outstride, const D3DXPLANE* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1326 {
1327     UINT i;
1328 
1329     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1330 
1331     for (i = 0; i < elements; ++i) {
1332         D3DXPlaneTransform(
1333             (D3DXPLANE*)((char*)out + outstride * i),
1334             (const D3DXPLANE*)((const char*)in + instride * i),
1335             matrix);
1336     }
1337     return out;
1338 }
1339 
1340 /*_________________D3DXQUATERNION________________*/
1341 
1342 D3DXQUATERNION* WINAPI D3DXQuaternionBaryCentric(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, FLOAT f, FLOAT g)
1343 {
1344     D3DXQUATERNION temp1, temp2;
1345 
1346      TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, f %f, g %f\n", pout, pq1, pq2, pq3, f, g);
1347 
1348     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq2, f + g), D3DXQuaternionSlerp(&temp2, pq1, pq3, f+g), g / (f + g));
1349     return pout;
1350 }
1351 
1352 D3DXQUATERNION * WINAPI D3DXQuaternionExp(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1353 {
1354     FLOAT norm;
1355 
1356     TRACE("out %p, q %p\n", out, q);
1357 
1358     norm = sqrtf(q->x * q->x + q->y * q->y + q->z * q->z);
1359     if (norm)
1360     {
1361         out->x = sinf(norm) * q->x / norm;
1362         out->y = sinf(norm) * q->y / norm;
1363         out->z = sinf(norm) * q->z / norm;
1364         out->w = cosf(norm);
1365     }
1366     else
1367     {
1368         out->x = 0.0f;
1369         out->y = 0.0f;
1370         out->z = 0.0f;
1371         out->w = 1.0f;
1372     }
1373 
1374     return out;
1375 }
1376 
1377 D3DXQUATERNION* WINAPI D3DXQuaternionInverse(D3DXQUATERNION *pout, const D3DXQUATERNION *pq)
1378 {
1379     FLOAT norm;
1380 
1381     TRACE("pout %p, pq %p\n", pout, pq);
1382 
1383     norm = D3DXQuaternionLengthSq(pq);
1384 
1385     pout->x = -pq->x / norm;
1386     pout->y = -pq->y / norm;
1387     pout->z = -pq->z / norm;
1388     pout->w = pq->w / norm;
1389     return pout;
1390 }
1391 
1392 D3DXQUATERNION * WINAPI D3DXQuaternionLn(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1393 {
1394     FLOAT t;
1395 
1396     TRACE("out %p, q %p\n", out, q);
1397 
1398     if ((q->w >= 1.0f) || (q->w == -1.0f))
1399         t = 1.0f;
1400     else
1401         t = acosf(q->w) / sqrtf(1.0f - q->w * q->w);
1402 
1403     out->x = t * q->x;
1404     out->y = t * q->y;
1405     out->z = t * q->z;
1406     out->w = 0.0f;
1407 
1408     return out;
1409 }
1410 
1411 D3DXQUATERNION* WINAPI D3DXQuaternionMultiply(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2)
1412 {
1413     D3DXQUATERNION out;
1414 
1415     TRACE("pout %p, pq1 %p, pq2 %p\n", pout, pq1, pq2);
1416 
1417     out.x = pq2->w * pq1->x + pq2->x * pq1->w + pq2->y * pq1->z - pq2->z * pq1->y;
1418     out.y = pq2->w * pq1->y - pq2->x * pq1->z + pq2->y * pq1->w + pq2->z * pq1->x;
1419     out.z = pq2->w * pq1->z + pq2->x * pq1->y - pq2->y * pq1->x + pq2->z * pq1->w;
1420     out.w = pq2->w * pq1->w - pq2->x * pq1->x - pq2->y * pq1->y - pq2->z * pq1->z;
1421     *pout = out;
1422     return pout;
1423 }
1424 
1425 D3DXQUATERNION * WINAPI D3DXQuaternionNormalize(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1426 {
1427     FLOAT norm;
1428 
1429     TRACE("out %p, q %p\n", out, q);
1430 
1431     norm = D3DXQuaternionLength(q);
1432 
1433     out->x = q->x / norm;
1434     out->y = q->y / norm;
1435     out->z = q->z / norm;
1436     out->w = q->w / norm;
1437 
1438     return out;
1439 }
1440 
1441 D3DXQUATERNION * WINAPI D3DXQuaternionRotationAxis(D3DXQUATERNION *out, const D3DXVECTOR3 *v, FLOAT angle)
1442 {
1443     D3DXVECTOR3 temp;
1444 
1445     TRACE("out %p, v %p, angle %f\n", out, v, angle);
1446 
1447     D3DXVec3Normalize(&temp, v);
1448 
1449     out->x = sinf(angle / 2.0f) * temp.x;
1450     out->y = sinf(angle / 2.0f) * temp.y;
1451     out->z = sinf(angle / 2.0f) * temp.z;
1452     out->w = cosf(angle / 2.0f);
1453 
1454     return out;
1455 }
1456 
1457 D3DXQUATERNION * WINAPI D3DXQuaternionRotationMatrix(D3DXQUATERNION *out, const D3DXMATRIX *m)
1458 {
1459     FLOAT s, trace;
1460 
1461     TRACE("out %p, m %p\n", out, m);
1462 
1463     trace = m->u.m[0][0] + m->u.m[1][1] + m->u.m[2][2] + 1.0f;
1464     if (trace > 1.0f)
1465     {
1466         s = 2.0f * sqrtf(trace);
1467         out->x = (m->u.m[1][2] - m->u.m[2][1]) / s;
1468         out->y = (m->u.m[2][0] - m->u.m[0][2]) / s;
1469         out->z = (m->u.m[0][1] - m->u.m[1][0]) / s;
1470         out->w = 0.25f * s;
1471     }
1472     else
1473     {
1474         int i, maxi = 0;
1475 
1476         for (i = 1; i < 3; i++)
1477         {
1478             if (m->u.m[i][i] > m->u.m[maxi][maxi])
1479                 maxi = i;
1480         }
1481 
1482         switch (maxi)
1483         {
1484             case 0:
1485                 s = 2.0f * sqrtf(1.0f + m->u.m[0][0] - m->u.m[1][1] - m->u.m[2][2]);
1486                 out->x = 0.25f * s;
1487                 out->y = (m->u.m[0][1] + m->u.m[1][0]) / s;
1488                 out->z = (m->u.m[0][2] + m->u.m[2][0]) / s;
1489                 out->w = (m->u.m[1][2] - m->u.m[2][1]) / s;
1490                 break;
1491 
1492             case 1:
1493                 s = 2.0f * sqrtf(1.0f + m->u.m[1][1] - m->u.m[0][0] - m->u.m[2][2]);
1494                 out->x = (m->u.m[0][1] + m->u.m[1][0]) / s;
1495                 out->y = 0.25f * s;
1496                 out->z = (m->u.m[1][2] + m->u.m[2][1]) / s;
1497                 out->w = (m->u.m[2][0] - m->u.m[0][2]) / s;
1498                 break;
1499 
1500             case 2:
1501                 s = 2.0f * sqrtf(1.0f + m->u.m[2][2] - m->u.m[0][0] - m->u.m[1][1]);
1502                 out->x = (m->u.m[0][2] + m->u.m[2][0]) / s;
1503                 out->y = (m->u.m[1][2] + m->u.m[2][1]) / s;
1504                 out->z = 0.25f * s;
1505                 out->w = (m->u.m[0][1] - m->u.m[1][0]) / s;
1506                 break;
1507         }
1508     }
1509 
1510     return out;
1511 }
1512 
1513 D3DXQUATERNION * WINAPI D3DXQuaternionRotationYawPitchRoll(D3DXQUATERNION *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
1514 {
1515     FLOAT syaw, cyaw, spitch, cpitch, sroll, croll;
1516 
1517     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
1518 
1519     syaw = sinf(yaw / 2.0f);
1520     cyaw = cosf(yaw / 2.0f);
1521     spitch = sinf(pitch / 2.0f);
1522     cpitch = cosf(pitch / 2.0f);
1523     sroll = sinf(roll / 2.0f);
1524     croll = cosf(roll / 2.0f);
1525 
1526     out->x = syaw * cpitch * sroll + cyaw * spitch * croll;
1527     out->y = syaw * cpitch * croll - cyaw * spitch * sroll;
1528     out->z = cyaw * cpitch * sroll - syaw * spitch * croll;
1529     out->w = cyaw * cpitch * croll + syaw * spitch * sroll;
1530 
1531     return out;
1532 }
1533 
1534 D3DXQUATERNION * WINAPI D3DXQuaternionSlerp(D3DXQUATERNION *out, const D3DXQUATERNION *q1,
1535         const D3DXQUATERNION *q2, FLOAT t)
1536 {
1537     FLOAT dot, temp;
1538 
1539     TRACE("out %p, q1 %p, q2 %p, t %f\n", out, q1, q2, t);
1540 
1541     temp = 1.0f - t;
1542     dot = D3DXQuaternionDot(q1, q2);
1543     if (dot < 0.0f)
1544     {
1545         t = -t;
1546         dot = -dot;
1547     }
1548 
1549     if (1.0f - dot > 0.001f)
1550     {
1551         FLOAT theta = acosf(dot);
1552 
1553         temp = sinf(theta * temp) / sinf(theta);
1554         t = sinf(theta * t) / sinf(theta);
1555     }
1556 
1557     out->x = temp * q1->x + t * q2->x;
1558     out->y = temp * q1->y + t * q2->y;
1559     out->z = temp * q1->z + t * q2->z;
1560     out->w = temp * q1->w + t * q2->w;
1561 
1562     return out;
1563 }
1564 
1565 D3DXQUATERNION* WINAPI D3DXQuaternionSquad(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, const D3DXQUATERNION *pq4, FLOAT t)
1566 {
1567     D3DXQUATERNION temp1, temp2;
1568 
1569     TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, pq4 %p, t %f\n", pout, pq1, pq2, pq3, pq4, t);
1570 
1571     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq4, t), D3DXQuaternionSlerp(&temp2, pq2, pq3, t), 2.0f * t * (1.0f - t));
1572     return pout;
1573 }
1574 
1575 static D3DXQUATERNION add_diff(const D3DXQUATERNION *q1, const D3DXQUATERNION *q2, const FLOAT add)
1576 {
1577     D3DXQUATERNION temp;
1578 
1579     temp.x = q1->x + add * q2->x;
1580     temp.y = q1->y + add * q2->y;
1581     temp.z = q1->z + add * q2->z;
1582     temp.w = q1->w + add * q2->w;
1583 
1584     return temp;
1585 }
1586 
1587 void WINAPI D3DXQuaternionSquadSetup(D3DXQUATERNION *paout, D3DXQUATERNION *pbout, D3DXQUATERNION *pcout, const D3DXQUATERNION *pq0, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3)
1588 {
1589     D3DXQUATERNION q, temp1, temp2, temp3, zero;
1590     D3DXQUATERNION aout, cout;
1591 
1592     TRACE("paout %p, pbout %p, pcout %p, pq0 %p, pq1 %p, pq2 %p, pq3 %p\n", paout, pbout, pcout, pq0, pq1, pq2, pq3);
1593 
1594     zero.x = 0.0f;
1595     zero.y = 0.0f;
1596     zero.z = 0.0f;
1597     zero.w = 0.0f;
1598 
1599     if (D3DXQuaternionDot(pq0, pq1) < 0.0f)
1600         temp2 = add_diff(&zero, pq0, -1.0f);
1601     else
1602         temp2 = *pq0;
1603 
1604     if (D3DXQuaternionDot(pq1, pq2) < 0.0f)
1605         cout = add_diff(&zero, pq2, -1.0f);
1606     else
1607         cout = *pq2;
1608 
1609     if (D3DXQuaternionDot(&cout, pq3) < 0.0f)
1610         temp3 = add_diff(&zero, pq3, -1.0f);
1611     else
1612         temp3 = *pq3;
1613 
1614     D3DXQuaternionInverse(&temp1, pq1);
1615     D3DXQuaternionMultiply(&temp2, &temp1, &temp2);
1616     D3DXQuaternionLn(&temp2, &temp2);
1617     D3DXQuaternionMultiply(&q, &temp1, &cout);
1618     D3DXQuaternionLn(&q, &q);
1619     temp1 = add_diff(&temp2, &q, 1.0f);
1620     temp1.x *= -0.25f;
1621     temp1.y *= -0.25f;
1622     temp1.z *= -0.25f;
1623     temp1.w *= -0.25f;
1624     D3DXQuaternionExp(&temp1, &temp1);
1625     D3DXQuaternionMultiply(&aout, pq1, &temp1);
1626 
1627     D3DXQuaternionInverse(&temp1, &cout);
1628     D3DXQuaternionMultiply(&temp2, &temp1, pq1);
1629     D3DXQuaternionLn(&temp2, &temp2);
1630     D3DXQuaternionMultiply(&q, &temp1, &temp3);
1631     D3DXQuaternionLn(&q, &q);
1632     temp1 = add_diff(&temp2, &q, 1.0f);
1633     temp1.x *= -0.25f;
1634     temp1.y *= -0.25f;
1635     temp1.z *= -0.25f;
1636     temp1.w *= -0.25f;
1637     D3DXQuaternionExp(&temp1, &temp1);
1638     D3DXQuaternionMultiply(pbout, &cout, &temp1);
1639     *paout = aout;
1640     *pcout = cout;
1641 }
1642 
1643 void WINAPI D3DXQuaternionToAxisAngle(const D3DXQUATERNION *pq, D3DXVECTOR3 *paxis, FLOAT *pangle)
1644 {
1645     TRACE("pq %p, paxis %p, pangle %p\n", pq, paxis, pangle);
1646 
1647     if (paxis)
1648     {
1649         paxis->x = pq->x;
1650         paxis->y = pq->y;
1651         paxis->z = pq->z;
1652     }
1653     if (pangle)
1654         *pangle = 2.0f * acosf(pq->w);
1655 }
1656 
1657 /*_________________D3DXVec2_____________________*/
1658 
1659 D3DXVECTOR2* WINAPI D3DXVec2BaryCentric(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT f, FLOAT g)
1660 {
1661     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1662 
1663     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1664     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1665     return pout;
1666 }
1667 
1668 D3DXVECTOR2* WINAPI D3DXVec2CatmullRom(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv0, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT s)
1669 {
1670     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1671 
1672     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1673     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1674     return pout;
1675 }
1676 
1677 D3DXVECTOR2* WINAPI D3DXVec2Hermite(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pt1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pt2, FLOAT s)
1678 {
1679     FLOAT h1, h2, h3, h4;
1680 
1681     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1682 
1683     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1684     h2 = s * s * s - 2.0f * s * s + s;
1685     h3 = -2.0f * s * s * s + 3.0f * s * s;
1686     h4 = s * s * s - s * s;
1687 
1688     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1689     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1690     return pout;
1691 }
1692 
1693 D3DXVECTOR2* WINAPI D3DXVec2Normalize(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv)
1694 {
1695     FLOAT norm;
1696 
1697     TRACE("pout %p, pv %p\n", pout, pv);
1698 
1699     norm = D3DXVec2Length(pv);
1700     if ( !norm )
1701     {
1702         pout->x = 0.0f;
1703         pout->y = 0.0f;
1704     }
1705     else
1706     {
1707         pout->x = pv->x / norm;
1708         pout->y = pv->y / norm;
1709     }
1710 
1711     return pout;
1712 }
1713 
1714 D3DXVECTOR4* WINAPI D3DXVec2Transform(D3DXVECTOR4 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1715 {
1716     D3DXVECTOR4 out;
1717 
1718     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1719 
1720     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y  + pm->u.m[3][0];
1721     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y  + pm->u.m[3][1];
1722     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y  + pm->u.m[3][2];
1723     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y  + pm->u.m[3][3];
1724     *pout = out;
1725     return pout;
1726 }
1727 
1728 D3DXVECTOR4* WINAPI D3DXVec2TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1729 {
1730     UINT i;
1731 
1732     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1733 
1734     for (i = 0; i < elements; ++i) {
1735         D3DXVec2Transform(
1736             (D3DXVECTOR4*)((char*)out + outstride * i),
1737             (const D3DXVECTOR2*)((const char*)in + instride * i),
1738             matrix);
1739     }
1740     return out;
1741 }
1742 
1743 D3DXVECTOR2* WINAPI D3DXVec2TransformCoord(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1744 {
1745     D3DXVECTOR2 v;
1746     FLOAT norm;
1747 
1748     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1749 
1750     v = *pv;
1751     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[3][3];
1752 
1753     pout->x = (pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[3][0]) / norm;
1754     pout->y = (pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[3][1]) / norm;
1755 
1756     return pout;
1757 }
1758 
1759 D3DXVECTOR2* WINAPI D3DXVec2TransformCoordArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1760 {
1761     UINT i;
1762 
1763     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1764 
1765     for (i = 0; i < elements; ++i) {
1766         D3DXVec2TransformCoord(
1767             (D3DXVECTOR2*)((char*)out + outstride * i),
1768             (const D3DXVECTOR2*)((const char*)in + instride * i),
1769             matrix);
1770     }
1771     return out;
1772 }
1773 
1774 D3DXVECTOR2* WINAPI D3DXVec2TransformNormal(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1775 {
1776     const D3DXVECTOR2 v = *pv;
1777 
1778     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1779 
1780     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y;
1781     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y;
1782     return pout;
1783 }
1784 
1785 D3DXVECTOR2* WINAPI D3DXVec2TransformNormalArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2 *in, UINT instride, const D3DXMATRIX *matrix, UINT elements)
1786 {
1787     UINT i;
1788 
1789     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1790 
1791     for (i = 0; i < elements; ++i) {
1792         D3DXVec2TransformNormal(
1793             (D3DXVECTOR2*)((char*)out + outstride * i),
1794             (const D3DXVECTOR2*)((const char*)in + instride * i),
1795             matrix);
1796     }
1797     return out;
1798 }
1799 
1800 /*_________________D3DXVec3_____________________*/
1801 
1802 D3DXVECTOR3* WINAPI D3DXVec3BaryCentric(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT f, FLOAT g)
1803 {
1804     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1805 
1806     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1807     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1808     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
1809     return pout;
1810 }
1811 
1812 D3DXVECTOR3* WINAPI D3DXVec3CatmullRom( D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv0, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT s)
1813 {
1814     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1815 
1816     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1817     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1818     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
1819     return pout;
1820 }
1821 
1822 D3DXVECTOR3* WINAPI D3DXVec3Hermite(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pt1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pt2, FLOAT s)
1823 {
1824     FLOAT h1, h2, h3, h4;
1825 
1826     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1827 
1828     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1829     h2 = s * s * s - 2.0f * s * s + s;
1830     h3 = -2.0f * s * s * s + 3.0f * s * s;
1831     h4 = s * s * s - s * s;
1832 
1833     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1834     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1835     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
1836     return pout;
1837 }
1838 
1839 D3DXVECTOR3* WINAPI D3DXVec3Normalize(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv)
1840 {
1841     FLOAT norm;
1842 
1843     TRACE("pout %p, pv %p\n", pout, pv);
1844 
1845     norm = D3DXVec3Length(pv);
1846     if ( !norm )
1847     {
1848         pout->x = 0.0f;
1849         pout->y = 0.0f;
1850         pout->z = 0.0f;
1851     }
1852     else
1853     {
1854         pout->x = pv->x / norm;
1855         pout->y = pv->y / norm;
1856         pout->z = pv->z / norm;
1857     }
1858 
1859     return pout;
1860 }
1861 
1862 D3DXVECTOR3* WINAPI D3DXVec3Project(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1863 {
1864     D3DXMATRIX m;
1865 
1866     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworld %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1867 
1868     D3DXMatrixIdentity(&m);
1869     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1870     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1871     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
1872 
1873     D3DXVec3TransformCoord(pout, pv, &m);
1874 
1875     if (pviewport)
1876     {
1877         pout->x = pviewport->X +  ( 1.0f + pout->x ) * pviewport->Width / 2.0f;
1878         pout->y = pviewport->Y +  ( 1.0f - pout->y ) * pviewport->Height / 2.0f;
1879         pout->z = pviewport->MinZ + pout->z * ( pviewport->MaxZ - pviewport->MinZ );
1880     }
1881     return pout;
1882 }
1883 
1884 D3DXVECTOR3* WINAPI D3DXVec3ProjectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1885 {
1886     UINT i;
1887 
1888     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1889         out, outstride, in, instride, viewport, projection, view, world, elements);
1890 
1891     for (i = 0; i < elements; ++i) {
1892         D3DXVec3Project(
1893             (D3DXVECTOR3*)((char*)out + outstride * i),
1894             (const D3DXVECTOR3*)((const char*)in + instride * i),
1895             viewport, projection, view, world);
1896     }
1897     return out;
1898 }
1899 
1900 D3DXVECTOR4* WINAPI D3DXVec3Transform(D3DXVECTOR4 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1901 {
1902     D3DXVECTOR4 out;
1903 
1904     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1905 
1906     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0];
1907     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1];
1908     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2];
1909     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3];
1910     *pout = out;
1911     return pout;
1912 }
1913 
1914 D3DXVECTOR4* WINAPI D3DXVec3TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1915 {
1916     UINT i;
1917 
1918     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1919 
1920     for (i = 0; i < elements; ++i) {
1921         D3DXVec3Transform(
1922             (D3DXVECTOR4*)((char*)out + outstride * i),
1923             (const D3DXVECTOR3*)((const char*)in + instride * i),
1924             matrix);
1925     }
1926     return out;
1927 }
1928 
1929 D3DXVECTOR3* WINAPI D3DXVec3TransformCoord(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1930 {
1931     D3DXVECTOR3 out;
1932     FLOAT norm;
1933 
1934     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1935 
1936     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] *pv->z + pm->u.m[3][3];
1937 
1938     out.x = (pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0]) / norm;
1939     out.y = (pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1]) / norm;
1940     out.z = (pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2]) / norm;
1941 
1942     *pout = out;
1943 
1944     return pout;
1945 }
1946 
1947 D3DXVECTOR3* WINAPI D3DXVec3TransformCoordArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1948 {
1949     UINT i;
1950 
1951     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1952 
1953     for (i = 0; i < elements; ++i) {
1954         D3DXVec3TransformCoord(
1955             (D3DXVECTOR3*)((char*)out + outstride * i),
1956             (const D3DXVECTOR3*)((const char*)in + instride * i),
1957             matrix);
1958     }
1959     return out;
1960 }
1961 
1962 D3DXVECTOR3* WINAPI D3DXVec3TransformNormal(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1963 {
1964     const D3DXVECTOR3 v = *pv;
1965 
1966     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1967 
1968     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[2][0] * v.z;
1969     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[2][1] * v.z;
1970     pout->z = pm->u.m[0][2] * v.x + pm->u.m[1][2] * v.y + pm->u.m[2][2] * v.z;
1971     return pout;
1972 
1973 }
1974 
1975 D3DXVECTOR3* WINAPI D3DXVec3TransformNormalArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1976 {
1977     UINT i;
1978 
1979     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1980 
1981     for (i = 0; i < elements; ++i) {
1982         D3DXVec3TransformNormal(
1983             (D3DXVECTOR3*)((char*)out + outstride * i),
1984             (const D3DXVECTOR3*)((const char*)in + instride * i),
1985             matrix);
1986     }
1987     return out;
1988 }
1989 
1990 D3DXVECTOR3* WINAPI D3DXVec3Unproject(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1991 {
1992     D3DXMATRIX m;
1993 
1994     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworlds %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1995 
1996     D3DXMatrixIdentity(&m);
1997     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1998     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1999     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
2000     D3DXMatrixInverse(&m, NULL, &m);
2001 
2002     *pout = *pv;
2003     if (pviewport)
2004     {
2005         pout->x = 2.0f * ( pout->x - pviewport->X ) / pviewport->Width - 1.0f;
2006         pout->y = 1.0f - 2.0f * ( pout->y - pviewport->Y ) / pviewport->Height;
2007         pout->z = ( pout->z - pviewport->MinZ) / ( pviewport->MaxZ - pviewport->MinZ );
2008     }
2009     D3DXVec3TransformCoord(pout, pout, &m);
2010     return pout;
2011 }
2012 
2013 D3DXVECTOR3* WINAPI D3DXVec3UnprojectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
2014 {
2015     UINT i;
2016 
2017     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
2018         out, outstride, in, instride, viewport, projection, view, world, elements);
2019 
2020     for (i = 0; i < elements; ++i) {
2021         D3DXVec3Unproject(
2022             (D3DXVECTOR3*)((char*)out + outstride * i),
2023             (const D3DXVECTOR3*)((const char*)in + instride * i),
2024             viewport, projection, view, world);
2025     }
2026     return out;
2027 }
2028 
2029 /*_________________D3DXVec4_____________________*/
2030 
2031 D3DXVECTOR4* WINAPI D3DXVec4BaryCentric(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT f, FLOAT g)
2032 {
2033     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
2034 
2035     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
2036     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
2037     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
2038     pout->w = (1.0f-f-g) * (pv1->w) + f * (pv2->w) + g * (pv3->w);
2039     return pout;
2040 }
2041 
2042 D3DXVECTOR4* WINAPI D3DXVec4CatmullRom(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv0, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT s)
2043 {
2044     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
2045 
2046     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
2047     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
2048     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
2049     pout->w = 0.5f * (2.0f * pv1->w + (pv2->w - pv0->w) *s + (2.0f *pv0->w - 5.0f * pv1->w + 4.0f * pv2->w - pv3->w) * s * s + (pv3->w -3.0f * pv2->w + 3.0f * pv1->w - pv0->w) * s * s * s);
2050     return pout;
2051 }
2052 
2053 D3DXVECTOR4* WINAPI D3DXVec4Cross(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3)
2054 {
2055     D3DXVECTOR4 out;
2056 
2057     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
2058 
2059     out.x = pv1->y * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->y * pv3->w - pv3->y * pv2->w) + pv1->w * (pv2->y * pv3->z - pv2->z *pv3->y);
2060     out.y = -(pv1->x * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->x * pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->z - pv3->x * pv2->z));
2061     out.z = pv1->x * (pv2->y * pv3->w - pv3->y * pv2->w) - pv1->y * (pv2->x *pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->y - pv3->x * pv2->y);
2062     out.w = -(pv1->x * (pv2->y * pv3->z - pv3->y * pv2->z) - pv1->y * (pv2->x * pv3->z - pv3->x *pv2->z) + pv1->z * (pv2->x * pv3->y - pv3->x * pv2->y));
2063     *pout = out;
2064     return pout;
2065 }
2066 
2067 D3DXVECTOR4* WINAPI D3DXVec4Hermite(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pt1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pt2, FLOAT s)
2068 {
2069     FLOAT h1, h2, h3, h4;
2070 
2071     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
2072 
2073     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
2074     h2 = s * s * s - 2.0f * s * s + s;
2075     h3 = -2.0f * s * s * s + 3.0f * s * s;
2076     h4 = s * s * s - s * s;
2077 
2078     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
2079     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
2080     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
2081     pout->w = h1 * (pv1->w) + h2 * (pt1->w) + h3 * (pv2->w) + h4 * (pt2->w);
2082     return pout;
2083 }
2084 
2085 D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv)
2086 {
2087     FLOAT norm;
2088 
2089     TRACE("pout %p, pv %p\n", pout, pv);
2090 
2091     norm = D3DXVec4Length(pv);
2092 
2093     pout->x = pv->x / norm;
2094     pout->y = pv->y / norm;
2095     pout->z = pv->z / norm;
2096     pout->w = pv->w / norm;
2097 
2098     return pout;
2099 }
2100 
2101 D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv, const D3DXMATRIX *pm)
2102 {
2103     D3DXVECTOR4 out;
2104 
2105     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
2106 
2107     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0] * pv->w;
2108     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1] * pv->w;
2109     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2] * pv->w;
2110     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3] * pv->w;
2111     *pout = out;
2112     return pout;
2113 }
2114 
2115 D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR4* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
2116 {
2117     UINT i;
2118 
2119     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
2120 
2121     for (i = 0; i < elements; ++i) {
2122         D3DXVec4Transform(
2123             (D3DXVECTOR4*)((char*)out + outstride * i),
2124             (const D3DXVECTOR4*)((const char*)in + instride * i),
2125             matrix);
2126     }
2127     return out;
2128 }
2129 
/* Converts a single-precision float to a 16-bit half-float bit pattern
 * (1 sign bit, 5 exponent bits with bias 15, 10 fraction bits).
 *
 * Special cases handled here:
 *   - infinities and NaNs return 0x7fff, or 0xffff when the sign is negative;
 *   - +0.0 / -0.0 return 0x0000 / 0x8000;
 *   - magnitudes too large to encode return 0x7fff (plus the sign bit);
 *   - magnitudes too small even for a denormal return a signed zero.
 * Exponent value 31 is emitted as an ordinary number, not reserved. */
unsigned short float_32_to_16(const float in)
{
    const int negative = (copysignf(1, in) < 0);
    float scaled = fabsf(in);
    int exponent = 0, unrounded_exponent;
    unsigned int bits;
    unsigned short half;
    float frac;

    if (isinf(in) || isnan(in))
        return (negative ? 0xffff : 0x7fff);
    if (in == 0.0f)
        return (negative ? 0x8000 : 0x0000);

    /* Scale the magnitude into [1024, 2048) so its integer part is an 11-bit
     * significand (implicit leading one plus 10 fraction bits). At most one
     * of the two loops executes. */
    while (scaled < 1024.0f)
    {
        scaled *= 2.0f;
        exponent--;
    }
    while (scaled >= 2048.0f)
    {
        scaled /= 2.0f;
        exponent++;
    }

    /* +10 undoes the significand scaling, +15 applies the exponent bias. */
    exponent += 10 + 15;
    unrounded_exponent = exponent;

    /* Round the significand to nearest, ties to even. */
    bits = (unsigned int)scaled;
    frac = scaled - bits;
    if (frac > 0.5f || (frac == 0.5f && bits % 2 == 1))
        bits++;
    if (bits == 2048)
    {
        /* Rounding carried out of the significand: renormalize. */
        bits = 1024;
        exponent++;
    }

    if (exponent > 31)
    {
        /* Too large to represent. */
        half = 0x7fff;
    }
    else if (exponent > 0)
    {
        /* Normalized result: drop the implicit leading one. */
        half = (exponent << 10) | (bits & 0x3ff);
    }
    else
    {
        /* Denormalized half float. */
        if (exponent < -11)
            return (negative ? 0x8000 : 0x0000);

        /* Redo the rounding from the unrounded significand, this time at the
         * denormal's coarser precision. */
        exponent = unrounded_exponent;

        /* Keep 13 extra bits of the scaled value for rounding, then shift
         * according to how far below the normal range the value lies. */
        bits = (unsigned int)(scaled * (1u << 13));
        bits >>= 1 - exponent;

        /* Subtract one when bit 13 (the future result LSB) is clear, so that
         * an exact tie falls below the half-way mark and rounds to even. */
        bits -= ~(bits >> 13) & 1;

        /* After this shift bit 0 is the rounding bit. */
        bits >>= 12;
        half = (bits >> 1) + (bits & 1);
    }

    if (negative)
        half |= 0x8000;

    return half;
}
2214 
2215 D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, const FLOAT *pin, UINT n)
2216 {
2217     unsigned int i;
2218 
2219     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2220 
2221     for (i = 0; i < n; ++i)
2222     {
2223         pout[i].value = float_32_to_16(pin[i]);
2224     }
2225 
2226     return pout;
2227 }
2228 
2229 /* Native d3dx9's D3DXFloat16to32Array lacks support for NaN and Inf. Specifically, e = 16 is treated as a
2230  * regular number - e.g., 0x7fff is converted to 131008.0 and 0xffff to -131008.0. */
2231 float float_16_to_32(const unsigned short in)
2232 {
2233     const unsigned short s = (in & 0x8000);
2234     const unsigned short e = (in & 0x7C00) >> 10;
2235     const unsigned short m = in & 0x3FF;
2236     const float sgn = (s ? -1.0f : 1.0f);
2237 
2238     if (e == 0)
2239     {
2240         if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */
2241         else return sgn * powf(2, -14.0f) * (m / 1024.0f);
2242     }
2243     else
2244     {
2245         return sgn * powf(2, e - 15.0f) * (1.0f + (m / 1024.0f));
2246     }
2247 }
2248 
2249 FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, const D3DXFLOAT16 *pin, UINT n)
2250 {
2251     unsigned int i;
2252 
2253     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2254 
2255     for (i = 0; i < n; ++i)
2256     {
2257         pout[i] = float_16_to_32(pin[i].value);
2258     }
2259 
2260     return pout;
2261 }
2262 
2263 /*_________________D3DXSH________________*/
2264 
2265 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)
2266 {
2267     UINT i;
2268 
2269     TRACE("out %p, order %u, a %p, b %p\n", out, order, a, b);
2270 
2271     for (i = 0; i < order * order; i++)
2272         out[i] = a[i] + b[i];
2273 
2274     return out;
2275 }
2276 
2277 FLOAT WINAPI D3DXSHDot(UINT order, const FLOAT *a, const FLOAT *b)
2278 {
2279     FLOAT s;
2280     UINT i;
2281 
2282     TRACE("order %u, a %p, b %p\n", order, a, b);
2283 
2284     s = a[0] * b[0];
2285     for (i = 1; i < order * order; i++)
2286         s += a[i] * b[i];
2287 
2288     return s;
2289 }
2290 
2291 static void weightedcapintegrale(FLOAT *out, UINT order, FLOAT angle)
2292 {
2293     FLOAT coeff[3];
2294 
2295     coeff[0] = cosf(angle);
2296 
2297     out[0] = 2.0f * D3DX_PI * (1.0f - coeff[0]);
2298     out[1] = D3DX_PI * sinf(angle) * sinf(angle);
2299     if (order <= 2)
2300         return;
2301 
2302     out[2] = coeff[0] * out[1];
2303     if (order == 3)
2304         return;
2305 
2306     coeff[1] = coeff[0] * coeff[0];
2307     coeff[2] = coeff[1] * coeff[1];
2308 
2309     out[3] = D3DX_PI * (-1.25f * coeff[2] + 1.5f * coeff[1] - 0.25f);
2310     if (order == 4)
2311         return;
2312 
2313     out[4] = -0.25f * D3DX_PI * coeff[0] * (7.0f * coeff[2] - 10.0f * coeff[1] + 3.0f);
2314     if (order == 5)
2315         return;
2316 
2317     out[5] = D3DX_PI * (-2.625f * coeff[2] * coeff[1] + 4.375f * coeff[2] - 1.875f * coeff[1] + 0.125f);
2318 }
2319 
2320 HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2321     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2322 {
2323     FLOAT cap[6], clamped_angle, norm, scale, temp;
2324     UINT i, index, j;
2325 
2326     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2327         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2328 
2329     if (radius <= 0.0f)
2330         return D3DXSHEvalDirectionalLight(order, dir, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2331 
2332     clamped_angle = (radius > D3DX_PI / 2.0f) ? (D3DX_PI / 2.0f) : radius;
2333     norm = sinf(clamped_angle) * sinf(clamped_angle);
2334 
2335     if (order > D3DXSH_MAXORDER)
2336     {
2337         WARN("Order clamped at D3DXSH_MAXORDER\n");
2338         order = D3DXSH_MAXORDER;
2339     }
2340 
2341     weightedcapintegrale(cap, order, radius);
2342     D3DXSHEvalDirection(rout, order, dir);
2343 
2344     for (i = 0; i < order; i++)
2345     {
2346         scale = cap[i] / norm;
2347 
2348         for (j = 0; j < 2 * i + 1; j++)
2349         {
2350             index = i * i + j;
2351             temp = rout[index] * scale;
2352 
2353             rout[index] = temp * Rintensity;
2354             if (gout)
2355                 gout[index] = temp * Gintensity;
2356             if (bout)
2357                 bout[index] = temp * Bintensity;
2358         }
2359     }
2360 
2361     return D3D_OK;
2362 }
2363 
2364 FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
2365 {
2366     const FLOAT dirxx = dir->x * dir->x;
2367     const FLOAT dirxy = dir->x * dir->y;
2368     const FLOAT dirxz = dir->x * dir->z;
2369     const FLOAT diryy = dir->y * dir->y;
2370     const FLOAT diryz = dir->y * dir->z;
2371     const FLOAT dirzz = dir->z * dir->z;
2372     const FLOAT dirxxxx = dirxx * dirxx;
2373     const FLOAT diryyyy = diryy * diryy;
2374     const FLOAT dirzzzz = dirzz * dirzz;
2375     const FLOAT dirxyxy = dirxy * dirxy;
2376 
2377     TRACE("out %p, order %u, dir %p\n", out, order, dir);
2378 
2379     if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
2380         return out;
2381 
2382     out[0] = 0.5f / sqrtf(D3DX_PI);
2383     out[1] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->y;
2384     out[2] = 0.5f / sqrtf(D3DX_PI / 3.0f) * dir->z;
2385     out[3] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->x;
2386     if (order == 2)
2387         return out;
2388 
2389     out[4] = 0.5f / sqrtf(D3DX_PI / 15.0f) * dirxy;
2390     out[5] = -0.5f / sqrtf(D3DX_PI / 15.0f) * diryz;
2391     out[6] = 0.25f / sqrtf(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
2392     out[7] = -0.5f / sqrtf(D3DX_PI / 15.0f) * dirxz;
2393     out[8] = 0.25f / sqrtf(D3DX_PI / 15.0f) * (dirxx - diryy);
2394     if (order == 3)
2395         return out;
2396 
2397     out[9] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
2398     out[10] = sqrtf(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
2399     out[11] = -sqrtf(42.0f / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
2400     out[12] = sqrtf(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
2401     out[13] = sqrtf(42.0f / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
2402     out[14] = sqrtf(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
2403     out[15] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
2404     if (order == 4)
2405         return out;
2406 
2407     out[16] = 0.75f * sqrtf(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
2408     out[17] = 3.0f * dir->z * out[9];
2409     out[18] = 0.75f * sqrtf(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
2410     out[19] = 0.375f * sqrtf(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
2411     out[20] = 3.0f / (16.0f * sqrtf(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
2412     out[21] = 0.375f * sqrtf(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
2413     out[22] = 0.375f * sqrtf(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
2414     out[23] = 3.0f * dir->z * out[15];
2415     out[24] = 3.0f / 16.0f * sqrtf(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2416     if (order == 5)
2417         return out;
2418 
2419     out[25] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
2420     out[26] = 0.75f * sqrtf(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
2421     out[27] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
2422     out[28] = sqrtf(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
2423     out[29] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2424     out[30] = sqrtf(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
2425     out[31] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2426     out[32] = sqrtf(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
2427     out[33] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
2428     out[34] = 3.0f / 16.0f * sqrtf(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2429     out[35] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);
2430 
2431     return out;
2432 }
2433 
2434 HRESULT WINAPI D3DXSHEvalDirectionalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *Rout, FLOAT *Gout, FLOAT *Bout)
2435 {
2436     FLOAT s, temp;
2437     UINT j;
2438 
2439     TRACE("Order %u, Vector %p, Red %f, Green %f, Blue %f, Rout %p, Gout %p, Bout %p\n", order, dir, Rintensity, Gintensity, Bintensity, Rout, Gout, Bout);
2440 
2441     s = 0.75f;
2442     if ( order > 2 )
2443         s += 5.0f / 16.0f;
2444     if ( order > 4 )
2445         s -= 3.0f / 32.0f;
2446     s /= D3DX_PI;
2447 
2448     D3DXSHEvalDirection(Rout, order, dir);
2449     for (j = 0; j < order * order; j++)
2450     {
2451         temp = Rout[j] / s;
2452 
2453         Rout[j] = Rintensity * temp;
2454         if ( Gout )
2455             Gout[j] = Gintensity * temp;
2456         if ( Bout )
2457             Bout[j] = Bintensity * temp;
2458     }
2459 
2460     return D3D_OK;
2461 }
2462 
2463 HRESULT WINAPI D3DXSHEvalHemisphereLight(UINT order, const D3DXVECTOR3 *dir, D3DXCOLOR top, D3DXCOLOR bottom,
2464     FLOAT *rout, FLOAT *gout, FLOAT *bout)
2465 {
2466     FLOAT a[2], temp[4];
2467     UINT i, j;
2468 
2469     TRACE("order %u, dir %p, rout %p, gout %p, bout %p\n", order, dir, rout, gout, bout);
2470 
2471     D3DXSHEvalDirection(temp, 2, dir);
2472 
2473     a[0] = (top.r + bottom.r) * 3.0f * D3DX_PI;
2474     a[1] = (top.r - bottom.r) * D3DX_PI;
2475     for (i = 0; i < order; i++)
2476         for (j = 0; j < 2 * i + 1; j++)
2477             if (i < 2)
2478                 rout[i * i + j] = temp[i * i + j] * a[i];
2479             else
2480                 rout[i * i + j] = 0.0f;
2481 
2482     if (gout)
2483     {
2484         a[0] = (top.g + bottom.g) * 3.0f * D3DX_PI;
2485         a[1] = (top.g - bottom.g) * D3DX_PI;
2486         for (i = 0; i < order; i++)
2487             for (j = 0; j < 2 * i + 1; j++)
2488                 if (i < 2)
2489                     gout[i * i + j] = temp[i * i + j] * a[i];
2490                 else
2491                     gout[i * i + j] = 0.0f;
2492     }
2493 
2494     if (bout)
2495     {
2496         a[0] = (top.b + bottom.b) * 3.0f * D3DX_PI;
2497         a[1] = (top.b - bottom.b) * D3DX_PI;
2498         for (i = 0; i < order; i++)
2499             for (j = 0; j < 2 * i + 1; j++)
2500                 if (i < 2)
2501                     bout[i * i + j] = temp[i * i + j] * a[i];
2502                 else
2503                     bout[i * i + j] = 0.0f;
2504     }
2505 
2506     return D3D_OK;
2507 }
2508 
2509 HRESULT WINAPI D3DXSHEvalSphericalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2510     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2511 {
2512     D3DXVECTOR3 normal;
2513     FLOAT cap[6], clamped_angle, dist, temp;
2514     UINT i, index, j;
2515 
2516     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2517         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2518 
2519     if (order > D3DXSH_MAXORDER)
2520     {
2521         WARN("Order clamped at D3DXSH_MAXORDER\n");
2522         order = D3DXSH_MAXORDER;
2523     }
2524 
2525     if (radius < 0.0f)
2526         radius = -radius;
2527 
2528     dist = D3DXVec3Length(dir);
2529     clamped_angle = (dist <= radius) ? D3DX_PI / 2.0f : asinf(radius / dist);
2530 
2531     weightedcapintegrale(cap, order, clamped_angle);
2532     D3DXVec3Normalize(&normal, dir);
2533     D3DXSHEvalDirection(rout, order, &normal);
2534 
2535     for (i = 0; i < order; i++)
2536         for (j = 0; j < 2 * i + 1; j++)
2537         {
2538             index = i * i + j;
2539             temp = rout[index] * cap[i];
2540 
2541             rout[index] = temp * Rintensity;
2542             if (gout)
2543                 gout[index] = temp * Gintensity;
2544             if (bout)
2545                 bout[index] = temp * Bintensity;
2546         }
2547 
2548     return D3D_OK;
2549 }
2550 
2551 FLOAT * WINAPI D3DXSHMultiply2(FLOAT *out, const FLOAT *a, const FLOAT *b)
2552 {
2553     FLOAT ta, tb;
2554 
2555     TRACE("out %p, a %p, b %p\n", out, a, b);
2556 
2557     ta = 0.28209479f * a[0];
2558     tb = 0.28209479f * b[0];
2559 
2560     out[0] = 0.28209479f * D3DXSHDot(2, a, b);
2561     out[1] = ta * b[1] + tb * a[1];
2562     out[2] = ta * b[2] + tb * a[2];
2563     out[3] = ta * b[3] + tb * a[3];
2564 
2565     return out;
2566 }
2567 
2568 FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
2569 {
2570     FLOAT t, ta, tb;
2571 
2572     TRACE("out %p, a %p, b %p\n", out, a, b);
2573 
2574     out[0] = 0.28209479f * a[0] * b[0];
2575 
2576     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2577     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2578     out[1] = ta * b[1] + tb * a[1];
2579     t = a[1] * b[1];
2580     out[0] += 0.28209479f * t;
2581     out[6] = -0.12615663f * t;
2582     out[8] = -0.21850969f * t;
2583 
2584     ta = 0.21850969f * a[5];
2585     tb = 0.21850969f * b[5];
2586     out[1] += ta * b[2] + tb * a[2];
2587     out[2] = ta * b[1] + tb * a[1];
2588     t = a[1] * b[2] +a[2] * b[1];
2589     out[5] = 0.21850969f * t;
2590 
2591     ta = 0.21850969f * a[4];
2592     tb = 0.21850969f * b[4];
2593     out[1] += ta * b[3] + tb * a[3];
2594     out[3]  = ta * b[1] + tb * a[1];
2595     t = a[1] * b[3] + a[3] * b[1];
2596     out[4] = 0.21850969f * t;
2597 
2598     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2599     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2600     out[2] += ta * b[2] + tb * a[2];
2601     t = a[2] * b[2];
2602     out[0] += 0.28209480f * t;
2603     out[6] += 0.25231326f * t;
2604 
2605     ta = 0.21850969f * a[7];
2606     tb = 0.21850969f * b[7];
2607     out[2] += ta * b[3] + tb * a[3];
2608     out[3] += ta * b[2] + tb * a[2];
2609     t = a[2] * b[3] + a[3] * b[2];
2610     out[7] = 0.21850969f * t;
2611 
2612     ta = 0.28209479f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2613     tb = 0.28209479f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2614     out[3] += ta * b[3] + tb * a[3];
2615     t = a[3] * b[3];
2616     out[0] += 0.28209479f * t;
2617     out[6] -= 0.12615663f * t;
2618     out[8] += 0.21850969f * t;
2619 
2620     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2621     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2622     out[4] += ta * b[4] + tb * a[4];
2623     t = a[4] * b[4];
2624     out[0] += 0.28209479f * t;
2625     out[6] -= 0.18022375f * t;
2626 
2627     ta = 0.15607835f * a[7];
2628     tb = 0.15607835f * b[7];
2629     out[4] += ta * b[5] + tb * a[5];
2630     out[5] += ta * b[4] + tb * a[4];
2631     t = a[4] * b[5] + a[5] * b[4];
2632     out[7] += 0.15607835f * t;
2633 
2634     ta = 0.28209479f * a[0] + 0.09011188f * a[6] - 0.15607835f * a[8];
2635     tb = 0.28209479f * b[0] + 0.09011188f * b[6] - 0.15607835f * b[8];
2636     out[5] += ta * b[5] + tb * a[5];
2637     t = a[5] * b[5];
2638     out[0] += 0.28209479f * t;
2639     out[6] += 0.09011188f * t;
2640     out[8] -= 0.15607835f * t;
2641 
2642     ta = 0.28209480f * a[0];
2643     tb = 0.28209480f * b[0];
2644     out[6] += ta * b[6] + tb * a[6];
2645     t = a[6] * b[6];
2646     out[0] += 0.28209480f * t;
2647     out[6] += 0.18022376f * t;
2648 
2649     ta = 0.28209479f * a[0] + 0.09011188f * a[6] + 0.15607835f * a[8];
2650     tb = 0.28209479f * b[0] + 0.09011188f * b[6] + 0.15607835f * b[8];
2651     out[7] += ta * b[7] + tb * a[7];
2652     t = a[7] * b[7];
2653     out[0] += 0.28209479f * t;
2654     out[6] += 0.09011188f * t;
2655     out[8] += 0.15607835f * t;
2656 
2657     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2658     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2659     out[8] += ta * b[8] + tb * a[8];
2660     t = a[8] * b[8];
2661     out[0] += 0.28209479f * t;
2662     out[6] -= 0.18022375f * t;
2663 
2664     return out;
2665 }
2666 
2667 FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b)
2668 {
2669     FLOAT ta, tb, t;
2670 
2671     TRACE("out %p, a %p, b %p\n", out, a, b);
2672 
2673     out[0] = 0.28209479f * a[0] * b[0];
2674 
2675     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2676     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2677     out[1] = ta * b[1] + tb * a[1];
2678     t = a[1] * b[1];
2679     out[0] += 0.28209479f * t;
2680     out[6] = -0.12615663f * t;
2681     out[8] = -0.21850969f * t;
2682 
2683     ta = 0.21850969f * a[3] - 0.05839917f * a[13] - 0.22617901f * a[15];
2684     tb = 0.21850969f * b[3] - 0.05839917f * b[13] - 0.22617901f * b[15];
2685     out[1] += ta * b[4] + tb * a[4];
2686     out[4] = ta * b[1] + tb * a[1];
2687     t = a[1] * b[4] + a[4] * b[1];
2688     out[3] = 0.21850969f * t;
2689     out[13] = -0.05839917f * t;
2690     out[15] = -0.22617901f * t;
2691 
2692     ta = 0.21850969f * a[2] - 0.14304817f * a[12] - 0.18467439f * a[14];
2693     tb = 0.21850969f * b[2] - 0.14304817f * b[12] - 0.18467439f * b[14];
2694     out[1] += ta * b[5] + tb * a[5];
2695     out[5] = ta * b[1] + tb * a[1];
2696     t = a[1] * b[5] + a[5] * b[1];
2697     out[2] = 0.21850969f * t;
2698     out[12] = -0.14304817f * t;
2699     out[14] = -0.18467439f * t;
2700 
2701     ta = 0.20230066f * a[11];
2702     tb = 0.20230066f * b[11];
2703     out[1] += ta * b[6] + tb * a[6];
2704     out[6] += ta * b[1] + tb * a[1];
2705     t = a[1] * b[6] + a[6] * b[1];
2706     out[11] = 0.20230066f * t;
2707 
2708     ta = 0.22617901f * a[9] + 0.05839917f * a[11];
2709     tb = 0.22617901f * b[9] + 0.05839917f * b[11];
2710     out[1] += ta * b[8] + tb * a[8];
2711     out[8] += ta * b[1] + tb * a[1];
2712     t = a[1] * b[8] + a[8] * b[1];
2713     out[9] = 0.22617901f * t;
2714     out[11] += 0.05839917f * t;
2715 
2716     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2717     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2718     out[2] += ta * b[2] + tb * a[2];
2719     t = a[2] * b[2];
2720     out[0] += 0.28209480f * t;
2721     out[6] += 0.25231326f * t;
2722 
2723     ta = 0.24776671f * a[12];
2724     tb = 0.24776671f * b[12];
2725     out[2] += ta * b[6] + tb * a[6];
2726     out[6] += ta * b[2] + tb * a[2];
2727     t = a[2] * b[6] + a[6] * b[2];
2728     out[12] += 0.24776671f * t;
2729 
2730     ta = 0.28209480f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2731     tb = 0.28209480f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2732     out[3] += ta * b[3] + tb * a[3];
2733     t = a[3] * b[3];
2734     out[0] += 0.28209480f * t;
2735     out[6] -= 0.12615663f * t;
2736     out[8] += 0.21850969f * t;
2737 
2738     ta = 0.20230066f * a[13];
2739     tb = 0.20230066f * b[13];
2740     out[3] += ta * b[6] + tb * a[6];
2741     out[6] += ta * b[3] + tb * a[3];
2742     t = a[3] * b[6] + a[6] * b[3];
2743     out[13] += 0.20230066f * t;
2744 
2745     ta = 0.21850969f * a[2] - 0.14304817f * a[12] + 0.18467439f * a[14];
2746     tb = 0.21850969f * b[2] - 0.14304817f * b[12] + 0.18467439f * b[14];
2747     out[3] += ta * b[7] + tb * a[7];
2748     out[7] = ta * b[3] + tb * a[3];
2749     t = a[3] * b[7] + a[7] * b[3];
2750     out[2] += 0.21850969f * t;
2751     out[12] -= 0.14304817f * t;
2752     out[14] += 0.18467439f * t;
2753 
2754     ta = -0.05839917f * a[13] + 0.22617901f * a[15];
2755     tb = -0.05839917f * b[13] + 0.22617901f * b[15];
2756     out[3] += ta * b[8] + tb * a[8];
2757     out[8] += ta * b[3] + tb * a[3];
2758     t = a[3] * b[8] + a[8] * b[3];
2759     out[13] -= 0.05839917f * t;
2760     out[15] += 0.22617901f * t;
2761 
2762     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2763     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2764     out[4] += ta * b[4] + tb * a[4];
2765     t = a[4] * b[4];
2766     out[0] += 0.28209479f * t;
2767     out[6] -= 0.18022375f * t;
2768 
2769     ta = 0.15607835f * a[7];
2770     tb = 0.15607835f * b[7];
2771     out[4] += ta * b[5] + tb * a[5];
2772     out[5] += ta * b[4] + tb * a[4];
2773     t = a[4] * b[5] + a[5] * b[4];
2774     out[7] += 0.15607835f * t;
2775 
2776     ta = 0.22617901f * a[3] - 0.09403160f * a[13];
2777     tb = 0.22617901f * b[3] - 0.09403160f * b[13];
2778     out[4] += ta * b[9] + tb * a[9];
2779     out[9] += ta * b[4] + tb * a[4];
2780     t = a[4] * b[9] + a[9] * b[4];
2781     out[3] += 0.22617901f * t;
2782     out[13] -= 0.09403160f * t;
2783 
2784     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2785     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2786     out[4] += ta * b[10] + tb * a [10];
2787     out[10] = ta * b[4] + tb * a[4];
2788     t = a[4] * b[10] + a[10] * b[4];
2789     out[2] += 0.18467439f * t;
2790     out[12] -= 0.18806319f * t;
2791 
2792     ta = -0.05839917f * a[3] + 0.14567312f * a[13] + 0.09403160f * a[15];
2793     tb = -0.05839917f * b[3] + 0.14567312f * b[13] + 0.09403160f * b[15];
2794     out[4] += ta * b[11] + tb * a[11];
2795     out[11] += ta * b[4] + tb * a[4];
2796     t = a[4] * b[11] + a[11] * b[4];
2797     out[3] -= 0.05839917f * t;
2798     out[13] += 0.14567312f * t;
2799     out[15] += 0.09403160f * t;
2800 
2801     ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
2802     tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
2803     out[5] += ta * b[5] + tb * a[5];
2804     t = a[5] * b[5];
2805     out[0] += 0.28209479f * t;
2806     out[6] += 0.09011186f * t;
2807     out[8] -= 0.15607835f * t;
2808 
2809     ta = 0.14867701f * a[14];
2810     tb = 0.14867701f * b[14];
2811     out[5] += ta * b[9] + tb * a[9];
2812     out[9] += ta * b[5] + tb * a[5];
2813     t = a[5] * b[9] + a[9] * b[5];
2814     out[14] += 0.14867701f * t;
2815 
2816     ta = 0.18467439f * a[3] + 0.11516472f * a[13] - 0.14867701f * a[15];
2817     tb = 0.18467439f * b[3] + 0.11516472f * b[13] - 0.14867701f * b[15];
2818     out[5] += ta * b[10] + tb * a[10];
2819     out[10] += ta * b[5] + tb * a[5];
2820     t = a[5] * b[10] + a[10] * b[5];
2821     out[3] += 0.18467439f * t;
2822     out[13] += 0.11516472f * t;
2823     out[15] -= 0.14867701f * t;
2824 
2825     ta = 0.23359668f * a[2] + 0.05947080f * a[12] - 0.11516472f * a[14];
2826     tb = 0.23359668f * b[2] + 0.05947080f * b[12] - 0.11516472f * b[14];
2827     out[5] += ta * b[11] + tb * a[11];
2828     out[11] += ta * b[5] + tb * a[5];
2829     t = a[5] * b[11] + a[11] * b[5];
2830     out[2] += 0.23359668f * t;
2831     out[12] += 0.05947080f * t;
2832     out[14] -= 0.11516472f * t;
2833 
2834     ta = 0.28209479f * a[0];
2835     tb = 0.28209479f * b[0];
2836     out[6] += ta * b[6] + tb * a[6];
2837     t = a[6] * b[6];
2838     out[0] += 0.28209479f * t;
2839     out[6] += 0.18022376f * t;
2840 
2841     ta = 0.09011186f * a[6] + 0.28209479f * a[0] + 0.15607835f * a[8];
2842     tb = 0.09011186f * b[6] + 0.28209479f * b[0] + 0.15607835f * b[8];
2843     out[7] += ta * b[7] + tb * a[7];
2844     t = a[7] * b[7];
2845     out[6] += 0.09011186f * t;
2846     out[0] += 0.28209479f * t;
2847     out[8] += 0.15607835f * t;
2848 
2849     ta = 0.14867701f * a[9] + 0.18467439f * a[1] + 0.11516472f * a[11];
2850     tb = 0.14867701f * b[9] + 0.18467439f * b[1] + 0.11516472f * b[11];
2851     out[7] += ta * b[10] + tb * a[10];
2852     out[10] += ta * b[7] + tb * a[7];
2853     t = a[7] * b[10] + a[10] * b[7];
2854     out[9] += 0.14867701f * t;
2855     out[1] += 0.18467439f * t;
2856     out[11] += 0.11516472f * t;
2857 
2858     ta = 0.05947080f * a[12] + 0.23359668f * a[2] + 0.11516472f * a[14];
2859     tb = 0.05947080f * b[12] + 0.23359668f * b[2] + 0.11516472f * b[14];
2860     out[7] += ta * b[13] + tb * a[13];
2861     out[13] += ta * b[7]+ tb * a[7];
2862     t = a[7] * b[13] + a[13] * b[7];
2863     out[12] += 0.05947080f * t;
2864     out[2] += 0.23359668f * t;
2865     out[14] += 0.11516472f * t;
2866 
2867     ta = 0.14867701f * a[15];
2868     tb = 0.14867701f * b[15];
2869     out[7] += ta * b[14] + tb * a[14];
2870     out[14] += ta * b[7] + tb * a[7];
2871     t = a[7] * b[14] + a[14] * b[7];
2872     out[15] += 0.14867701f * t;
2873 
2874     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2875     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2876     out[8] += ta * b[8] + tb * a[8];
2877     t = a[8] * b[8];
2878     out[0] += 0.28209479f * t;
2879     out[6] -= 0.18022375f * t;
2880 
2881     ta = -0.09403160f * a[11];
2882     tb = -0.09403160f * b[11];
2883     out[8] += ta * b[9] + tb * a[9];
2884     out[9] += ta * b[8] + tb * a[8];
2885     t = a[8] * b[9] + a[9] * b[8];
2886     out[11] -= 0.09403160f * t;
2887 
2888     ta = -0.09403160f * a[15];
2889     tb = -0.09403160f * b[15];
2890     out[8] += ta * b[13] + tb * a[13];
2891     out[13] += ta * b[8] + tb * a[8];
2892     t = a[8] * b[13] + a[13] * b[8];
2893     out[15] -= 0.09403160f * t;
2894 
2895     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2896     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2897     out[8] += ta * b[14] + tb * a[14];
2898     out[14] += ta * b[8] + tb * a[8];
2899     t = a[8] * b[14] + a[14] * b[8];
2900     out[2] += 0.18467439f * t;
2901     out[12] -= 0.18806319f * t;
2902 
2903     ta = -0.21026104f * a[6] + 0.28209479f * a[0];
2904     tb = -0.21026104f * b[6] + 0.28209479f * b[0];
2905     out[9] += ta * b[9] + tb * a[9];
2906     t = a[9] * b[9];
2907     out[6] -= 0.21026104f * t;
2908     out[0] += 0.28209479f * t;
2909 
2910     ta = 0.28209479f * a[0];
2911     tb = 0.28209479f * b[0];
2912     out[10] += ta * b[10] + tb * a[10];
2913     t = a[10] * b[10];
2914     out[0] += 0.28209479f * t;
2915 
2916     ta = 0.28209479f * a[0] + 0.12615663f * a[6] - 0.14567312f * a[8];
2917     tb = 0.28209479f * b[0] + 0.12615663f * b[6] - 0.14567312f * b[8];
2918     out[11] += ta * b[11] + tb * a[11];
2919     t = a[11] * b[11];
2920     out[0] += 0.28209479f * t;
2921     out[6] += 0.12615663f * t;
2922     out[8] -= 0.14567312f * t;
2923 
2924     ta = 0.28209479f * a[0] + 0.16820885f * a[6];
2925     tb = 0.28209479f * b[0] + 0.16820885f * b[6];
2926     out[12] += ta * b[12] + tb * a[12];
2927     t = a[12] * b[12];
2928     out[0] += 0.28209479f * t;
2929     out[6] += 0.16820885f * t;
2930 
2931     ta =0.28209479f * a[0] + 0.14567312f * a[8] + 0.12615663f * a[6];
2932     tb =0.28209479f * b[0] + 0.14567312f * b[8] + 0.12615663f * b[6];
2933     out[13] += ta * b[13] + tb * a[13];
2934     t = a[13] * b[13];
2935     out[0] += 0.28209479f * t;
2936     out[8] += 0.14567312f * t;
2937     out[6] += 0.12615663f * t;
2938 
2939     ta = 0.28209479f * a[0];
2940     tb = 0.28209479f * b[0];
2941     out[14] += ta * b[14] + tb * a[14];
2942     t = a[14] * b[14];
2943     out[0] += 0.28209479f * t;
2944 
2945     ta = 0.28209479f * a[0] - 0.21026104f * a[6];
2946     tb = 0.28209479f * b[0] - 0.21026104f * b[6];
2947     out[15] += ta * b[15] + tb * a[15];
2948     t = a[15] * b[15];
2949     out[0] += 0.28209479f * t;
2950     out[6] -= 0.21026104f * t;
2951 
2952     return out;
2953 }
2954 
2955 static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in)
2956 {
2957     out[0] = in[0];
2958 
2959     out[1] = a * in[2];
2960     out[2] = -a * in[1];
2961     out[3] = in[3];
2962 
2963     out[4] = a * in[7];
2964     out[5] = -in[5];
2965     out[6] = -0.5f * in[6] - 0.8660253882f * in[8];
2966     out[7] = -a * in[4];
2967     out[8] = -0.8660253882f * in[6] + 0.5f * in[8];
2968     out[9] = -a * 0.7905694842f * in[12] + a * 0.6123724580f * in[14];
2969 
2970     out[10] = -in[10];
2971     out[11] = -a * 0.6123724580f * in[12] - a * 0.7905694842f * in[14];
2972     out[12] = a * 0.7905694842f * in[9] + a * 0.6123724580f * in[11];
2973     out[13] = -0.25f * in[13] - 0.9682458639f * in[15];
2974     out[14] = -a * 0.6123724580f * in[9] + a * 0.7905694842f * in[11];
2975     out[15] = -0.9682458639f * in[13] + 0.25f * in[15];
2976     if (order == 4)
2977         return;
2978 
2979     out[16] = -a * 0.9354143739f * in[21] + a * 0.3535533845f * in[23];
2980     out[17] = -0.75f * in[17] + 0.6614378095f * in[19];
2981     out[18] = -a * 0.3535533845f * in[21] - a * 0.9354143739f * in[23];
2982     out[19] = 0.6614378095f * in[17] + 0.75f * in[19];
2983     out[20] = 0.375f * in[20] + 0.5590170026f * in[22] + 0.7395099998f * in[24];
2984     out[21] = a * 0.9354143739f * in[16] + a * 0.3535533845f * in[18];
2985     out[22] = 0.5590170026f * in[20] + 0.5f * in[22] - 0.6614378691f * in[24];
2986     out[23] = -a * 0.3535533845f * in[16] + a * 0.9354143739f * in[18];
2987     out[24] = 0.7395099998f * in[20] - 0.6614378691f * in[22] + 0.125f * in[24];
2988     if (order == 5)
2989         return;
2990 
2991     out[25] = a * 0.7015607357f * in[30] - a * 0.6846531630f * in[32] + a * 0.1976423711f * in[34];
2992     out[26] = -0.5f * in[26] + 0.8660253882f * in[28];
2993     out[27] = a * 0.5229125023f * in[30] + a * 0.3061861992f * in[32] - a * 0.7954951525f * in[34];
2994     out[28] = 0.8660253882f * in[26] + 0.5f * in[28];
2995     out[29] = a * 0.4841229022f * in[30] + a * 0.6614378691f * in[32] + a * 0.5728219748f * in[34];
2996     out[30] = -a * 0.7015607357f * in[25] - a * 0.5229125023f * in[27] - a * 0.4841229022f * in[29];
2997     out[31] = 0.125f * in[31] + 0.4050463140f * in[33] + 0.9057110548f * in[35];
2998     out[32] = a * 0.6846531630f * in[25] - a * 0.3061861992f * in[27] - a * 0.6614378691f * in[29];
2999     out[33] = 0.4050463140f * in[31] + 0.8125f * in[33] - 0.4192627370f * in[35];
3000     out[34] = -a * 0.1976423711f * in[25] + a * 0.7954951525f * in[27] - a * 0.5728219748f * in[29];
3001     out[35] = 0.9057110548f * in[31] - 0.4192627370f * in[33] + 0.0624999329f * in[35];
3002 }
3003 
3004 FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, const D3DXMATRIX *matrix, const FLOAT *in)
3005 {
3006     FLOAT alpha, beta, gamma, sinb, temp[36], temp1[36];
3007 
3008     TRACE("out %p, order %u, matrix %p, in %p\n", out, order, matrix, in);
3009 
3010     out[0] = in[0];
3011 
3012     if ((order > D3DXSH_MAXORDER) || (order < D3DXSH_MINORDER))
3013         return out;
3014 
3015     if (order <= 3)
3016     {
3017         out[1] = matrix->u.m[1][1] * in[1] - matrix->u.m[2][1] * in[2] + matrix->u.m[0][1] * in[3];
3018         out[2] = -matrix->u.m[1][2] * in[1] + matrix->u.m[2][2] * in[2] - matrix->u.m[0][2] * in[3];
3019         out[3] = matrix->u.m[1][0] * in[1] - matrix->u.m[2][0] * in[2] + matrix->u.m[0][0] * in[3];
3020 
3021         if (order == 3)
3022         {
3023             FLOAT coeff[]={
3024                 matrix->u.m[1][0] * matrix->u.m[0][0], matrix->u.m[1][1] * matrix->u.m[0][1],
3025                 matrix->u.m[1][1] * matrix->u.m[2][1], matrix->u.m[1][0] * matrix->u.m[2][0],
3026                 matrix->u.m[2][0] * matrix->u.m[2][0], matrix->u.m[2][1] * matrix->u.m[2][1],
3027                 matrix->u.m[0][0] * matrix->u.m[2][0], matrix->u.m[0][1] * matrix->u.m[2][1],
3028                 matrix->u.m[0][1] * matrix->u.m[0][1], matrix->u.m[1][0] * matrix->u.m[1][0],
3029                 matrix->u.m[1][1] * matrix->u.m[1][1], matrix->u.m[0][0] * matrix->u.m[0][0], };
3030 
3031             out[4] = (matrix->u.m[1][1] * matrix->u.m[0][0] + matrix->u.m[0][1] * matrix->u.m[1][0]) * in[4];
3032             out[4] -= (matrix->u.m[1][0] * matrix->u.m[2][1] + matrix->u.m[1][1] * matrix->u.m[2][0]) * in[5];
3033             out[4] += 1.7320508076f * matrix->u.m[2][0] * matrix->u.m[2][1] * in[6];
3034             out[4] -= (matrix->u.m[0][1] * matrix->u.m[2][0] + matrix->u.m[0][0] * matrix->u.m[2][1]) * in[7];
3035             out[4] += (matrix->u.m[0][0] * matrix->u.m[0][1] - matrix->u.m[1][0] * matrix->u.m[1][1]) * in[8];
3036 
3037             out[5] = (matrix->u.m[1][1] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][1]) * in[5];
3038             out[5] -= (matrix->u.m[1][1] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][1]) * in[4];
3039             out[5] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][1] * in[6];
3040             out[5] += (matrix->u.m[0][2] * matrix->u.m[2][1] + matrix->u.m[0][1] * matrix->u.m[2][2]) * in[7];
3041             out[5] -= (matrix->u.m[0][1] * matrix->u.m[0][2] - matrix->u.m[1][1] * matrix->u.m[1][2]) * in[8];
3042 
3043             out[6] = (matrix->u.m[2][2] * matrix->u.m[2][2] - 0.5f * (coeff[4] + coeff[5])) * in[6];
3044             out[6] -= (0.5773502692f * (coeff[0] + coeff[1]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[0][2]) * in[4];
3045             out[6] += (0.5773502692f * (coeff[2] + coeff[3]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[2][2]) * in[5];
3046             out[6] += (0.5773502692f * (coeff[6] + coeff[7]) - 1.1547005384f * matrix->u.m[0][2] * matrix->u.m[2][2]) * in[7];
3047             out[6] += (0.2886751347f * (coeff[9] - coeff[8] + coeff[10] - coeff[11]) - 0.5773502692f *
3048                   (matrix->u.m[1][2] * matrix->u.m[1][2] - matrix->u.m[0][2] * matrix->u.m[0][2])) * in[8];
3049 
3050             out[7] = (matrix->u.m[0][0] * matrix->u.m[2][2] + matrix->u.m[0][2] * matrix->u.m[2][0]) * in[7];
3051             out[7] -= (matrix->u.m[1][0] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][0]) * in[4];
3052             out[7] += (matrix->u.m[1][0] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][0]) * in[5];
3053             out[7] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][0] * in[6];
3054             out[7] -= (matrix->u.m[0][0] * matrix->u.m[0][2] - matrix->u.m[1][0] * matrix->u.m[1][2]) * in[8];
3055 
3056             out[8] = 0.5f * (coeff[11] - coeff[8] - coeff[9] + coeff[10]) * in[8];
3057             out[8] += (coeff[0] - coeff[1]) * in[4];
3058             out[8] += (coeff[2] - coeff[3]) * in[5];
3059             out[8] += 0.86602540f * (coeff[4] - coeff[5]) * in[6];
3060             out[8] += (coeff[7] - coeff[6]) * in[7];
3061         }
3062 
3063         return out;
3064     }
3065 
3066     if (fabsf(matrix->u.m[2][2]) != 1.0f)
3067     {
3068         sinb = sqrtf(1.0f - matrix->u.m[2][2] * matrix->u.m[2][2]);
3069         alpha = atan2f(matrix->u.m[2][1] / sinb, matrix->u.m[2][0] / sinb);
3070         beta = atan2f(sinb, matrix->u.m[2][2]);
3071         gamma = atan2f(matrix->u.m[1][2] / sinb, -matrix->u.m[0][2] / sinb);
3072     }
3073     else
3074     {
3075         alpha = atan2f(matrix->u.m[0][1], matrix->u.m[0][0]);
3076         beta = 0.0f;
3077         gamma = 0.0f;
3078     }
3079 
3080     D3DXSHRotateZ(temp, order, gamma, in);
3081     rotate_X(temp1, order, 1.0f, temp);
3082     D3DXSHRotateZ(temp, order, beta, temp1);
3083     rotate_X(temp1, order, -1.0f, temp);
3084     D3DXSHRotateZ(out, order, alpha, temp1);
3085 
3086     return out;
3087 }
3088 
3089 FLOAT * WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, const FLOAT *in)
3090 {
3091     UINT i, sum = 0;
3092     FLOAT c[5], s[5];
3093 
3094     TRACE("out %p, order %u, angle %f, in %p\n", out, order, angle, in);
3095 
3096     order = min(max(order, D3DXSH_MINORDER), D3DXSH_MAXORDER);
3097 
3098     out[0] = in[0];
3099 
3100     for (i = 1; i < order; i++)
3101     {
3102         UINT j;
3103 
3104         c[i - 1] = cosf(i * angle);
3105         s[i - 1] = sinf(i * angle);
3106         sum += i * 2;
3107 
3108         out[sum - i] = c[i - 1] * in[sum - i];
3109         out[sum - i] += s[i - 1] * in[sum + i];
3110         for (j = i - 1; j > 0; j--)
3111         {
3112             out[sum - j] = 0.0f;
3113             out[sum - j] = c[j - 1] * in[sum - j];
3114             out[sum - j] += s[j - 1] * in[sum + j];
3115         }
3116 
3117         if (in == out)
3118             out[sum] = 0.0f;
3119         else
3120             out[sum] = in[sum];
3121 
3122         for (j = 1; j < i; j++)
3123         {
3124             out[sum + j] = 0.0f;
3125             out[sum + j] = -s[j - 1] * in[sum - j];
3126             out[sum + j] += c[j - 1] * in[sum + j];
3127         }
3128         out[sum + i] = -s[i - 1] * in[sum - i];
3129         out[sum + i] += c[i - 1] * in[sum + i];
3130     }
3131 
3132     return out;
3133 }
3134 
3135 FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, const FLOAT *a, const FLOAT scale)
3136 {
3137     UINT i;
3138 
3139     TRACE("out %p, order %u, a %p, scale %f\n", out, order, a, scale);
3140 
3141     for (i = 0; i < order * order; i++)
3142         out[i] = a[i] * scale;
3143 
3144     return out;
3145 }
3146