xref: /reactos/dll/directx/wine/d3dx9_36/math.c (revision b99f0b49)
1 /*
2  * Mathematical operations specific to D3DX9.
3  *
4  * Copyright (C) 2008 David Adam
5  * Copyright (C) 2008 Luis Busquets
6  * Copyright (C) 2008 Jérôme Gardou
7  * Copyright (C) 2008 Philip Nilsson
8  * Copyright (C) 2008 Henri Verbeet
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #include "d3dx9_36_private.h"
26 
27 struct ID3DXMatrixStackImpl
28 {
29   ID3DXMatrixStack ID3DXMatrixStack_iface;
30   LONG ref;
31 
32   unsigned int current;
33   unsigned int stack_size;
34   D3DXMATRIX *stack;
35 };
36 
37 static const unsigned int INITIAL_STACK_SIZE = 32;
38 
39 /*_________________D3DXColor____________________*/
40 
41 D3DXCOLOR* WINAPI D3DXColorAdjustContrast(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
42 {
43     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
44 
45     pout->r = 0.5f + s * (pc->r - 0.5f);
46     pout->g = 0.5f + s * (pc->g - 0.5f);
47     pout->b = 0.5f + s * (pc->b - 0.5f);
48     pout->a = pc->a;
49     return pout;
50 }
51 
52 D3DXCOLOR* WINAPI D3DXColorAdjustSaturation(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
53 {
54     FLOAT grey;
55 
56     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
57 
58     grey = pc->r * 0.2125f + pc->g * 0.7154f + pc->b * 0.0721f;
59     pout->r = grey + s * (pc->r - grey);
60     pout->g = grey + s * (pc->g - grey);
61     pout->b = grey + s * (pc->b - grey);
62     pout->a = pc->a;
63     return pout;
64 }
65 
66 /*_________________Misc__________________________*/
67 
68 FLOAT WINAPI D3DXFresnelTerm(FLOAT costheta, FLOAT refractionindex)
69 {
70     FLOAT a, d, g, result;
71 
72     TRACE("costheta %f, refractionindex %f\n", costheta, refractionindex);
73 
74     g = sqrtf(refractionindex * refractionindex + costheta * costheta - 1.0f);
75     a = g + costheta;
76     d = g - costheta;
77     result = (costheta * a - 1.0f) * (costheta * a - 1.0f) / ((costheta * d + 1.0f) * (costheta * d + 1.0f)) + 1.0f;
78     result *= 0.5f * d * d / (a * a);
79 
80     return result;
81 }
82 
83 /*_________________D3DXMatrix____________________*/
84 
85 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation(D3DXMATRIX *out, FLOAT scaling, const D3DXVECTOR3 *rotationcenter,
86         const D3DXQUATERNION *rotation, const D3DXVECTOR3 *translation)
87 {
88     TRACE("out %p, scaling %f, rotationcenter %p, rotation %p, translation %p\n",
89             out, scaling, rotationcenter, rotation, translation);
90 
91     D3DXMatrixIdentity(out);
92 
93     if (rotation)
94     {
95         FLOAT temp00, temp01, temp02, temp10, temp11, temp12, temp20, temp21, temp22;
96 
97         temp00 = 1.0f - 2.0f * (rotation->y * rotation->y + rotation->z * rotation->z);
98         temp01 = 2.0f * (rotation->x * rotation->y + rotation->z * rotation->w);
99         temp02 = 2.0f * (rotation->x * rotation->z - rotation->y * rotation->w);
100         temp10 = 2.0f * (rotation->x * rotation->y - rotation->z * rotation->w);
101         temp11 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->z * rotation->z);
102         temp12 = 2.0f * (rotation->y * rotation->z + rotation->x * rotation->w);
103         temp20 = 2.0f * (rotation->x * rotation->z + rotation->y * rotation->w);
104         temp21 = 2.0f * (rotation->y * rotation->z - rotation->x * rotation->w);
105         temp22 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->y * rotation->y);
106 
107         out->u.m[0][0] = scaling * temp00;
108         out->u.m[0][1] = scaling * temp01;
109         out->u.m[0][2] = scaling * temp02;
110         out->u.m[1][0] = scaling * temp10;
111         out->u.m[1][1] = scaling * temp11;
112         out->u.m[1][2] = scaling * temp12;
113         out->u.m[2][0] = scaling * temp20;
114         out->u.m[2][1] = scaling * temp21;
115         out->u.m[2][2] = scaling * temp22;
116 
117         if (rotationcenter)
118         {
119             out->u.m[3][0] = rotationcenter->x * (1.0f - temp00) - rotationcenter->y * temp10
120                     - rotationcenter->z * temp20;
121             out->u.m[3][1] = rotationcenter->y * (1.0f - temp11) - rotationcenter->x * temp01
122                     - rotationcenter->z * temp21;
123             out->u.m[3][2] = rotationcenter->z * (1.0f - temp22) - rotationcenter->x * temp02
124                     - rotationcenter->y * temp12;
125         }
126     }
127     else
128     {
129         out->u.m[0][0] = scaling;
130         out->u.m[1][1] = scaling;
131         out->u.m[2][2] = scaling;
132     }
133 
134     if (translation)
135     {
136         out->u.m[3][0] += translation->x;
137         out->u.m[3][1] += translation->y;
138         out->u.m[3][2] += translation->z;
139     }
140 
141     return out;
142 }
143 
144 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation2D(D3DXMATRIX *out, FLOAT scaling,
145         const D3DXVECTOR2 *rotationcenter, FLOAT rotation, const D3DXVECTOR2 *translation)
146 {
147     FLOAT tmp1, tmp2, s;
148 
149     TRACE("out %p, scaling %f, rotationcenter %p, rotation %f, translation %p\n",
150             out, scaling, rotationcenter, rotation, translation);
151 
152     s = sinf(rotation / 2.0f);
153     tmp1 = 1.0f - 2.0f * s * s;
154     tmp2 = 2.0f * s * cosf(rotation / 2.0f);
155 
156     D3DXMatrixIdentity(out);
157     out->u.m[0][0] = scaling * tmp1;
158     out->u.m[0][1] = scaling * tmp2;
159     out->u.m[1][0] = -scaling * tmp2;
160     out->u.m[1][1] = scaling * tmp1;
161 
162     if (rotationcenter)
163     {
164         FLOAT x, y;
165 
166         x = rotationcenter->x;
167         y = rotationcenter->y;
168 
169         out->u.m[3][0] = y * tmp2 - x * tmp1 + x;
170         out->u.m[3][1] = -x * tmp2 - y * tmp1 + y;
171     }
172 
173     if (translation)
174     {
175         out->u.m[3][0] += translation->x;
176         out->u.m[3][1] += translation->y;
177     }
178 
179     return out;
180 }
181 
182 HRESULT WINAPI D3DXMatrixDecompose(D3DXVECTOR3 *poutscale, D3DXQUATERNION *poutrotation, D3DXVECTOR3 *pouttranslation, const D3DXMATRIX *pm)
183 {
184     D3DXMATRIX normalized;
185     D3DXVECTOR3 vec;
186 
187     TRACE("poutscale %p, poutrotation %p, pouttranslation %p, pm %p\n", poutscale, poutrotation, pouttranslation, pm);
188 
189     /*Compute the scaling part.*/
190     vec.x=pm->u.m[0][0];
191     vec.y=pm->u.m[0][1];
192     vec.z=pm->u.m[0][2];
193     poutscale->x=D3DXVec3Length(&vec);
194 
195     vec.x=pm->u.m[1][0];
196     vec.y=pm->u.m[1][1];
197     vec.z=pm->u.m[1][2];
198     poutscale->y=D3DXVec3Length(&vec);
199 
200     vec.x=pm->u.m[2][0];
201     vec.y=pm->u.m[2][1];
202     vec.z=pm->u.m[2][2];
203     poutscale->z=D3DXVec3Length(&vec);
204 
205     /*Compute the translation part.*/
206     pouttranslation->x=pm->u.m[3][0];
207     pouttranslation->y=pm->u.m[3][1];
208     pouttranslation->z=pm->u.m[3][2];
209 
210     /*Let's calculate the rotation now*/
211     if ( (poutscale->x == 0.0f) || (poutscale->y == 0.0f) || (poutscale->z == 0.0f) ) return D3DERR_INVALIDCALL;
212 
213     normalized.u.m[0][0]=pm->u.m[0][0]/poutscale->x;
214     normalized.u.m[0][1]=pm->u.m[0][1]/poutscale->x;
215     normalized.u.m[0][2]=pm->u.m[0][2]/poutscale->x;
216     normalized.u.m[1][0]=pm->u.m[1][0]/poutscale->y;
217     normalized.u.m[1][1]=pm->u.m[1][1]/poutscale->y;
218     normalized.u.m[1][2]=pm->u.m[1][2]/poutscale->y;
219     normalized.u.m[2][0]=pm->u.m[2][0]/poutscale->z;
220     normalized.u.m[2][1]=pm->u.m[2][1]/poutscale->z;
221     normalized.u.m[2][2]=pm->u.m[2][2]/poutscale->z;
222 
223     D3DXQuaternionRotationMatrix(poutrotation,&normalized);
224     return S_OK;
225 }
226 
227 FLOAT WINAPI D3DXMatrixDeterminant(const D3DXMATRIX *pm)
228 {
229     FLOAT t[3], v[4];
230 
231     TRACE("pm %p\n", pm);
232 
233     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
234     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
235     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
236     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
237     v[1] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
238 
239     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
240     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
241     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
242     v[2] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
243     v[3] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
244 
245     return pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[1] +
246         pm->u.m[0][2] * v[2] + pm->u.m[0][3] * v[3];
247 }
248 
249 D3DXMATRIX* WINAPI D3DXMatrixInverse(D3DXMATRIX *pout, FLOAT *pdeterminant, const D3DXMATRIX *pm)
250 {
251     FLOAT det, t[3], v[16];
252     UINT i, j;
253 
254     TRACE("pout %p, pdeterminant %p, pm %p\n", pout, pdeterminant, pm);
255 
256     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
257     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
258     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
259     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
260     v[4] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
261 
262     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
263     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
264     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
265     v[8] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
266     v[12] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
267 
268     det = pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[4] +
269         pm->u.m[0][2] * v[8] + pm->u.m[0][3] * v[12];
270     if (det == 0.0f)
271         return NULL;
272     if (pdeterminant)
273         *pdeterminant = det;
274 
275     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
276     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
277     t[2] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
278     v[1] = -pm->u.m[0][1] * t[0] + pm->u.m[2][1] * t[1] - pm->u.m[3][1] * t[2];
279     v[5] = pm->u.m[0][0] * t[0] - pm->u.m[2][0] * t[1] + pm->u.m[3][0] * t[2];
280 
281     t[0] = pm->u.m[0][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[0][1];
282     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
283     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
284     v[9] = -pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1]- pm->u.m[0][3] * t[2];
285     v[13] = pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] + pm->u.m[0][2] * t[2];
286 
287     t[0] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
288     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
289     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
290     v[2] = pm->u.m[0][1] * t[0] - pm->u.m[1][1] * t[1] + pm->u.m[3][1] * t[2];
291     v[6] = -pm->u.m[0][0] * t[0] + pm->u.m[1][0] * t[1] - pm->u.m[3][0] * t[2];
292 
293     t[0] = pm->u.m[0][0] * pm->u.m[1][1] - pm->u.m[1][0] * pm->u.m[0][1];
294     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
295     t[2] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
296     v[10] = pm->u.m[3][3] * t[0] + pm->u.m[1][3] * t[1] + pm->u.m[0][3] * t[2];
297     v[14] = -pm->u.m[3][2] * t[0] - pm->u.m[1][2] * t[1] - pm->u.m[0][2] * t[2];
298 
299     t[0] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
300     t[1] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
301     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
302     v[3] = -pm->u.m[0][1] * t[0] + pm->u.m[1][1] * t[1] - pm->u.m[2][1] * t[2];
303     v[7] = pm->u.m[0][0] * t[0] - pm->u.m[1][0] * t[1] + pm->u.m[2][0] * t[2];
304 
305     v[11] = -pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][1]) +
306         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][1]) -
307         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][1]);
308 
309     v[15] = pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][2] - pm->u.m[1][2] * pm->u.m[2][1]) -
310         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][2] - pm->u.m[0][2] * pm->u.m[2][1]) +
311         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][2] - pm->u.m[0][2] * pm->u.m[1][1]);
312 
313     det = 1.0f / det;
314 
315     for (i = 0; i < 4; i++)
316         for (j = 0; j < 4; j++)
317             pout->u.m[i][j] = v[4 * i + j] * det;
318 
319     return pout;
320 }
321 
322 D3DXMATRIX * WINAPI D3DXMatrixLookAtLH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
323         const D3DXVECTOR3 *up)
324 {
325     D3DXVECTOR3 right, upn, vec;
326 
327     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
328 
329     D3DXVec3Subtract(&vec, at, eye);
330     D3DXVec3Normalize(&vec, &vec);
331     D3DXVec3Cross(&right, up, &vec);
332     D3DXVec3Cross(&upn, &vec, &right);
333     D3DXVec3Normalize(&right, &right);
334     D3DXVec3Normalize(&upn, &upn);
335     out->u.m[0][0] = right.x;
336     out->u.m[1][0] = right.y;
337     out->u.m[2][0] = right.z;
338     out->u.m[3][0] = -D3DXVec3Dot(&right, eye);
339     out->u.m[0][1] = upn.x;
340     out->u.m[1][1] = upn.y;
341     out->u.m[2][1] = upn.z;
342     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
343     out->u.m[0][2] = vec.x;
344     out->u.m[1][2] = vec.y;
345     out->u.m[2][2] = vec.z;
346     out->u.m[3][2] = -D3DXVec3Dot(&vec, eye);
347     out->u.m[0][3] = 0.0f;
348     out->u.m[1][3] = 0.0f;
349     out->u.m[2][3] = 0.0f;
350     out->u.m[3][3] = 1.0f;
351 
352     return out;
353 }
354 
355 D3DXMATRIX * WINAPI D3DXMatrixLookAtRH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
356         const D3DXVECTOR3 *up)
357 {
358     D3DXVECTOR3 right, upn, vec;
359 
360     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
361 
362     D3DXVec3Subtract(&vec, at, eye);
363     D3DXVec3Normalize(&vec, &vec);
364     D3DXVec3Cross(&right, up, &vec);
365     D3DXVec3Cross(&upn, &vec, &right);
366     D3DXVec3Normalize(&right, &right);
367     D3DXVec3Normalize(&upn, &upn);
368     out->u.m[0][0] = -right.x;
369     out->u.m[1][0] = -right.y;
370     out->u.m[2][0] = -right.z;
371     out->u.m[3][0] = D3DXVec3Dot(&right, eye);
372     out->u.m[0][1] = upn.x;
373     out->u.m[1][1] = upn.y;
374     out->u.m[2][1] = upn.z;
375     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
376     out->u.m[0][2] = -vec.x;
377     out->u.m[1][2] = -vec.y;
378     out->u.m[2][2] = -vec.z;
379     out->u.m[3][2] = D3DXVec3Dot(&vec, eye);
380     out->u.m[0][3] = 0.0f;
381     out->u.m[1][3] = 0.0f;
382     out->u.m[2][3] = 0.0f;
383     out->u.m[3][3] = 1.0f;
384 
385     return out;
386 }
387 
388 D3DXMATRIX* WINAPI D3DXMatrixMultiply(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
389 {
390     D3DXMATRIX out;
391     int i,j;
392 
393     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
394 
395     for (i=0; i<4; i++)
396     {
397         for (j=0; j<4; j++)
398         {
399             out.u.m[i][j] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
400         }
401     }
402 
403     *pout = out;
404     return pout;
405 }
406 
407 D3DXMATRIX* WINAPI D3DXMatrixMultiplyTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
408 {
409     D3DXMATRIX temp;
410     int i, j;
411 
412     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
413 
414     for (i = 0; i < 4; i++)
415         for (j = 0; j < 4; j++)
416             temp.u.m[j][i] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
417 
418     *pout = temp;
419     return pout;
420 }
421 
422 D3DXMATRIX* WINAPI D3DXMatrixOrthoLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
423 {
424     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
425 
426     D3DXMatrixIdentity(pout);
427     pout->u.m[0][0] = 2.0f / w;
428     pout->u.m[1][1] = 2.0f / h;
429     pout->u.m[2][2] = 1.0f / (zf - zn);
430     pout->u.m[3][2] = zn / (zn - zf);
431     return pout;
432 }
433 
434 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
435 {
436     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
437 
438     D3DXMatrixIdentity(pout);
439     pout->u.m[0][0] = 2.0f / (r - l);
440     pout->u.m[1][1] = 2.0f / (t - b);
441     pout->u.m[2][2] = 1.0f / (zf -zn);
442     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
443     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
444     pout->u.m[3][2] = zn / (zn -zf);
445     return pout;
446 }
447 
448 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
449 {
450     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
451 
452     D3DXMatrixIdentity(pout);
453     pout->u.m[0][0] = 2.0f / (r - l);
454     pout->u.m[1][1] = 2.0f / (t - b);
455     pout->u.m[2][2] = 1.0f / (zn -zf);
456     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
457     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
458     pout->u.m[3][2] = zn / (zn -zf);
459     return pout;
460 }
461 
462 D3DXMATRIX* WINAPI D3DXMatrixOrthoRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
463 {
464     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
465 
466     D3DXMatrixIdentity(pout);
467     pout->u.m[0][0] = 2.0f / w;
468     pout->u.m[1][1] = 2.0f / h;
469     pout->u.m[2][2] = 1.0f / (zn - zf);
470     pout->u.m[3][2] = zn / (zn - zf);
471     return pout;
472 }
473 
474 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovLH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
475 {
476     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
477 
478     D3DXMatrixIdentity(pout);
479     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
480     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
481     pout->u.m[2][2] = zf / (zf - zn);
482     pout->u.m[2][3] = 1.0f;
483     pout->u.m[3][2] = (zf * zn) / (zn - zf);
484     pout->u.m[3][3] = 0.0f;
485     return pout;
486 }
487 
488 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovRH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
489 {
490     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
491 
492     D3DXMatrixIdentity(pout);
493     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
494     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
495     pout->u.m[2][2] = zf / (zn - zf);
496     pout->u.m[2][3] = -1.0f;
497     pout->u.m[3][2] = (zf * zn) / (zn - zf);
498     pout->u.m[3][3] = 0.0f;
499     return pout;
500 }
501 
502 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
503 {
504     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
505 
506     D3DXMatrixIdentity(pout);
507     pout->u.m[0][0] = 2.0f * zn / w;
508     pout->u.m[1][1] = 2.0f * zn / h;
509     pout->u.m[2][2] = zf / (zf - zn);
510     pout->u.m[3][2] = (zn * zf) / (zn - zf);
511     pout->u.m[2][3] = 1.0f;
512     pout->u.m[3][3] = 0.0f;
513     return pout;
514 }
515 
516 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
517 {
518     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
519 
520     D3DXMatrixIdentity(pout);
521     pout->u.m[0][0] = 2.0f * zn / (r - l);
522     pout->u.m[1][1] = -2.0f * zn / (b - t);
523     pout->u.m[2][0] = -1.0f - 2.0f * l / (r - l);
524     pout->u.m[2][1] = 1.0f + 2.0f * t / (b - t);
525     pout->u.m[2][2] = - zf / (zn - zf);
526     pout->u.m[3][2] = (zn * zf) / (zn -zf);
527     pout->u.m[2][3] = 1.0f;
528     pout->u.m[3][3] = 0.0f;
529     return pout;
530 }
531 
532 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
533 {
534     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
535 
536     D3DXMatrixIdentity(pout);
537     pout->u.m[0][0] = 2.0f * zn / (r - l);
538     pout->u.m[1][1] = -2.0f * zn / (b - t);
539     pout->u.m[2][0] = 1.0f + 2.0f * l / (r - l);
540     pout->u.m[2][1] = -1.0f -2.0f * t / (b - t);
541     pout->u.m[2][2] = zf / (zn - zf);
542     pout->u.m[3][2] = (zn * zf) / (zn -zf);
543     pout->u.m[2][3] = -1.0f;
544     pout->u.m[3][3] = 0.0f;
545     return pout;
546 }
547 
548 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
549 {
550     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
551 
552     D3DXMatrixIdentity(pout);
553     pout->u.m[0][0] = 2.0f * zn / w;
554     pout->u.m[1][1] = 2.0f * zn / h;
555     pout->u.m[2][2] = zf / (zn - zf);
556     pout->u.m[3][2] = (zn * zf) / (zn - zf);
557     pout->u.m[2][3] = -1.0f;
558     pout->u.m[3][3] = 0.0f;
559     return pout;
560 }
561 
562 D3DXMATRIX* WINAPI D3DXMatrixReflect(D3DXMATRIX *pout, const D3DXPLANE *pplane)
563 {
564     D3DXPLANE Nplane;
565 
566     TRACE("pout %p, pplane %p\n", pout, pplane);
567 
568     D3DXPlaneNormalize(&Nplane, pplane);
569     D3DXMatrixIdentity(pout);
570     pout->u.m[0][0] = 1.0f - 2.0f * Nplane.a * Nplane.a;
571     pout->u.m[0][1] = -2.0f * Nplane.a * Nplane.b;
572     pout->u.m[0][2] = -2.0f * Nplane.a * Nplane.c;
573     pout->u.m[1][0] = -2.0f * Nplane.a * Nplane.b;
574     pout->u.m[1][1] = 1.0f - 2.0f * Nplane.b * Nplane.b;
575     pout->u.m[1][2] = -2.0f * Nplane.b * Nplane.c;
576     pout->u.m[2][0] = -2.0f * Nplane.c * Nplane.a;
577     pout->u.m[2][1] = -2.0f * Nplane.c * Nplane.b;
578     pout->u.m[2][2] = 1.0f - 2.0f * Nplane.c * Nplane.c;
579     pout->u.m[3][0] = -2.0f * Nplane.d * Nplane.a;
580     pout->u.m[3][1] = -2.0f * Nplane.d * Nplane.b;
581     pout->u.m[3][2] = -2.0f * Nplane.d * Nplane.c;
582     return pout;
583 }
584 
585 D3DXMATRIX * WINAPI D3DXMatrixRotationAxis(D3DXMATRIX *out, const D3DXVECTOR3 *v, FLOAT angle)
586 {
587     D3DXVECTOR3 nv;
588     FLOAT sangle, cangle, cdiff;
589 
590     TRACE("out %p, v %p, angle %f\n", out, v, angle);
591 
592     D3DXVec3Normalize(&nv, v);
593     sangle = sinf(angle);
594     cangle = cosf(angle);
595     cdiff = 1.0f - cangle;
596 
597     out->u.m[0][0] = cdiff * nv.x * nv.x + cangle;
598     out->u.m[1][0] = cdiff * nv.x * nv.y - sangle * nv.z;
599     out->u.m[2][0] = cdiff * nv.x * nv.z + sangle * nv.y;
600     out->u.m[3][0] = 0.0f;
601     out->u.m[0][1] = cdiff * nv.y * nv.x + sangle * nv.z;
602     out->u.m[1][1] = cdiff * nv.y * nv.y + cangle;
603     out->u.m[2][1] = cdiff * nv.y * nv.z - sangle * nv.x;
604     out->u.m[3][1] = 0.0f;
605     out->u.m[0][2] = cdiff * nv.z * nv.x - sangle * nv.y;
606     out->u.m[1][2] = cdiff * nv.z * nv.y + sangle * nv.x;
607     out->u.m[2][2] = cdiff * nv.z * nv.z + cangle;
608     out->u.m[3][2] = 0.0f;
609     out->u.m[0][3] = 0.0f;
610     out->u.m[1][3] = 0.0f;
611     out->u.m[2][3] = 0.0f;
612     out->u.m[3][3] = 1.0f;
613 
614     return out;
615 }
616 
617 D3DXMATRIX* WINAPI D3DXMatrixRotationQuaternion(D3DXMATRIX *pout, const D3DXQUATERNION *pq)
618 {
619     TRACE("pout %p, pq %p\n", pout, pq);
620 
621     D3DXMatrixIdentity(pout);
622     pout->u.m[0][0] = 1.0f - 2.0f * (pq->y * pq->y + pq->z * pq->z);
623     pout->u.m[0][1] = 2.0f * (pq->x *pq->y + pq->z * pq->w);
624     pout->u.m[0][2] = 2.0f * (pq->x * pq->z - pq->y * pq->w);
625     pout->u.m[1][0] = 2.0f * (pq->x * pq->y - pq->z * pq->w);
626     pout->u.m[1][1] = 1.0f - 2.0f * (pq->x * pq->x + pq->z * pq->z);
627     pout->u.m[1][2] = 2.0f * (pq->y *pq->z + pq->x *pq->w);
628     pout->u.m[2][0] = 2.0f * (pq->x * pq->z + pq->y * pq->w);
629     pout->u.m[2][1] = 2.0f * (pq->y *pq->z - pq->x *pq->w);
630     pout->u.m[2][2] = 1.0f - 2.0f * (pq->x * pq->x + pq->y * pq->y);
631     return pout;
632 }
633 
634 D3DXMATRIX* WINAPI D3DXMatrixRotationX(D3DXMATRIX *pout, FLOAT angle)
635 {
636     TRACE("pout %p, angle %f\n", pout, angle);
637 
638     D3DXMatrixIdentity(pout);
639     pout->u.m[1][1] = cosf(angle);
640     pout->u.m[2][2] = cosf(angle);
641     pout->u.m[1][2] = sinf(angle);
642     pout->u.m[2][1] = -sinf(angle);
643     return pout;
644 }
645 
646 D3DXMATRIX* WINAPI D3DXMatrixRotationY(D3DXMATRIX *pout, FLOAT angle)
647 {
648     TRACE("pout %p, angle %f\n", pout, angle);
649 
650     D3DXMatrixIdentity(pout);
651     pout->u.m[0][0] = cosf(angle);
652     pout->u.m[2][2] = cosf(angle);
653     pout->u.m[0][2] = -sinf(angle);
654     pout->u.m[2][0] = sinf(angle);
655     return pout;
656 }
657 
658 D3DXMATRIX * WINAPI D3DXMatrixRotationYawPitchRoll(D3DXMATRIX *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
659 {
660     FLOAT sroll, croll, spitch, cpitch, syaw, cyaw;
661 
662     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
663 
664     sroll = sinf(roll);
665     croll = cosf(roll);
666     spitch = sinf(pitch);
667     cpitch = cosf(pitch);
668     syaw = sinf(yaw);
669     cyaw = cosf(yaw);
670 
671     out->u.m[0][0] = sroll * spitch * syaw + croll * cyaw;
672     out->u.m[0][1] = sroll * cpitch;
673     out->u.m[0][2] = sroll * spitch * cyaw - croll * syaw;
674     out->u.m[0][3] = 0.0f;
675     out->u.m[1][0] = croll * spitch * syaw - sroll * cyaw;
676     out->u.m[1][1] = croll * cpitch;
677     out->u.m[1][2] = croll * spitch * cyaw + sroll * syaw;
678     out->u.m[1][3] = 0.0f;
679     out->u.m[2][0] = cpitch * syaw;
680     out->u.m[2][1] = -spitch;
681     out->u.m[2][2] = cpitch * cyaw;
682     out->u.m[2][3] = 0.0f;
683     out->u.m[3][0] = 0.0f;
684     out->u.m[3][1] = 0.0f;
685     out->u.m[3][2] = 0.0f;
686     out->u.m[3][3] = 1.0f;
687 
688     return out;
689 }
690 
691 D3DXMATRIX* WINAPI D3DXMatrixRotationZ(D3DXMATRIX *pout, FLOAT angle)
692 {
693     TRACE("pout %p, angle %f\n", pout, angle);
694 
695     D3DXMatrixIdentity(pout);
696     pout->u.m[0][0] = cosf(angle);
697     pout->u.m[1][1] = cosf(angle);
698     pout->u.m[0][1] = sinf(angle);
699     pout->u.m[1][0] = -sinf(angle);
700     return pout;
701 }
702 
703 D3DXMATRIX* WINAPI D3DXMatrixScaling(D3DXMATRIX *pout, FLOAT sx, FLOAT sy, FLOAT sz)
704 {
705     TRACE("pout %p, sx %f, sy %f, sz %f\n", pout, sx, sy, sz);
706 
707     D3DXMatrixIdentity(pout);
708     pout->u.m[0][0] = sx;
709     pout->u.m[1][1] = sy;
710     pout->u.m[2][2] = sz;
711     return pout;
712 }
713 
714 D3DXMATRIX* WINAPI D3DXMatrixShadow(D3DXMATRIX *pout, const D3DXVECTOR4 *plight, const D3DXPLANE *pplane)
715 {
716     D3DXPLANE Nplane;
717     FLOAT dot;
718 
719     TRACE("pout %p, plight %p, pplane %p\n", pout, plight, pplane);
720 
721     D3DXPlaneNormalize(&Nplane, pplane);
722     dot = D3DXPlaneDot(&Nplane, plight);
723     pout->u.m[0][0] = dot - Nplane.a * plight->x;
724     pout->u.m[0][1] = -Nplane.a * plight->y;
725     pout->u.m[0][2] = -Nplane.a * plight->z;
726     pout->u.m[0][3] = -Nplane.a * plight->w;
727     pout->u.m[1][0] = -Nplane.b * plight->x;
728     pout->u.m[1][1] = dot - Nplane.b * plight->y;
729     pout->u.m[1][2] = -Nplane.b * plight->z;
730     pout->u.m[1][3] = -Nplane.b * plight->w;
731     pout->u.m[2][0] = -Nplane.c * plight->x;
732     pout->u.m[2][1] = -Nplane.c * plight->y;
733     pout->u.m[2][2] = dot - Nplane.c * plight->z;
734     pout->u.m[2][3] = -Nplane.c * plight->w;
735     pout->u.m[3][0] = -Nplane.d * plight->x;
736     pout->u.m[3][1] = -Nplane.d * plight->y;
737     pout->u.m[3][2] = -Nplane.d * plight->z;
738     pout->u.m[3][3] = dot - Nplane.d * plight->w;
739     return pout;
740 }
741 
742 D3DXMATRIX* WINAPI D3DXMatrixTransformation(D3DXMATRIX *pout, const D3DXVECTOR3 *pscalingcenter, const D3DXQUATERNION *pscalingrotation, const D3DXVECTOR3 *pscaling, const D3DXVECTOR3 *protationcenter, const D3DXQUATERNION *protation, const D3DXVECTOR3 *ptranslation)
743 {
744     D3DXMATRIX m1, m2, m3, m4, m5, m6, m7;
745     D3DXQUATERNION prc;
746     D3DXVECTOR3 psc, pt;
747 
748     TRACE("pout %p, pscalingcenter %p, pscalingrotation %p, pscaling %p, protationcentr %p, protation %p, ptranslation %p\n",
749         pout, pscalingcenter, pscalingrotation, pscaling, protationcenter, protation, ptranslation);
750 
751     if ( !pscalingcenter )
752     {
753         psc.x = 0.0f;
754         psc.y = 0.0f;
755         psc.z = 0.0f;
756     }
757     else
758     {
759         psc.x = pscalingcenter->x;
760         psc.y = pscalingcenter->y;
761         psc.z = pscalingcenter->z;
762     }
763 
764     if ( !protationcenter )
765     {
766         prc.x = 0.0f;
767         prc.y = 0.0f;
768         prc.z = 0.0f;
769     }
770     else
771     {
772         prc.x = protationcenter->x;
773         prc.y = protationcenter->y;
774         prc.z = protationcenter->z;
775     }
776 
777     if ( !ptranslation )
778     {
779         pt.x = 0.0f;
780         pt.y = 0.0f;
781         pt.z = 0.0f;
782     }
783     else
784     {
785         pt.x = ptranslation->x;
786         pt.y = ptranslation->y;
787         pt.z = ptranslation->z;
788     }
789 
790     D3DXMatrixTranslation(&m1, -psc.x, -psc.y, -psc.z);
791 
792     if ( !pscalingrotation )
793     {
794         D3DXMatrixIdentity(&m2);
795         D3DXMatrixIdentity(&m4);
796     }
797     else
798     {
799         D3DXMatrixRotationQuaternion(&m4, pscalingrotation);
800         D3DXMatrixInverse(&m2, NULL, &m4);
801     }
802 
803     if ( !pscaling ) D3DXMatrixIdentity(&m3);
804     else D3DXMatrixScaling(&m3, pscaling->x, pscaling->y, pscaling->z);
805 
806     if ( !protation ) D3DXMatrixIdentity(&m6);
807     else D3DXMatrixRotationQuaternion(&m6, protation);
808 
809     D3DXMatrixTranslation(&m5, psc.x - prc.x,  psc.y - prc.y,  psc.z - prc.z);
810     D3DXMatrixTranslation(&m7, prc.x + pt.x, prc.y + pt.y, prc.z + pt.z);
811     D3DXMatrixMultiply(&m1, &m1, &m2);
812     D3DXMatrixMultiply(&m1, &m1, &m3);
813     D3DXMatrixMultiply(&m1, &m1, &m4);
814     D3DXMatrixMultiply(&m1, &m1, &m5);
815     D3DXMatrixMultiply(&m1, &m1, &m6);
816     D3DXMatrixMultiply(pout, &m1, &m7);
817     return pout;
818 }
819 
820 D3DXMATRIX* WINAPI D3DXMatrixTransformation2D(D3DXMATRIX *pout, const D3DXVECTOR2 *pscalingcenter, FLOAT scalingrotation, const D3DXVECTOR2 *pscaling, const D3DXVECTOR2 *protationcenter, FLOAT rotation, const D3DXVECTOR2 *ptranslation)
821 {
822     D3DXQUATERNION rot, sca_rot;
823     D3DXVECTOR3 rot_center, sca, sca_center, trans;
824 
825     TRACE("pout %p, pscalingcenter %p, scalingrotation %f, pscaling %p, protztioncenter %p, rotation %f, ptranslation %p\n",
826         pout, pscalingcenter, scalingrotation, pscaling, protationcenter, rotation, ptranslation);
827 
828     if ( pscalingcenter )
829     {
830         sca_center.x=pscalingcenter->x;
831         sca_center.y=pscalingcenter->y;
832         sca_center.z=0.0f;
833     }
834     else
835     {
836         sca_center.x=0.0f;
837         sca_center.y=0.0f;
838         sca_center.z=0.0f;
839     }
840 
841     if ( pscaling )
842     {
843         sca.x=pscaling->x;
844         sca.y=pscaling->y;
845         sca.z=1.0f;
846     }
847     else
848     {
849         sca.x=1.0f;
850         sca.y=1.0f;
851         sca.z=1.0f;
852     }
853 
854     if ( protationcenter )
855     {
856         rot_center.x=protationcenter->x;
857         rot_center.y=protationcenter->y;
858         rot_center.z=0.0f;
859     }
860     else
861     {
862         rot_center.x=0.0f;
863         rot_center.y=0.0f;
864         rot_center.z=0.0f;
865     }
866 
867     if ( ptranslation )
868     {
869         trans.x=ptranslation->x;
870         trans.y=ptranslation->y;
871         trans.z=0.0f;
872     }
873     else
874     {
875         trans.x=0.0f;
876         trans.y=0.0f;
877         trans.z=0.0f;
878     }
879 
880     rot.w=cosf(rotation/2.0f);
881     rot.x=0.0f;
882     rot.y=0.0f;
883     rot.z=sinf(rotation/2.0f);
884 
885     sca_rot.w=cosf(scalingrotation/2.0f);
886     sca_rot.x=0.0f;
887     sca_rot.y=0.0f;
888     sca_rot.z=sinf(scalingrotation/2.0f);
889 
890     D3DXMatrixTransformation(pout, &sca_center, &sca_rot, &sca, &rot_center, &rot, &trans);
891 
892     return pout;
893 }
894 
895 D3DXMATRIX* WINAPI D3DXMatrixTranslation(D3DXMATRIX *pout, FLOAT x, FLOAT y, FLOAT z)
896 {
897     TRACE("pout %p, x %f, y %f, z %f\n", pout, x, y, z);
898 
899     D3DXMatrixIdentity(pout);
900     pout->u.m[3][0] = x;
901     pout->u.m[3][1] = y;
902     pout->u.m[3][2] = z;
903     return pout;
904 }
905 
906 D3DXMATRIX* WINAPI D3DXMatrixTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm)
907 {
908     const D3DXMATRIX m = *pm;
909     int i,j;
910 
911     TRACE("pout %p, pm %p\n", pout, pm);
912 
913     for (i=0; i<4; i++)
914         for (j=0; j<4; j++) pout->u.m[i][j] = m.u.m[j][i];
915 
916     return pout;
917 }
918 
919 /*_________________D3DXMatrixStack____________________*/
920 
921 
922 static inline struct ID3DXMatrixStackImpl *impl_from_ID3DXMatrixStack(ID3DXMatrixStack *iface)
923 {
924   return CONTAINING_RECORD(iface, struct ID3DXMatrixStackImpl, ID3DXMatrixStack_iface);
925 }
926 
927 static HRESULT WINAPI ID3DXMatrixStackImpl_QueryInterface(ID3DXMatrixStack *iface, REFIID riid, void **out)
928 {
929     TRACE("iface %p, riid %s, out %p.\n", iface, debugstr_guid(riid), out);
930 
931     if (IsEqualGUID(riid, &IID_ID3DXMatrixStack)
932             || IsEqualGUID(riid, &IID_IUnknown))
933     {
934         ID3DXMatrixStack_AddRef(iface);
935         *out = iface;
936         return S_OK;
937     }
938 
939     WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));
940 
941     *out = NULL;
942     return E_NOINTERFACE;
943 }
944 
945 static ULONG WINAPI ID3DXMatrixStackImpl_AddRef(ID3DXMatrixStack *iface)
946 {
947     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
948     ULONG ref = InterlockedIncrement(&This->ref);
949     TRACE("(%p) : AddRef from %d\n", This, ref - 1);
950     return ref;
951 }
952 
953 static ULONG WINAPI ID3DXMatrixStackImpl_Release(ID3DXMatrixStack *iface)
954 {
955     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
956     ULONG ref = InterlockedDecrement(&This->ref);
957     if (!ref)
958     {
959         HeapFree(GetProcessHeap(), 0, This->stack);
960         HeapFree(GetProcessHeap(), 0, This);
961     }
962     TRACE("(%p) : ReleaseRef to %d\n", This, ref);
963     return ref;
964 }
965 
966 static D3DXMATRIX* WINAPI ID3DXMatrixStackImpl_GetTop(ID3DXMatrixStack *iface)
967 {
968     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
969 
970     TRACE("iface %p\n", iface);
971 
972     return &This->stack[This->current];
973 }
974 
975 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadIdentity(ID3DXMatrixStack *iface)
976 {
977     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
978 
979     TRACE("iface %p\n", iface);
980 
981     D3DXMatrixIdentity(&This->stack[This->current]);
982 
983     return D3D_OK;
984 }
985 
986 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
987 {
988     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
989 
990     TRACE("iface %p, pm %p\n", iface, pm);
991 
992     This->stack[This->current] = *pm;
993 
994     return D3D_OK;
995 }
996 
997 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
998 {
999     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1000 
1001     TRACE("iface %p, pm %p\n", iface, pm);
1002 
1003     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], pm);
1004 
1005     return D3D_OK;
1006 }
1007 
1008 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrixLocal(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
1009 {
1010     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1011 
1012     TRACE("iface %p, pm %p\n", iface, pm);
1013 
1014     D3DXMatrixMultiply(&This->stack[This->current], pm, &This->stack[This->current]);
1015 
1016     return D3D_OK;
1017 }
1018 
1019 static HRESULT WINAPI ID3DXMatrixStackImpl_Pop(ID3DXMatrixStack *iface)
1020 {
1021     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1022 
1023     TRACE("iface %p\n", iface);
1024 
1025     /* Popping the last element on the stack returns D3D_OK, but does nothing. */
1026     if (!This->current) return D3D_OK;
1027 
1028     if (This->current <= This->stack_size / 4 && This->stack_size >= INITIAL_STACK_SIZE * 2)
1029     {
1030         unsigned int new_size;
1031         D3DXMATRIX *new_stack;
1032 
1033         new_size = This->stack_size / 2;
1034         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1035         if (new_stack)
1036         {
1037             This->stack_size = new_size;
1038             This->stack = new_stack;
1039         }
1040     }
1041 
1042     --This->current;
1043 
1044     return D3D_OK;
1045 }
1046 
1047 static HRESULT WINAPI ID3DXMatrixStackImpl_Push(ID3DXMatrixStack *iface)
1048 {
1049     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1050 
1051     TRACE("iface %p\n", iface);
1052 
1053     if (This->current == This->stack_size - 1)
1054     {
1055         unsigned int new_size;
1056         D3DXMATRIX *new_stack;
1057 
1058         if (This->stack_size > UINT_MAX / 2) return E_OUTOFMEMORY;
1059 
1060         new_size = This->stack_size * 2;
1061         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1062         if (!new_stack) return E_OUTOFMEMORY;
1063 
1064         This->stack_size = new_size;
1065         This->stack = new_stack;
1066     }
1067 
1068     ++This->current;
1069     This->stack[This->current] = This->stack[This->current - 1];
1070 
1071     return D3D_OK;
1072 }
1073 
1074 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxis(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1075 {
1076     D3DXMATRIX temp;
1077     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1078 
1079     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1080 
1081     D3DXMatrixRotationAxis(&temp, pv, angle);
1082     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1083 
1084     return D3D_OK;
1085 }
1086 
1087 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxisLocal(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1088 {
1089     D3DXMATRIX temp;
1090     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1091 
1092     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1093 
1094     D3DXMatrixRotationAxis(&temp, pv, angle);
1095     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1096 
1097     return D3D_OK;
1098 }
1099 
1100 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRoll(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1101 {
1102     D3DXMATRIX temp;
1103     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1104 
1105     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1106 
1107     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1108     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1109 
1110     return D3D_OK;
1111 }
1112 
1113 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRollLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1114 {
1115     D3DXMATRIX temp;
1116     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1117 
1118     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1119 
1120     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1121     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1122 
1123     return D3D_OK;
1124 }
1125 
1126 static HRESULT WINAPI ID3DXMatrixStackImpl_Scale(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1127 {
1128     D3DXMATRIX temp;
1129     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1130 
1131     TRACE("iface %p,x %f, y %f, z %f\n", iface, x, y, z);
1132 
1133     D3DXMatrixScaling(&temp, x, y, z);
1134     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1135 
1136     return D3D_OK;
1137 }
1138 
1139 static HRESULT WINAPI ID3DXMatrixStackImpl_ScaleLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1140 {
1141     D3DXMATRIX temp;
1142     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1143 
1144     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1145 
1146     D3DXMatrixScaling(&temp, x, y, z);
1147     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1148 
1149     return D3D_OK;
1150 }
1151 
1152 static HRESULT WINAPI ID3DXMatrixStackImpl_Translate(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1153 {
1154     D3DXMATRIX temp;
1155     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1156 
1157     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1158 
1159     D3DXMatrixTranslation(&temp, x, y, z);
1160     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1161 
1162     return D3D_OK;
1163 }
1164 
1165 static HRESULT WINAPI ID3DXMatrixStackImpl_TranslateLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1166 {
1167     D3DXMATRIX temp;
1168     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1169 
1170     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1171 
1172     D3DXMatrixTranslation(&temp, x, y, z);
1173     D3DXMatrixMultiply(&This->stack[This->current], &temp,&This->stack[This->current]);
1174 
1175     return D3D_OK;
1176 }
1177 
1178 static const ID3DXMatrixStackVtbl ID3DXMatrixStack_Vtbl =
1179 {
1180     ID3DXMatrixStackImpl_QueryInterface,
1181     ID3DXMatrixStackImpl_AddRef,
1182     ID3DXMatrixStackImpl_Release,
1183     ID3DXMatrixStackImpl_Pop,
1184     ID3DXMatrixStackImpl_Push,
1185     ID3DXMatrixStackImpl_LoadIdentity,
1186     ID3DXMatrixStackImpl_LoadMatrix,
1187     ID3DXMatrixStackImpl_MultMatrix,
1188     ID3DXMatrixStackImpl_MultMatrixLocal,
1189     ID3DXMatrixStackImpl_RotateAxis,
1190     ID3DXMatrixStackImpl_RotateAxisLocal,
1191     ID3DXMatrixStackImpl_RotateYawPitchRoll,
1192     ID3DXMatrixStackImpl_RotateYawPitchRollLocal,
1193     ID3DXMatrixStackImpl_Scale,
1194     ID3DXMatrixStackImpl_ScaleLocal,
1195     ID3DXMatrixStackImpl_Translate,
1196     ID3DXMatrixStackImpl_TranslateLocal,
1197     ID3DXMatrixStackImpl_GetTop
1198 };
1199 
1200 HRESULT WINAPI D3DXCreateMatrixStack(DWORD flags, ID3DXMatrixStack **stack)
1201 {
1202     struct ID3DXMatrixStackImpl *object;
1203 
1204     TRACE("flags %#x, stack %p.\n", flags, stack);
1205 
1206     if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
1207     {
1208         *stack = NULL;
1209         return E_OUTOFMEMORY;
1210     }
1211     object->ID3DXMatrixStack_iface.lpVtbl = &ID3DXMatrixStack_Vtbl;
1212     object->ref = 1;
1213 
1214     if (!(object->stack = HeapAlloc(GetProcessHeap(), 0, INITIAL_STACK_SIZE * sizeof(*object->stack))))
1215     {
1216         HeapFree(GetProcessHeap(), 0, object);
1217         *stack = NULL;
1218         return E_OUTOFMEMORY;
1219     }
1220 
1221     object->current = 0;
1222     object->stack_size = INITIAL_STACK_SIZE;
1223     D3DXMatrixIdentity(&object->stack[0]);
1224 
1225     TRACE("Created matrix stack %p.\n", object);
1226 
1227     *stack = &object->ID3DXMatrixStack_iface;
1228     return D3D_OK;
1229 }
1230 
1231 /*_________________D3DXPLANE________________*/
1232 
1233 D3DXPLANE* WINAPI D3DXPlaneFromPointNormal(D3DXPLANE *pout, const D3DXVECTOR3 *pvpoint, const D3DXVECTOR3 *pvnormal)
1234 {
1235     TRACE("pout %p, pvpoint %p, pvnormal %p\n", pout, pvpoint, pvnormal);
1236 
1237     pout->a = pvnormal->x;
1238     pout->b = pvnormal->y;
1239     pout->c = pvnormal->z;
1240     pout->d = -D3DXVec3Dot(pvpoint, pvnormal);
1241     return pout;
1242 }
1243 
1244 D3DXPLANE* WINAPI D3DXPlaneFromPoints(D3DXPLANE *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3)
1245 {
1246     D3DXVECTOR3 edge1, edge2, normal, Nnormal;
1247 
1248     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
1249 
1250     edge1.x = 0.0f; edge1.y = 0.0f; edge1.z = 0.0f;
1251     edge2.x = 0.0f; edge2.y = 0.0f; edge2.z = 0.0f;
1252     D3DXVec3Subtract(&edge1, pv2, pv1);
1253     D3DXVec3Subtract(&edge2, pv3, pv1);
1254     D3DXVec3Cross(&normal, &edge1, &edge2);
1255     D3DXVec3Normalize(&Nnormal, &normal);
1256     D3DXPlaneFromPointNormal(pout, pv1, &Nnormal);
1257     return pout;
1258 }
1259 
1260 D3DXVECTOR3* WINAPI D3DXPlaneIntersectLine(D3DXVECTOR3 *pout, const D3DXPLANE *pp, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2)
1261 {
1262     D3DXVECTOR3 direction, normal;
1263     FLOAT dot, temp;
1264 
1265     TRACE("pout %p, pp %p, pv1 %p, pv2 %p\n", pout, pp, pv1, pv2);
1266 
1267     normal.x = pp->a;
1268     normal.y = pp->b;
1269     normal.z = pp->c;
1270     direction.x = pv2->x - pv1->x;
1271     direction.y = pv2->y - pv1->y;
1272     direction.z = pv2->z - pv1->z;
1273     dot = D3DXVec3Dot(&normal, &direction);
1274     if ( !dot ) return NULL;
1275     temp = ( pp->d + D3DXVec3Dot(&normal, pv1) ) / dot;
1276     pout->x = pv1->x - temp * direction.x;
1277     pout->y = pv1->y - temp * direction.y;
1278     pout->z = pv1->z - temp * direction.z;
1279     return pout;
1280 }
1281 
1282 D3DXPLANE * WINAPI D3DXPlaneNormalize(D3DXPLANE *out, const D3DXPLANE *p)
1283 {
1284     FLOAT norm;
1285 
1286     TRACE("out %p, p %p\n", out, p);
1287 
1288     norm = sqrtf(p->a * p->a + p->b * p->b + p->c * p->c);
1289     if (norm)
1290     {
1291         out->a = p->a / norm;
1292         out->b = p->b / norm;
1293         out->c = p->c / norm;
1294         out->d = p->d / norm;
1295     }
1296     else
1297     {
1298         out->a = 0.0f;
1299         out->b = 0.0f;
1300         out->c = 0.0f;
1301         out->d = 0.0f;
1302     }
1303 
1304     return out;
1305 }
1306 
1307 D3DXPLANE* WINAPI D3DXPlaneTransform(D3DXPLANE *pout, const D3DXPLANE *pplane, const D3DXMATRIX *pm)
1308 {
1309     const D3DXPLANE plane = *pplane;
1310 
1311     TRACE("pout %p, pplane %p, pm %p\n", pout, pplane, pm);
1312 
1313     pout->a = pm->u.m[0][0] * plane.a + pm->u.m[1][0] * plane.b + pm->u.m[2][0] * plane.c + pm->u.m[3][0] * plane.d;
1314     pout->b = pm->u.m[0][1] * plane.a + pm->u.m[1][1] * plane.b + pm->u.m[2][1] * plane.c + pm->u.m[3][1] * plane.d;
1315     pout->c = pm->u.m[0][2] * plane.a + pm->u.m[1][2] * plane.b + pm->u.m[2][2] * plane.c + pm->u.m[3][2] * plane.d;
1316     pout->d = pm->u.m[0][3] * plane.a + pm->u.m[1][3] * plane.b + pm->u.m[2][3] * plane.c + pm->u.m[3][3] * plane.d;
1317     return pout;
1318 }
1319 
1320 D3DXPLANE* WINAPI D3DXPlaneTransformArray(D3DXPLANE* out, UINT outstride, const D3DXPLANE* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1321 {
1322     UINT i;
1323 
1324     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1325 
1326     for (i = 0; i < elements; ++i) {
1327         D3DXPlaneTransform(
1328             (D3DXPLANE*)((char*)out + outstride * i),
1329             (const D3DXPLANE*)((const char*)in + instride * i),
1330             matrix);
1331     }
1332     return out;
1333 }
1334 
1335 /*_________________D3DXQUATERNION________________*/
1336 
1337 D3DXQUATERNION* WINAPI D3DXQuaternionBaryCentric(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, FLOAT f, FLOAT g)
1338 {
1339     D3DXQUATERNION temp1, temp2;
1340 
1341      TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, f %f, g %f\n", pout, pq1, pq2, pq3, f, g);
1342 
1343     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq2, f + g), D3DXQuaternionSlerp(&temp2, pq1, pq3, f+g), g / (f + g));
1344     return pout;
1345 }
1346 
1347 D3DXQUATERNION * WINAPI D3DXQuaternionExp(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1348 {
1349     FLOAT norm;
1350 
1351     TRACE("out %p, q %p\n", out, q);
1352 
1353     norm = sqrtf(q->x * q->x + q->y * q->y + q->z * q->z);
1354     if (norm)
1355     {
1356         out->x = sinf(norm) * q->x / norm;
1357         out->y = sinf(norm) * q->y / norm;
1358         out->z = sinf(norm) * q->z / norm;
1359         out->w = cosf(norm);
1360     }
1361     else
1362     {
1363         out->x = 0.0f;
1364         out->y = 0.0f;
1365         out->z = 0.0f;
1366         out->w = 1.0f;
1367     }
1368 
1369     return out;
1370 }
1371 
1372 D3DXQUATERNION* WINAPI D3DXQuaternionInverse(D3DXQUATERNION *pout, const D3DXQUATERNION *pq)
1373 {
1374     FLOAT norm;
1375 
1376     TRACE("pout %p, pq %p\n", pout, pq);
1377 
1378     norm = D3DXQuaternionLengthSq(pq);
1379 
1380     pout->x = -pq->x / norm;
1381     pout->y = -pq->y / norm;
1382     pout->z = -pq->z / norm;
1383     pout->w = pq->w / norm;
1384     return pout;
1385 }
1386 
1387 D3DXQUATERNION * WINAPI D3DXQuaternionLn(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1388 {
1389     FLOAT t;
1390 
1391     TRACE("out %p, q %p\n", out, q);
1392 
1393     if ((q->w >= 1.0f) || (q->w == -1.0f))
1394         t = 1.0f;
1395     else
1396         t = acosf(q->w) / sqrtf(1.0f - q->w * q->w);
1397 
1398     out->x = t * q->x;
1399     out->y = t * q->y;
1400     out->z = t * q->z;
1401     out->w = 0.0f;
1402 
1403     return out;
1404 }
1405 
1406 D3DXQUATERNION* WINAPI D3DXQuaternionMultiply(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2)
1407 {
1408     D3DXQUATERNION out;
1409 
1410     TRACE("pout %p, pq1 %p, pq2 %p\n", pout, pq1, pq2);
1411 
1412     out.x = pq2->w * pq1->x + pq2->x * pq1->w + pq2->y * pq1->z - pq2->z * pq1->y;
1413     out.y = pq2->w * pq1->y - pq2->x * pq1->z + pq2->y * pq1->w + pq2->z * pq1->x;
1414     out.z = pq2->w * pq1->z + pq2->x * pq1->y - pq2->y * pq1->x + pq2->z * pq1->w;
1415     out.w = pq2->w * pq1->w - pq2->x * pq1->x - pq2->y * pq1->y - pq2->z * pq1->z;
1416     *pout = out;
1417     return pout;
1418 }
1419 
1420 D3DXQUATERNION * WINAPI D3DXQuaternionNormalize(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1421 {
1422     FLOAT norm;
1423 
1424     TRACE("out %p, q %p\n", out, q);
1425 
1426     norm = D3DXQuaternionLength(q);
1427 
1428     out->x = q->x / norm;
1429     out->y = q->y / norm;
1430     out->z = q->z / norm;
1431     out->w = q->w / norm;
1432 
1433     return out;
1434 }
1435 
1436 D3DXQUATERNION * WINAPI D3DXQuaternionRotationAxis(D3DXQUATERNION *out, const D3DXVECTOR3 *v, FLOAT angle)
1437 {
1438     D3DXVECTOR3 temp;
1439 
1440     TRACE("out %p, v %p, angle %f\n", out, v, angle);
1441 
1442     D3DXVec3Normalize(&temp, v);
1443 
1444     out->x = sinf(angle / 2.0f) * temp.x;
1445     out->y = sinf(angle / 2.0f) * temp.y;
1446     out->z = sinf(angle / 2.0f) * temp.z;
1447     out->w = cosf(angle / 2.0f);
1448 
1449     return out;
1450 }
1451 
1452 D3DXQUATERNION * WINAPI D3DXQuaternionRotationMatrix(D3DXQUATERNION *out, const D3DXMATRIX *m)
1453 {
1454     FLOAT s, trace;
1455 
1456     TRACE("out %p, m %p\n", out, m);
1457 
1458     trace = m->u.m[0][0] + m->u.m[1][1] + m->u.m[2][2] + 1.0f;
1459     if (trace > 1.0f)
1460     {
1461         s = 2.0f * sqrtf(trace);
1462         out->x = (m->u.m[1][2] - m->u.m[2][1]) / s;
1463         out->y = (m->u.m[2][0] - m->u.m[0][2]) / s;
1464         out->z = (m->u.m[0][1] - m->u.m[1][0]) / s;
1465         out->w = 0.25f * s;
1466     }
1467     else
1468     {
1469         int i, maxi = 0;
1470 
1471         for (i = 1; i < 3; i++)
1472         {
1473             if (m->u.m[i][i] > m->u.m[maxi][maxi])
1474                 maxi = i;
1475         }
1476 
1477         switch (maxi)
1478         {
1479             case 0:
1480                 s = 2.0f * sqrtf(1.0f + m->u.m[0][0] - m->u.m[1][1] - m->u.m[2][2]);
1481                 out->x = 0.25f * s;
1482                 out->y = (m->u.m[0][1] + m->u.m[1][0]) / s;
1483                 out->z = (m->u.m[0][2] + m->u.m[2][0]) / s;
1484                 out->w = (m->u.m[1][2] - m->u.m[2][1]) / s;
1485                 break;
1486 
1487             case 1:
1488                 s = 2.0f * sqrtf(1.0f + m->u.m[1][1] - m->u.m[0][0] - m->u.m[2][2]);
1489                 out->x = (m->u.m[0][1] + m->u.m[1][0]) / s;
1490                 out->y = 0.25f * s;
1491                 out->z = (m->u.m[1][2] + m->u.m[2][1]) / s;
1492                 out->w = (m->u.m[2][0] - m->u.m[0][2]) / s;
1493                 break;
1494 
1495             case 2:
1496                 s = 2.0f * sqrtf(1.0f + m->u.m[2][2] - m->u.m[0][0] - m->u.m[1][1]);
1497                 out->x = (m->u.m[0][2] + m->u.m[2][0]) / s;
1498                 out->y = (m->u.m[1][2] + m->u.m[2][1]) / s;
1499                 out->z = 0.25f * s;
1500                 out->w = (m->u.m[0][1] - m->u.m[1][0]) / s;
1501                 break;
1502         }
1503     }
1504 
1505     return out;
1506 }
1507 
1508 D3DXQUATERNION * WINAPI D3DXQuaternionRotationYawPitchRoll(D3DXQUATERNION *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
1509 {
1510     FLOAT syaw, cyaw, spitch, cpitch, sroll, croll;
1511 
1512     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
1513 
1514     syaw = sinf(yaw / 2.0f);
1515     cyaw = cosf(yaw / 2.0f);
1516     spitch = sinf(pitch / 2.0f);
1517     cpitch = cosf(pitch / 2.0f);
1518     sroll = sinf(roll / 2.0f);
1519     croll = cosf(roll / 2.0f);
1520 
1521     out->x = syaw * cpitch * sroll + cyaw * spitch * croll;
1522     out->y = syaw * cpitch * croll - cyaw * spitch * sroll;
1523     out->z = cyaw * cpitch * sroll - syaw * spitch * croll;
1524     out->w = cyaw * cpitch * croll + syaw * spitch * sroll;
1525 
1526     return out;
1527 }
1528 
1529 D3DXQUATERNION * WINAPI D3DXQuaternionSlerp(D3DXQUATERNION *out, const D3DXQUATERNION *q1,
1530         const D3DXQUATERNION *q2, FLOAT t)
1531 {
1532     FLOAT dot, temp;
1533 
1534     TRACE("out %p, q1 %p, q2 %p, t %f\n", out, q1, q2, t);
1535 
1536     temp = 1.0f - t;
1537     dot = D3DXQuaternionDot(q1, q2);
1538     if (dot < 0.0f)
1539     {
1540         t = -t;
1541         dot = -dot;
1542     }
1543 
1544     if (1.0f - dot > 0.001f)
1545     {
1546         FLOAT theta = acosf(dot);
1547 
1548         temp = sinf(theta * temp) / sinf(theta);
1549         t = sinf(theta * t) / sinf(theta);
1550     }
1551 
1552     out->x = temp * q1->x + t * q2->x;
1553     out->y = temp * q1->y + t * q2->y;
1554     out->z = temp * q1->z + t * q2->z;
1555     out->w = temp * q1->w + t * q2->w;
1556 
1557     return out;
1558 }
1559 
1560 D3DXQUATERNION* WINAPI D3DXQuaternionSquad(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, const D3DXQUATERNION *pq4, FLOAT t)
1561 {
1562     D3DXQUATERNION temp1, temp2;
1563 
1564     TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, pq4 %p, t %f\n", pout, pq1, pq2, pq3, pq4, t);
1565 
1566     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq4, t), D3DXQuaternionSlerp(&temp2, pq2, pq3, t), 2.0f * t * (1.0f - t));
1567     return pout;
1568 }
1569 
1570 static D3DXQUATERNION add_diff(const D3DXQUATERNION *q1, const D3DXQUATERNION *q2, const FLOAT add)
1571 {
1572     D3DXQUATERNION temp;
1573 
1574     temp.x = q1->x + add * q2->x;
1575     temp.y = q1->y + add * q2->y;
1576     temp.z = q1->z + add * q2->z;
1577     temp.w = q1->w + add * q2->w;
1578 
1579     return temp;
1580 }
1581 
1582 void WINAPI D3DXQuaternionSquadSetup(D3DXQUATERNION *paout, D3DXQUATERNION *pbout, D3DXQUATERNION *pcout, const D3DXQUATERNION *pq0, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3)
1583 {
1584     D3DXQUATERNION q, temp1, temp2, temp3, zero;
1585     D3DXQUATERNION aout, cout;
1586 
1587     TRACE("paout %p, pbout %p, pcout %p, pq0 %p, pq1 %p, pq2 %p, pq3 %p\n", paout, pbout, pcout, pq0, pq1, pq2, pq3);
1588 
1589     zero.x = 0.0f;
1590     zero.y = 0.0f;
1591     zero.z = 0.0f;
1592     zero.w = 0.0f;
1593 
1594     if (D3DXQuaternionDot(pq0, pq1) < 0.0f)
1595         temp2 = add_diff(&zero, pq0, -1.0f);
1596     else
1597         temp2 = *pq0;
1598 
1599     if (D3DXQuaternionDot(pq1, pq2) < 0.0f)
1600         cout = add_diff(&zero, pq2, -1.0f);
1601     else
1602         cout = *pq2;
1603 
1604     if (D3DXQuaternionDot(&cout, pq3) < 0.0f)
1605         temp3 = add_diff(&zero, pq3, -1.0f);
1606     else
1607         temp3 = *pq3;
1608 
1609     D3DXQuaternionInverse(&temp1, pq1);
1610     D3DXQuaternionMultiply(&temp2, &temp1, &temp2);
1611     D3DXQuaternionLn(&temp2, &temp2);
1612     D3DXQuaternionMultiply(&q, &temp1, &cout);
1613     D3DXQuaternionLn(&q, &q);
1614     temp1 = add_diff(&temp2, &q, 1.0f);
1615     temp1.x *= -0.25f;
1616     temp1.y *= -0.25f;
1617     temp1.z *= -0.25f;
1618     temp1.w *= -0.25f;
1619     D3DXQuaternionExp(&temp1, &temp1);
1620     D3DXQuaternionMultiply(&aout, pq1, &temp1);
1621 
1622     D3DXQuaternionInverse(&temp1, &cout);
1623     D3DXQuaternionMultiply(&temp2, &temp1, pq1);
1624     D3DXQuaternionLn(&temp2, &temp2);
1625     D3DXQuaternionMultiply(&q, &temp1, &temp3);
1626     D3DXQuaternionLn(&q, &q);
1627     temp1 = add_diff(&temp2, &q, 1.0f);
1628     temp1.x *= -0.25f;
1629     temp1.y *= -0.25f;
1630     temp1.z *= -0.25f;
1631     temp1.w *= -0.25f;
1632     D3DXQuaternionExp(&temp1, &temp1);
1633     D3DXQuaternionMultiply(pbout, &cout, &temp1);
1634     *paout = aout;
1635     *pcout = cout;
1636 }
1637 
1638 void WINAPI D3DXQuaternionToAxisAngle(const D3DXQUATERNION *pq, D3DXVECTOR3 *paxis, FLOAT *pangle)
1639 {
1640     TRACE("pq %p, paxis %p, pangle %p\n", pq, paxis, pangle);
1641 
1642     if (paxis)
1643     {
1644         paxis->x = pq->x;
1645         paxis->y = pq->y;
1646         paxis->z = pq->z;
1647     }
1648     if (pangle)
1649         *pangle = 2.0f * acosf(pq->w);
1650 }
1651 
1652 /*_________________D3DXVec2_____________________*/
1653 
1654 D3DXVECTOR2* WINAPI D3DXVec2BaryCentric(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT f, FLOAT g)
1655 {
1656     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1657 
1658     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1659     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1660     return pout;
1661 }
1662 
1663 D3DXVECTOR2* WINAPI D3DXVec2CatmullRom(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv0, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT s)
1664 {
1665     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1666 
1667     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1668     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1669     return pout;
1670 }
1671 
1672 D3DXVECTOR2* WINAPI D3DXVec2Hermite(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pt1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pt2, FLOAT s)
1673 {
1674     FLOAT h1, h2, h3, h4;
1675 
1676     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1677 
1678     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1679     h2 = s * s * s - 2.0f * s * s + s;
1680     h3 = -2.0f * s * s * s + 3.0f * s * s;
1681     h4 = s * s * s - s * s;
1682 
1683     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1684     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1685     return pout;
1686 }
1687 
1688 D3DXVECTOR2* WINAPI D3DXVec2Normalize(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv)
1689 {
1690     FLOAT norm;
1691 
1692     TRACE("pout %p, pv %p\n", pout, pv);
1693 
1694     norm = D3DXVec2Length(pv);
1695     if ( !norm )
1696     {
1697         pout->x = 0.0f;
1698         pout->y = 0.0f;
1699     }
1700     else
1701     {
1702         pout->x = pv->x / norm;
1703         pout->y = pv->y / norm;
1704     }
1705 
1706     return pout;
1707 }
1708 
1709 D3DXVECTOR4* WINAPI D3DXVec2Transform(D3DXVECTOR4 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1710 {
1711     D3DXVECTOR4 out;
1712 
1713     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1714 
1715     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y  + pm->u.m[3][0];
1716     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y  + pm->u.m[3][1];
1717     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y  + pm->u.m[3][2];
1718     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y  + pm->u.m[3][3];
1719     *pout = out;
1720     return pout;
1721 }
1722 
1723 D3DXVECTOR4* WINAPI D3DXVec2TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1724 {
1725     UINT i;
1726 
1727     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1728 
1729     for (i = 0; i < elements; ++i) {
1730         D3DXVec2Transform(
1731             (D3DXVECTOR4*)((char*)out + outstride * i),
1732             (const D3DXVECTOR2*)((const char*)in + instride * i),
1733             matrix);
1734     }
1735     return out;
1736 }
1737 
1738 D3DXVECTOR2* WINAPI D3DXVec2TransformCoord(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1739 {
1740     D3DXVECTOR2 v;
1741     FLOAT norm;
1742 
1743     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1744 
1745     v = *pv;
1746     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[3][3];
1747 
1748     pout->x = (pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[3][0]) / norm;
1749     pout->y = (pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[3][1]) / norm;
1750 
1751     return pout;
1752 }
1753 
1754 D3DXVECTOR2* WINAPI D3DXVec2TransformCoordArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1755 {
1756     UINT i;
1757 
1758     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1759 
1760     for (i = 0; i < elements; ++i) {
1761         D3DXVec2TransformCoord(
1762             (D3DXVECTOR2*)((char*)out + outstride * i),
1763             (const D3DXVECTOR2*)((const char*)in + instride * i),
1764             matrix);
1765     }
1766     return out;
1767 }
1768 
1769 D3DXVECTOR2* WINAPI D3DXVec2TransformNormal(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1770 {
1771     const D3DXVECTOR2 v = *pv;
1772 
1773     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1774 
1775     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y;
1776     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y;
1777     return pout;
1778 }
1779 
1780 D3DXVECTOR2* WINAPI D3DXVec2TransformNormalArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2 *in, UINT instride, const D3DXMATRIX *matrix, UINT elements)
1781 {
1782     UINT i;
1783 
1784     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1785 
1786     for (i = 0; i < elements; ++i) {
1787         D3DXVec2TransformNormal(
1788             (D3DXVECTOR2*)((char*)out + outstride * i),
1789             (const D3DXVECTOR2*)((const char*)in + instride * i),
1790             matrix);
1791     }
1792     return out;
1793 }
1794 
1795 /*_________________D3DXVec3_____________________*/
1796 
1797 D3DXVECTOR3* WINAPI D3DXVec3BaryCentric(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT f, FLOAT g)
1798 {
1799     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1800 
1801     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1802     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1803     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
1804     return pout;
1805 }
1806 
1807 D3DXVECTOR3* WINAPI D3DXVec3CatmullRom( D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv0, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT s)
1808 {
1809     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1810 
1811     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1812     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1813     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
1814     return pout;
1815 }
1816 
1817 D3DXVECTOR3* WINAPI D3DXVec3Hermite(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pt1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pt2, FLOAT s)
1818 {
1819     FLOAT h1, h2, h3, h4;
1820 
1821     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1822 
1823     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1824     h2 = s * s * s - 2.0f * s * s + s;
1825     h3 = -2.0f * s * s * s + 3.0f * s * s;
1826     h4 = s * s * s - s * s;
1827 
1828     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1829     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1830     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
1831     return pout;
1832 }
1833 
1834 D3DXVECTOR3* WINAPI D3DXVec3Normalize(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv)
1835 {
1836     FLOAT norm;
1837 
1838     TRACE("pout %p, pv %p\n", pout, pv);
1839 
1840     norm = D3DXVec3Length(pv);
1841     if ( !norm )
1842     {
1843         pout->x = 0.0f;
1844         pout->y = 0.0f;
1845         pout->z = 0.0f;
1846     }
1847     else
1848     {
1849         pout->x = pv->x / norm;
1850         pout->y = pv->y / norm;
1851         pout->z = pv->z / norm;
1852     }
1853 
1854     return pout;
1855 }
1856 
1857 D3DXVECTOR3* WINAPI D3DXVec3Project(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1858 {
1859     D3DXMATRIX m;
1860 
1861     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworld %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1862 
1863     D3DXMatrixIdentity(&m);
1864     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1865     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1866     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
1867 
1868     D3DXVec3TransformCoord(pout, pv, &m);
1869 
1870     if (pviewport)
1871     {
1872         pout->x = pviewport->X +  ( 1.0f + pout->x ) * pviewport->Width / 2.0f;
1873         pout->y = pviewport->Y +  ( 1.0f - pout->y ) * pviewport->Height / 2.0f;
1874         pout->z = pviewport->MinZ + pout->z * ( pviewport->MaxZ - pviewport->MinZ );
1875     }
1876     return pout;
1877 }
1878 
1879 D3DXVECTOR3* WINAPI D3DXVec3ProjectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1880 {
1881     UINT i;
1882 
1883     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1884         out, outstride, in, instride, viewport, projection, view, world, elements);
1885 
1886     for (i = 0; i < elements; ++i) {
1887         D3DXVec3Project(
1888             (D3DXVECTOR3*)((char*)out + outstride * i),
1889             (const D3DXVECTOR3*)((const char*)in + instride * i),
1890             viewport, projection, view, world);
1891     }
1892     return out;
1893 }
1894 
1895 D3DXVECTOR4* WINAPI D3DXVec3Transform(D3DXVECTOR4 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1896 {
1897     D3DXVECTOR4 out;
1898 
1899     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1900 
1901     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0];
1902     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1];
1903     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2];
1904     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3];
1905     *pout = out;
1906     return pout;
1907 }
1908 
1909 D3DXVECTOR4* WINAPI D3DXVec3TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1910 {
1911     UINT i;
1912 
1913     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1914 
1915     for (i = 0; i < elements; ++i) {
1916         D3DXVec3Transform(
1917             (D3DXVECTOR4*)((char*)out + outstride * i),
1918             (const D3DXVECTOR3*)((const char*)in + instride * i),
1919             matrix);
1920     }
1921     return out;
1922 }
1923 
1924 D3DXVECTOR3* WINAPI D3DXVec3TransformCoord(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1925 {
1926     D3DXVECTOR3 out;
1927     FLOAT norm;
1928 
1929     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1930 
1931     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] *pv->z + pm->u.m[3][3];
1932 
1933     out.x = (pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0]) / norm;
1934     out.y = (pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1]) / norm;
1935     out.z = (pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2]) / norm;
1936 
1937     *pout = out;
1938 
1939     return pout;
1940 }
1941 
1942 D3DXVECTOR3* WINAPI D3DXVec3TransformCoordArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1943 {
1944     UINT i;
1945 
1946     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1947 
1948     for (i = 0; i < elements; ++i) {
1949         D3DXVec3TransformCoord(
1950             (D3DXVECTOR3*)((char*)out + outstride * i),
1951             (const D3DXVECTOR3*)((const char*)in + instride * i),
1952             matrix);
1953     }
1954     return out;
1955 }
1956 
1957 D3DXVECTOR3* WINAPI D3DXVec3TransformNormal(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1958 {
1959     const D3DXVECTOR3 v = *pv;
1960 
1961     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1962 
1963     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[2][0] * v.z;
1964     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[2][1] * v.z;
1965     pout->z = pm->u.m[0][2] * v.x + pm->u.m[1][2] * v.y + pm->u.m[2][2] * v.z;
1966     return pout;
1967 
1968 }
1969 
1970 D3DXVECTOR3* WINAPI D3DXVec3TransformNormalArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1971 {
1972     UINT i;
1973 
1974     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1975 
1976     for (i = 0; i < elements; ++i) {
1977         D3DXVec3TransformNormal(
1978             (D3DXVECTOR3*)((char*)out + outstride * i),
1979             (const D3DXVECTOR3*)((const char*)in + instride * i),
1980             matrix);
1981     }
1982     return out;
1983 }
1984 
1985 D3DXVECTOR3* WINAPI D3DXVec3Unproject(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1986 {
1987     D3DXMATRIX m;
1988 
1989     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworlds %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1990 
1991     D3DXMatrixIdentity(&m);
1992     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1993     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1994     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
1995     D3DXMatrixInverse(&m, NULL, &m);
1996 
1997     *pout = *pv;
1998     if (pviewport)
1999     {
2000         pout->x = 2.0f * ( pout->x - pviewport->X ) / pviewport->Width - 1.0f;
2001         pout->y = 1.0f - 2.0f * ( pout->y - pviewport->Y ) / pviewport->Height;
2002         pout->z = ( pout->z - pviewport->MinZ) / ( pviewport->MaxZ - pviewport->MinZ );
2003     }
2004     D3DXVec3TransformCoord(pout, pout, &m);
2005     return pout;
2006 }
2007 
2008 D3DXVECTOR3* WINAPI D3DXVec3UnprojectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
2009 {
2010     UINT i;
2011 
2012     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
2013         out, outstride, in, instride, viewport, projection, view, world, elements);
2014 
2015     for (i = 0; i < elements; ++i) {
2016         D3DXVec3Unproject(
2017             (D3DXVECTOR3*)((char*)out + outstride * i),
2018             (const D3DXVECTOR3*)((const char*)in + instride * i),
2019             viewport, projection, view, world);
2020     }
2021     return out;
2022 }
2023 
2024 /*_________________D3DXVec4_____________________*/
2025 
2026 D3DXVECTOR4* WINAPI D3DXVec4BaryCentric(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT f, FLOAT g)
2027 {
2028     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
2029 
2030     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
2031     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
2032     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
2033     pout->w = (1.0f-f-g) * (pv1->w) + f * (pv2->w) + g * (pv3->w);
2034     return pout;
2035 }
2036 
2037 D3DXVECTOR4* WINAPI D3DXVec4CatmullRom(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv0, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT s)
2038 {
2039     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
2040 
2041     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
2042     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
2043     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
2044     pout->w = 0.5f * (2.0f * pv1->w + (pv2->w - pv0->w) *s + (2.0f *pv0->w - 5.0f * pv1->w + 4.0f * pv2->w - pv3->w) * s * s + (pv3->w -3.0f * pv2->w + 3.0f * pv1->w - pv0->w) * s * s * s);
2045     return pout;
2046 }
2047 
2048 D3DXVECTOR4* WINAPI D3DXVec4Cross(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3)
2049 {
2050     D3DXVECTOR4 out;
2051 
2052     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
2053 
2054     out.x = pv1->y * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->y * pv3->w - pv3->y * pv2->w) + pv1->w * (pv2->y * pv3->z - pv2->z *pv3->y);
2055     out.y = -(pv1->x * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->x * pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->z - pv3->x * pv2->z));
2056     out.z = pv1->x * (pv2->y * pv3->w - pv3->y * pv2->w) - pv1->y * (pv2->x *pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->y - pv3->x * pv2->y);
2057     out.w = -(pv1->x * (pv2->y * pv3->z - pv3->y * pv2->z) - pv1->y * (pv2->x * pv3->z - pv3->x *pv2->z) + pv1->z * (pv2->x * pv3->y - pv3->x * pv2->y));
2058     *pout = out;
2059     return pout;
2060 }
2061 
2062 D3DXVECTOR4* WINAPI D3DXVec4Hermite(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pt1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pt2, FLOAT s)
2063 {
2064     FLOAT h1, h2, h3, h4;
2065 
2066     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
2067 
2068     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
2069     h2 = s * s * s - 2.0f * s * s + s;
2070     h3 = -2.0f * s * s * s + 3.0f * s * s;
2071     h4 = s * s * s - s * s;
2072 
2073     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
2074     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
2075     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
2076     pout->w = h1 * (pv1->w) + h2 * (pt1->w) + h3 * (pv2->w) + h4 * (pt2->w);
2077     return pout;
2078 }
2079 
2080 D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv)
2081 {
2082     FLOAT norm;
2083 
2084     TRACE("pout %p, pv %p\n", pout, pv);
2085 
2086     norm = D3DXVec4Length(pv);
2087 
2088     pout->x = pv->x / norm;
2089     pout->y = pv->y / norm;
2090     pout->z = pv->z / norm;
2091     pout->w = pv->w / norm;
2092 
2093     return pout;
2094 }
2095 
2096 D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv, const D3DXMATRIX *pm)
2097 {
2098     D3DXVECTOR4 out;
2099 
2100     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
2101 
2102     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0] * pv->w;
2103     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1] * pv->w;
2104     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2] * pv->w;
2105     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3] * pv->w;
2106     *pout = out;
2107     return pout;
2108 }
2109 
2110 D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR4* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
2111 {
2112     UINT i;
2113 
2114     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
2115 
2116     for (i = 0; i < elements; ++i) {
2117         D3DXVec4Transform(
2118             (D3DXVECTOR4*)((char*)out + outstride * i),
2119             (const D3DXVECTOR4*)((const char*)in + instride * i),
2120             matrix);
2121     }
2122     return out;
2123 }
2124 
/* Converts a single-precision float to a 16-bit half-float bit pattern
 * (1 sign bit, 5 exponent bits with bias 15, 10 fraction bits).
 *
 * Infinities and NaNs both produce 0x7fff (0xffff when negative). Finite
 * values that need an exponent field above 31 also produce 0x7fff plus the
 * sign bit, while an exponent field of exactly 31 is emitted like any other
 * number. Values too small for a denormal become signed zero. */
unsigned short float_32_to_16(const float in)
{
    const int negative = (copysignf(1, in) < 0);
    const unsigned short sign_bit = negative ? 0x8000 : 0x0000;
    float scaled = fabsf(in);
    int exponent = 0, unrounded_exponent;
    unsigned int significand;
    unsigned short half;

    /* Special inputs first. */
    if (isinf(in) || isnan(in))
        return negative ? 0xffff : 0x7fff;
    if (in == 0.0f)
        return sign_bit;

    /* Scale the magnitude into [2^10, 2^11) so that its integer part is the
     * 11-bit significand (implicit leading one included), tracking the power
     * of two that was factored out. */
    while (scaled < (float)(1u << 10))
    {
        scaled *= 2.0f;
        exponent--;
    }
    while (scaled >= (float)(1u << 11))
    {
        scaled /= 2.0f;
        exponent++;
    }

    /* +10 compensates for the significand scaling, +15 is the half-float
     * exponent bias. */
    exponent += 10 + 15;
    unrounded_exponent = exponent;

    /* Round to nearest, with ties going to the even significand. */
    significand = (unsigned int) scaled;
    if ((scaled - significand == 0.5f && significand % 2 == 1) ||
        (scaled - significand > 0.5f))
    {
        significand++;
    }
    if (significand == 2048)
    {
        /* Rounding carried into the next binade. */
        significand = 1024;
        exponent++;
    }

    if (exponent > 31)
    {
        /* Too large to encode. */
        half = 0x7fff;
    }
    else if (exponent > 0)
    {
        /* Normalized half float: drop the implicit leading one. */
        half = (exponent << 10) | (significand & 0x3ff);
    }
    else
    {
        /* Denormalized half float. */

        /* Too small even for a denormal: signed zero. */
        if (exponent < -11)
            return sign_bit;

        /* Redo the rounding from the unrounded value, keeping 13 extra bits
         * of the single-precision significand for the rounding decision. */
        exponent = unrounded_exponent;
        significand = (unsigned int)(scaled * (1u << 13));
        significand >>= 1 - exponent; /* denormalize */

        significand -= ~(significand >> 13) & 1; /* round half to even */
        /* Drop all but one of the extra bits; the remaining one decides
         * whether to round up. */
        significand >>= 12;
        half = (significand >> 1) + (significand & 1);
    }

    return half | sign_bit;
}
2209 
2210 D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, const FLOAT *pin, UINT n)
2211 {
2212     unsigned int i;
2213 
2214     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2215 
2216     for (i = 0; i < n; ++i)
2217     {
2218         pout[i].value = float_32_to_16(pin[i]);
2219     }
2220 
2221     return pout;
2222 }
2223 
2224 /* Native d3dx9's D3DXFloat16to32Array lacks support for NaN and Inf. Specifically, e = 16 is treated as a
2225  * regular number - e.g., 0x7fff is converted to 131008.0 and 0xffff to -131008.0. */
2226 float float_16_to_32(const unsigned short in)
2227 {
2228     const unsigned short s = (in & 0x8000);
2229     const unsigned short e = (in & 0x7C00) >> 10;
2230     const unsigned short m = in & 0x3FF;
2231     const float sgn = (s ? -1.0f : 1.0f);
2232 
2233     if (e == 0)
2234     {
2235         if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */
2236         else return sgn * powf(2, -14.0f) * (m / 1024.0f);
2237     }
2238     else
2239     {
2240         return sgn * powf(2, e - 15.0f) * (1.0f + (m / 1024.0f));
2241     }
2242 }
2243 
2244 FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, const D3DXFLOAT16 *pin, UINT n)
2245 {
2246     unsigned int i;
2247 
2248     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2249 
2250     for (i = 0; i < n; ++i)
2251     {
2252         pout[i] = float_16_to_32(pin[i].value);
2253     }
2254 
2255     return pout;
2256 }
2257 
2258 /*_________________D3DXSH________________*/
2259 
2260 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)
2261 {
2262     UINT i;
2263 
2264     TRACE("out %p, order %u, a %p, b %p\n", out, order, a, b);
2265 
2266     for (i = 0; i < order * order; i++)
2267         out[i] = a[i] + b[i];
2268 
2269     return out;
2270 }
2271 
2272 FLOAT WINAPI D3DXSHDot(UINT order, const FLOAT *a, const FLOAT *b)
2273 {
2274     FLOAT s;
2275     UINT i;
2276 
2277     TRACE("order %u, a %p, b %p\n", order, a, b);
2278 
2279     s = a[0] * b[0];
2280     for (i = 1; i < order * order; i++)
2281         s += a[i] * b[i];
2282 
2283     return s;
2284 }
2285 
2286 static void weightedcapintegrale(FLOAT *out, UINT order, FLOAT angle)
2287 {
2288     FLOAT coeff[3];
2289 
2290     coeff[0] = cosf(angle);
2291 
2292     out[0] = 2.0f * D3DX_PI * (1.0f - coeff[0]);
2293     out[1] = D3DX_PI * sinf(angle) * sinf(angle);
2294     if (order <= 2)
2295         return;
2296 
2297     out[2] = coeff[0] * out[1];
2298     if (order == 3)
2299         return;
2300 
2301     coeff[1] = coeff[0] * coeff[0];
2302     coeff[2] = coeff[1] * coeff[1];
2303 
2304     out[3] = D3DX_PI * (-1.25f * coeff[2] + 1.5f * coeff[1] - 0.25f);
2305     if (order == 4)
2306         return;
2307 
2308     out[4] = -0.25f * D3DX_PI * coeff[0] * (7.0f * coeff[2] - 10.0f * coeff[1] + 3.0f);
2309     if (order == 5)
2310         return;
2311 
2312     out[5] = D3DX_PI * (-2.625f * coeff[2] * coeff[1] + 4.375f * coeff[2] - 1.875f * coeff[1] + 0.125f);
2313 }
2314 
2315 HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2316     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2317 {
2318     FLOAT cap[6], clamped_angle, norm, scale, temp;
2319     UINT i, index, j;
2320 
2321     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2322         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2323 
2324     if (radius <= 0.0f)
2325         return D3DXSHEvalDirectionalLight(order, dir, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2326 
2327     clamped_angle = (radius > D3DX_PI / 2.0f) ? (D3DX_PI / 2.0f) : radius;
2328     norm = sinf(clamped_angle) * sinf(clamped_angle);
2329 
2330     if (order > D3DXSH_MAXORDER)
2331     {
2332         WARN("Order clamped at D3DXSH_MAXORDER\n");
2333         order = D3DXSH_MAXORDER;
2334     }
2335 
2336     weightedcapintegrale(cap, order, radius);
2337     D3DXSHEvalDirection(rout, order, dir);
2338 
2339     for (i = 0; i < order; i++)
2340     {
2341         scale = cap[i] / norm;
2342 
2343         for (j = 0; j < 2 * i + 1; j++)
2344         {
2345             index = i * i + j;
2346             temp = rout[index] * scale;
2347 
2348             rout[index] = temp * Rintensity;
2349             if (gout)
2350                 gout[index] = temp * Gintensity;
2351             if (bout)
2352                 bout[index] = temp * Bintensity;
2353         }
2354     }
2355 
2356     return D3D_OK;
2357 }
2358 
2359 FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
2360 {
2361     const FLOAT dirxx = dir->x * dir->x;
2362     const FLOAT dirxy = dir->x * dir->y;
2363     const FLOAT dirxz = dir->x * dir->z;
2364     const FLOAT diryy = dir->y * dir->y;
2365     const FLOAT diryz = dir->y * dir->z;
2366     const FLOAT dirzz = dir->z * dir->z;
2367     const FLOAT dirxxxx = dirxx * dirxx;
2368     const FLOAT diryyyy = diryy * diryy;
2369     const FLOAT dirzzzz = dirzz * dirzz;
2370     const FLOAT dirxyxy = dirxy * dirxy;
2371 
2372     TRACE("out %p, order %u, dir %p\n", out, order, dir);
2373 
2374     if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
2375         return out;
2376 
2377     out[0] = 0.5f / sqrtf(D3DX_PI);
2378     out[1] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->y;
2379     out[2] = 0.5f / sqrtf(D3DX_PI / 3.0f) * dir->z;
2380     out[3] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->x;
2381     if (order == 2)
2382         return out;
2383 
2384     out[4] = 0.5f / sqrtf(D3DX_PI / 15.0f) * dirxy;
2385     out[5] = -0.5f / sqrtf(D3DX_PI / 15.0f) * diryz;
2386     out[6] = 0.25f / sqrtf(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
2387     out[7] = -0.5f / sqrtf(D3DX_PI / 15.0f) * dirxz;
2388     out[8] = 0.25f / sqrtf(D3DX_PI / 15.0f) * (dirxx - diryy);
2389     if (order == 3)
2390         return out;
2391 
2392     out[9] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
2393     out[10] = sqrtf(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
2394     out[11] = -sqrtf(42.0f / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
2395     out[12] = sqrtf(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
2396     out[13] = sqrtf(42.0f / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
2397     out[14] = sqrtf(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
2398     out[15] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
2399     if (order == 4)
2400         return out;
2401 
2402     out[16] = 0.75f * sqrtf(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
2403     out[17] = 3.0f * dir->z * out[9];
2404     out[18] = 0.75f * sqrtf(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
2405     out[19] = 0.375f * sqrtf(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
2406     out[20] = 3.0f / (16.0f * sqrtf(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
2407     out[21] = 0.375f * sqrtf(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
2408     out[22] = 0.375f * sqrtf(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
2409     out[23] = 3.0f * dir->z * out[15];
2410     out[24] = 3.0f / 16.0f * sqrtf(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2411     if (order == 5)
2412         return out;
2413 
2414     out[25] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
2415     out[26] = 0.75f * sqrtf(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
2416     out[27] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
2417     out[28] = sqrtf(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
2418     out[29] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2419     out[30] = sqrtf(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
2420     out[31] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2421     out[32] = sqrtf(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
2422     out[33] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
2423     out[34] = 3.0f / 16.0f * sqrtf(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2424     out[35] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);
2425 
2426     return out;
2427 }
2428 
2429 HRESULT WINAPI D3DXSHEvalDirectionalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *Rout, FLOAT *Gout, FLOAT *Bout)
2430 {
2431     FLOAT s, temp;
2432     UINT j;
2433 
2434     TRACE("Order %u, Vector %p, Red %f, Green %f, Blue %f, Rout %p, Gout %p, Bout %p\n", order, dir, Rintensity, Gintensity, Bintensity, Rout, Gout, Bout);
2435 
2436     s = 0.75f;
2437     if ( order > 2 )
2438         s += 5.0f / 16.0f;
2439     if ( order > 4 )
2440         s -= 3.0f / 32.0f;
2441     s /= D3DX_PI;
2442 
2443     D3DXSHEvalDirection(Rout, order, dir);
2444     for (j = 0; j < order * order; j++)
2445     {
2446         temp = Rout[j] / s;
2447 
2448         Rout[j] = Rintensity * temp;
2449         if ( Gout )
2450             Gout[j] = Gintensity * temp;
2451         if ( Bout )
2452             Bout[j] = Bintensity * temp;
2453     }
2454 
2455     return D3D_OK;
2456 }
2457 
2458 HRESULT WINAPI D3DXSHEvalHemisphereLight(UINT order, const D3DXVECTOR3 *dir, D3DXCOLOR top, D3DXCOLOR bottom,
2459     FLOAT *rout, FLOAT *gout, FLOAT *bout)
2460 {
2461     FLOAT a[2], temp[4];
2462     UINT i, j;
2463 
2464     TRACE("order %u, dir %p, rout %p, gout %p, bout %p\n", order, dir, rout, gout, bout);
2465 
2466     D3DXSHEvalDirection(temp, 2, dir);
2467 
2468     a[0] = (top.r + bottom.r) * 3.0f * D3DX_PI;
2469     a[1] = (top.r - bottom.r) * D3DX_PI;
2470     for (i = 0; i < order; i++)
2471         for (j = 0; j < 2 * i + 1; j++)
2472             if (i < 2)
2473                 rout[i * i + j] = temp[i * i + j] * a[i];
2474             else
2475                 rout[i * i + j] = 0.0f;
2476 
2477     if (gout)
2478     {
2479         a[0] = (top.g + bottom.g) * 3.0f * D3DX_PI;
2480         a[1] = (top.g - bottom.g) * D3DX_PI;
2481         for (i = 0; i < order; i++)
2482             for (j = 0; j < 2 * i + 1; j++)
2483                 if (i < 2)
2484                     gout[i * i + j] = temp[i * i + j] * a[i];
2485                 else
2486                     gout[i * i + j] = 0.0f;
2487     }
2488 
2489     if (bout)
2490     {
2491         a[0] = (top.b + bottom.b) * 3.0f * D3DX_PI;
2492         a[1] = (top.b - bottom.b) * D3DX_PI;
2493         for (i = 0; i < order; i++)
2494             for (j = 0; j < 2 * i + 1; j++)
2495                 if (i < 2)
2496                     bout[i * i + j] = temp[i * i + j] * a[i];
2497                 else
2498                     bout[i * i + j] = 0.0f;
2499     }
2500 
2501     return D3D_OK;
2502 }
2503 
2504 HRESULT WINAPI D3DXSHEvalSphericalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2505     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2506 {
2507     D3DXVECTOR3 normal;
2508     FLOAT cap[6], clamped_angle, dist, temp;
2509     UINT i, index, j;
2510 
2511     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2512         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2513 
2514     if (order > D3DXSH_MAXORDER)
2515     {
2516         WARN("Order clamped at D3DXSH_MAXORDER\n");
2517         order = D3DXSH_MAXORDER;
2518     }
2519 
2520     if (radius < 0.0f)
2521         radius = -radius;
2522 
2523     dist = D3DXVec3Length(dir);
2524     clamped_angle = (dist <= radius) ? D3DX_PI / 2.0f : asinf(radius / dist);
2525 
2526     weightedcapintegrale(cap, order, clamped_angle);
2527     D3DXVec3Normalize(&normal, dir);
2528     D3DXSHEvalDirection(rout, order, &normal);
2529 
2530     for (i = 0; i < order; i++)
2531         for (j = 0; j < 2 * i + 1; j++)
2532         {
2533             index = i * i + j;
2534             temp = rout[index] * cap[i];
2535 
2536             rout[index] = temp * Rintensity;
2537             if (gout)
2538                 gout[index] = temp * Gintensity;
2539             if (bout)
2540                 bout[index] = temp * Bintensity;
2541         }
2542 
2543     return D3D_OK;
2544 }
2545 
2546 FLOAT * WINAPI D3DXSHMultiply2(FLOAT *out, const FLOAT *a, const FLOAT *b)
2547 {
2548     FLOAT ta, tb;
2549 
2550     TRACE("out %p, a %p, b %p\n", out, a, b);
2551 
2552     ta = 0.28209479f * a[0];
2553     tb = 0.28209479f * b[0];
2554 
2555     out[0] = 0.28209479f * D3DXSHDot(2, a, b);
2556     out[1] = ta * b[1] + tb * a[1];
2557     out[2] = ta * b[2] + tb * a[2];
2558     out[3] = ta * b[3] + tb * a[3];
2559 
2560     return out;
2561 }
2562 
2563 FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
2564 {
2565     FLOAT t, ta, tb;
2566 
2567     TRACE("out %p, a %p, b %p\n", out, a, b);
2568 
2569     out[0] = 0.28209479f * a[0] * b[0];
2570 
2571     ta = 0.28209479f * a[0] - 0.12615662f * a[6] - 0.21850968f * a[8];
2572     tb = 0.28209479f * b[0] - 0.12615662f * b[6] - 0.21850968f * b[8];
2573     out[1] = ta * b[1] + tb * a[1];
2574     t = a[1] * b[1];
2575     out[0] += 0.28209479f * t;
2576     out[6] = -0.12615662f * t;
2577     out[8] = -0.21850968f * t;
2578 
2579     ta = 0.21850968f * a[5];
2580     tb = 0.21850968f * b[5];
2581     out[1] += ta * b[2] + tb * a[2];
2582     out[2] = ta * b[1] + tb * a[1];
2583     t = a[1] * b[2] +a[2] * b[1];
2584     out[5] = 0.21850968f * t;
2585 
2586     ta = 0.21850968f * a[4];
2587     tb = 0.21850968f * b[4];
2588     out[1] += ta * b[3] + tb * a[3];
2589     out[3]  = ta * b[1] + tb * a[1];
2590     t = a[1] * b[3] + a[3] * b[1];
2591     out[4] = 0.21850968f * t;
2592 
2593     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2594     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2595     out[2] += ta * b[2] + tb * a[2];
2596     t = a[2] * b[2];
2597     out[0] += 0.28209480f * t;
2598     out[6] += 0.25231326f * t;
2599 
2600     ta = 0.21850969f * a[7];
2601     tb = 0.21850969f * b[7];
2602     out[2] += ta * b[3] + tb * a[3];
2603     out[3] += ta * b[2] + tb * a[2];
2604     t = a[2] * b[3] + a[3] * b[2];
2605     out[7] = 0.21850969f * t;
2606 
2607     ta = 0.28209479f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2608     tb = 0.28209479f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2609     out[3] += ta * b[3] + tb * a[3];
2610     t = a[3] * b[3];
2611     out[0] += 0.28209479f * t;
2612     out[6] -= 0.12615663f * t;
2613     out[8] += 0.21850969f * t;
2614 
2615     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2616     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2617     out[4] += ta * b[4] + tb * a[4];
2618     t = a[4] * b[4];
2619     out[0] += 0.28209479f * t;
2620     out[6] -= 0.18022375f * t;
2621 
2622     ta = 0.15607835f * a[7];
2623     tb = 0.15607835f * b[7];
2624     out[4] += ta * b[5] + tb * a[5];
2625     out[5] += ta * b[4] + tb * a[4];
2626     t = a[4] * b[5] + a[5] * b[4];
2627     out[7] += 0.15607834f * t;
2628 
2629     ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
2630     tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
2631     out[5] += ta * b[5] + tb * a[5];
2632     t = a[5] * b[5];
2633     out[0] += 0.28209479f * t;
2634     out[6] += 0.09011186f * t;
2635     out[8] -= 0.15607835f * t;
2636 
2637     ta = 0.28209480f * a[0];
2638     tb = 0.28209480f * b[0];
2639     out[6] += ta * b[6] + tb * a[6];
2640     t = a[6] * b[6];
2641     out[0] += 0.28209480f * t;
2642     out[6] += 0.18022376f * t;
2643 
2644     ta = 0.28209479f * a[0] + 0.09011186f * a[6] + 0.15607835f * a[8];
2645     tb = 0.28209479f * b[0] + 0.09011186f * b[6] + 0.15607835f * b[8];
2646     out[7] += ta * b[7] + tb * a[7];
2647     t = a[7] * b[7];
2648     out[0] += 0.28209479f * t;
2649     out[6] += 0.09011186f * t;
2650     out[8] += 0.15607835f * t;
2651 
2652     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2653     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2654     out[8] += ta * b[8] + tb * a[8];
2655     t = a[8] * b[8];
2656     out[0] += 0.28209479f * t;
2657     out[6] -= 0.18022375f * t;
2658 
2659     return out;
2660 }
2661 
2662 FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b)
2663 {
2664     FLOAT ta, tb, t;
2665 
2666     TRACE("out %p, a %p, b %p\n", out, a, b);
2667 
2668     out[0] = 0.28209479f * a[0] * b[0];
2669 
2670     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2671     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2672     out[1] = ta * b[1] + tb * a[1];
2673     t = a[1] * b[1];
2674     out[0] += 0.28209479f * t;
2675     out[6] = -0.12615663f * t;
2676     out[8] = -0.21850969f * t;
2677 
2678     ta = 0.21850969f * a[3] - 0.05839917f * a[13] - 0.22617901f * a[15];
2679     tb = 0.21850969f * b[3] - 0.05839917f * b[13] - 0.22617901f * b[15];
2680     out[1] += ta * b[4] + tb * a[4];
2681     out[4] = ta * b[1] + tb * a[1];
2682     t = a[1] * b[4] + a[4] * b[1];
2683     out[3] = 0.21850969f * t;
2684     out[13] = -0.05839917f * t;
2685     out[15] = -0.22617901f * t;
2686 
2687     ta = 0.21850969f * a[2] - 0.14304817f * a[12] - 0.18467439f * a[14];
2688     tb = 0.21850969f * b[2] - 0.14304817f * b[12] - 0.18467439f * b[14];
2689     out[1] += ta * b[5] + tb * a[5];
2690     out[5] = ta * b[1] + tb * a[1];
2691     t = a[1] * b[5] + a[5] * b[1];
2692     out[2] = 0.21850969f * t;
2693     out[12] = -0.14304817f * t;
2694     out[14] = -0.18467439f * t;
2695 
2696     ta = 0.20230066f * a[11];
2697     tb = 0.20230066f * b[11];
2698     out[1] += ta * b[6] + tb * a[6];
2699     out[6] += ta * b[1] + tb * a[1];
2700     t = a[1] * b[6] + a[6] * b[1];
2701     out[11] = 0.20230066f * t;
2702 
2703     ta = 0.22617901f * a[9] + 0.05839917f * a[11];
2704     tb = 0.22617901f * b[9] + 0.05839917f * b[11];
2705     out[1] += ta * b[8] + tb * a[8];
2706     out[8] += ta * b[1] + tb * a[1];
2707     t = a[1] * b[8] + a[8] * b[1];
2708     out[9] = 0.22617901f * t;
2709     out[11] += 0.05839917f * t;
2710 
2711     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2712     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2713     out[2] += ta * b[2] + tb * a[2];
2714     t = a[2] * b[2];
2715     out[0] += 0.28209480f * t;
2716     out[6] += 0.25231326f * t;
2717 
2718     ta = 0.24776671f * a[12];
2719     tb = 0.24776671f * b[12];
2720     out[2] += ta * b[6] + tb * a[6];
2721     out[6] += ta * b[2] + tb * a[2];
2722     t = a[2] * b[6] + a[6] * b[2];
2723     out[12] += 0.24776671f * t;
2724 
2725     ta = 0.28209480f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2726     tb = 0.28209480f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2727     out[3] += ta * b[3] + tb * a[3];
2728     t = a[3] * b[3];
2729     out[0] += 0.28209480f * t;
2730     out[6] -= 0.12615663f * t;
2731     out[8] += 0.21850969f * t;
2732 
2733     ta = 0.20230066f * a[13];
2734     tb = 0.20230066f * b[13];
2735     out[3] += ta * b[6] + tb * a[6];
2736     out[6] += ta * b[3] + tb * a[3];
2737     t = a[3] * b[6] + a[6] * b[3];
2738     out[13] += 0.20230066f * t;
2739 
2740     ta = 0.21850969f * a[2] - 0.14304817f * a[12] + 0.18467439f * a[14];
2741     tb = 0.21850969f * b[2] - 0.14304817f * b[12] + 0.18467439f * b[14];
2742     out[3] += ta * b[7] + tb * a[7];
2743     out[7] = ta * b[3] + tb * a[3];
2744     t = a[3] * b[7] + a[7] * b[3];
2745     out[2] += 0.21850969f * t;
2746     out[12] -= 0.14304817f * t;
2747     out[14] += 0.18467439f * t;
2748 
2749     ta = -0.05839917f * a[13] + 0.22617901f * a[15];
2750     tb = -0.05839917f * b[13] + 0.22617901f * b[15];
2751     out[3] += ta * b[8] + tb * a[8];
2752     out[8] += ta * b[3] + tb * a[3];
2753     t = a[3] * b[8] + a[8] * b[3];
2754     out[13] -= 0.05839917f * t;
2755     out[15] += 0.22617901f * t;
2756 
2757     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2758     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2759     out[4] += ta * b[4] + tb * a[4];
2760     t = a[4] * b[4];
2761     out[0] += 0.28209479f * t;
2762     out[6] -= 0.18022375f * t;
2763 
2764     ta = 0.15607835f * a[7];
2765     tb = 0.15607835f * b[7];
2766     out[4] += ta * b[5] + tb * a[5];
2767     out[5] += ta * b[4] + tb * a[4];
2768     t = a[4] * b[5] + a[5] * b[4];
2769     out[7] += 0.15607835f * t;
2770 
2771     ta = 0.22617901f * a[3] - 0.09403160f * a[13];
2772     tb = 0.22617901f * b[3] - 0.09403160f * b[13];
2773     out[4] += ta * b[9] + tb * a[9];
2774     out[9] += ta * b[4] + tb * a[4];
2775     t = a[4] * b[9] + a[9] * b[4];
2776     out[3] += 0.22617901f * t;
2777     out[13] -= 0.09403160f * t;
2778 
2779     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2780     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2781     out[4] += ta * b[10] + tb * a [10];
2782     out[10] = ta * b[4] + tb * a[4];
2783     t = a[4] * b[10] + a[10] * b[4];
2784     out[2] += 0.18467439f * t;
2785     out[12] -= 0.18806319f * t;
2786 
2787     ta = -0.05839917f * a[3] + 0.14567312f * a[13] + 0.09403160f * a[15];
2788     tb = -0.05839917f * b[3] + 0.14567312f * b[13] + 0.09403160f * b[15];
2789     out[4] += ta * b[11] + tb * a[11];
2790     out[11] += ta * b[4] + tb * a[4];
2791     t = a[4] * b[11] + a[11] * b[4];
2792     out[3] -= 0.05839917f * t;
2793     out[13] += 0.14567312f * t;
2794     out[15] += 0.09403160f * t;
2795 
2796     ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
2797     tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
2798     out[5] += ta * b[5] + tb * a[5];
2799     t = a[5] * b[5];
2800     out[0] += 0.28209479f * t;
2801     out[6] += 0.09011186f * t;
2802     out[8] -= 0.15607835f * t;
2803 
2804     ta = 0.14867701f * a[14];
2805     tb = 0.14867701f * b[14];
2806     out[5] += ta * b[9] + tb * a[9];
2807     out[9] += ta * b[5] + tb * a[5];
2808     t = a[5] * b[9] + a[9] * b[5];
2809     out[14] += 0.14867701f * t;
2810 
2811     ta = 0.18467439f * a[3] + 0.11516472f * a[13] - 0.14867701f * a[15];
2812     tb = 0.18467439f * b[3] + 0.11516472f * b[13] - 0.14867701f * b[15];
2813     out[5] += ta * b[10] + tb * a[10];
2814     out[10] += ta * b[5] + tb * a[5];
2815     t = a[5] * b[10] + a[10] * b[5];
2816     out[3] += 0.18467439f * t;
2817     out[13] += 0.11516472f * t;
2818     out[15] -= 0.14867701f * t;
2819 
2820     ta = 0.23359668f * a[2] + 0.05947080f * a[12] - 0.11516472f * a[14];
2821     tb = 0.23359668f * b[2] + 0.05947080f * b[12] - 0.11516472f * b[14];
2822     out[5] += ta * b[11] + tb * a[11];
2823     out[11] += ta * b[5] + tb * a[5];
2824     t = a[5] * b[11] + a[11] * b[5];
2825     out[2] += 0.23359668f * t;
2826     out[12] += 0.05947080f * t;
2827     out[14] -= 0.11516472f * t;
2828 
2829     ta = 0.28209479f * a[0];
2830     tb = 0.28209479f * b[0];
2831     out[6] += ta * b[6] + tb * a[6];
2832     t = a[6] * b[6];
2833     out[0] += 0.28209479f * t;
2834     out[6] += 0.18022376f * t;
2835 
2836     ta = 0.09011186f * a[6] + 0.28209479f * a[0] + 0.15607835f * a[8];
2837     tb = 0.09011186f * b[6] + 0.28209479f * b[0] + 0.15607835f * b[8];
2838     out[7] += ta * b[7] + tb * a[7];
2839     t = a[7] * b[7];
2840     out[6] += 0.09011186f * t;
2841     out[0] += 0.28209479f * t;
2842     out[8] += 0.15607835f * t;
2843 
2844     ta = 0.14867701f * a[9] + 0.18467439f * a[1] + 0.11516472f * a[11];
2845     tb = 0.14867701f * b[9] + 0.18467439f * b[1] + 0.11516472f * b[11];
2846     out[7] += ta * b[10] + tb * a[10];
2847     out[10] += ta * b[7] + tb * a[7];
2848     t = a[7] * b[10] + a[10] * b[7];
2849     out[9] += 0.14867701f * t;
2850     out[1] += 0.18467439f * t;
2851     out[11] += 0.11516472f * t;
2852 
2853     ta = 0.05947080f * a[12] + 0.23359668f * a[2] + 0.11516472f * a[14];
2854     tb = 0.05947080f * b[12] + 0.23359668f * b[2] + 0.11516472f * b[14];
2855     out[7] += ta * b[13] + tb * a[13];
2856     out[13] += ta * b[7]+ tb * a[7];
2857     t = a[7] * b[13] + a[13] * b[7];
2858     out[12] += 0.05947080f * t;
2859     out[2] += 0.23359668f * t;
2860     out[14] += 0.11516472f * t;
2861 
2862     ta = 0.14867701f * a[15];
2863     tb = 0.14867701f * b[15];
2864     out[7] += ta * b[14] + tb * a[14];
2865     out[14] += ta * b[7] + tb * a[7];
2866     t = a[7] * b[14] + a[14] * b[7];
2867     out[15] += 0.14867701f * t;
2868 
2869     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2870     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2871     out[8] += ta * b[8] + tb * a[8];
2872     t = a[8] * b[8];
2873     out[0] += 0.28209479f * t;
2874     out[6] -= 0.18022375f * t;
2875 
2876     ta = -0.09403160f * a[11];
2877     tb = -0.09403160f * b[11];
2878     out[8] += ta * b[9] + tb * a[9];
2879     out[9] += ta * b[8] + tb * a[8];
2880     t = a[8] * b[9] + a[9] * b[8];
2881     out[11] -= 0.09403160f * t;
2882 
2883     ta = -0.09403160f * a[15];
2884     tb = -0.09403160f * b[15];
2885     out[8] += ta * b[13] + tb * a[13];
2886     out[13] += ta * b[8] + tb * a[8];
2887     t = a[8] * b[13] + a[13] * b[8];
2888     out[15] -= 0.09403160f * t;
2889 
2890     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2891     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2892     out[8] += ta * b[14] + tb * a[14];
2893     out[14] += ta * b[8] + tb * a[8];
2894     t = a[8] * b[14] + a[14] * b[8];
2895     out[2] += 0.18467439f * t;
2896     out[12] -= 0.18806319f * t;
2897 
2898     ta = -0.21026104f * a[6] + 0.28209479f * a[0];
2899     tb = -0.21026104f * b[6] + 0.28209479f * b[0];
2900     out[9] += ta * b[9] + tb * a[9];
2901     t = a[9] * b[9];
2902     out[6] -= 0.21026104f * t;
2903     out[0] += 0.28209479f * t;
2904 
2905     ta = 0.28209479f * a[0];
2906     tb = 0.28209479f * b[0];
2907     out[10] += ta * b[10] + tb * a[10];
2908     t = a[10] * b[10];
2909     out[0] += 0.28209479f * t;
2910 
2911     ta = 0.28209479f * a[0] + 0.12615663f * a[6] - 0.14567312f * a[8];
2912     tb = 0.28209479f * b[0] + 0.12615663f * b[6] - 0.14567312f * b[8];
2913     out[11] += ta * b[11] + tb * a[11];
2914     t = a[11] * b[11];
2915     out[0] += 0.28209479f * t;
2916     out[6] += 0.12615663f * t;
2917     out[8] -= 0.14567312f * t;
2918 
2919     ta = 0.28209479f * a[0] + 0.16820885f * a[6];
2920     tb = 0.28209479f * b[0] + 0.16820885f * b[6];
2921     out[12] += ta * b[12] + tb * a[12];
2922     t = a[12] * b[12];
2923     out[0] += 0.28209479f * t;
2924     out[6] += 0.16820885f * t;
2925 
2926     ta =0.28209479f * a[0] + 0.14567312f * a[8] + 0.12615663f * a[6];
2927     tb =0.28209479f * b[0] + 0.14567312f * b[8] + 0.12615663f * b[6];
2928     out[13] += ta * b[13] + tb * a[13];
2929     t = a[13] * b[13];
2930     out[0] += 0.28209479f * t;
2931     out[8] += 0.14567312f * t;
2932     out[6] += 0.12615663f * t;
2933 
2934     ta = 0.28209479f * a[0];
2935     tb = 0.28209479f * b[0];
2936     out[14] += ta * b[14] + tb * a[14];
2937     t = a[14] * b[14];
2938     out[0] += 0.28209479f * t;
2939 
2940     ta = 0.28209479f * a[0] - 0.21026104f * a[6];
2941     tb = 0.28209479f * b[0] - 0.21026104f * b[6];
2942     out[15] += ta * b[15] + tb * a[15];
2943     t = a[15] * b[15];
2944     out[0] += 0.28209479f * t;
2945     out[6] -= 0.21026104f * t;
2946 
2947     return out;
2948 }
2949 
2950 static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in)
2951 {
2952     out[0] = in[0];
2953 
2954     out[1] = a * in[2];
2955     out[2] = -a * in[1];
2956     out[3] = in[3];
2957 
2958     out[4] = a * in[7];
2959     out[5] = -in[5];
2960     out[6] = -0.5f * in[6] - 0.8660253882f * in[8];
2961     out[7] = -a * in[4];
2962     out[8] = -0.8660253882f * in[6] + 0.5f * in[8];
2963     out[9] = -a * 0.7905694842f * in[12] + a * 0.6123724580f * in[14];
2964 
2965     out[10] = -in[10];
2966     out[11] = -a * 0.6123724580f * in[12] - a * 0.7905694842f * in[14];
2967     out[12] = a * 0.7905694842f * in[9] + a * 0.6123724580f * in[11];
2968     out[13] = -0.25f * in[13] - 0.9682458639f * in[15];
2969     out[14] = -a * 0.6123724580f * in[9] + a * 0.7905694842f * in[11];
2970     out[15] = -0.9682458639f * in[13] + 0.25f * in[15];
2971     if (order == 4)
2972         return;
2973 
2974     out[16] = -a * 0.9354143739f * in[21] + a * 0.3535533845f * in[23];
2975     out[17] = -0.75f * in[17] + 0.6614378095f * in[19];
2976     out[18] = -a * 0.3535533845f * in[21] - a * 0.9354143739f * in[23];
2977     out[19] = 0.6614378095f * in[17] + 0.75f * in[19];
2978     out[20] = 0.375f * in[20] + 0.5590170026f * in[22] + 0.7395099998f * in[24];
2979     out[21] = a * 0.9354143739f * in[16] + a * 0.3535533845f * in[18];
2980     out[22] = 0.5590170026f * in[20] + 0.5f * in[22] - 0.6614378691f * in[24];
2981     out[23] = -a * 0.3535533845f * in[16] + a * 0.9354143739f * in[18];
2982     out[24] = 0.7395099998f * in[20] - 0.6614378691f * in[22] + 0.125f * in[24];
2983     if (order == 5)
2984         return;
2985 
2986     out[25] = a * 0.7015607357f * in[30] - a * 0.6846531630f * in[32] + a * 0.1976423711f * in[34];
2987     out[26] = -0.5f * in[26] + 0.8660253882f * in[28];
2988     out[27] = a * 0.5229125023f * in[30] + a * 0.3061861992f * in[32] - a * 0.7954951525f * in[34];
2989     out[28] = 0.8660253882f * in[26] + 0.5f * in[28];
2990     out[29] = a * 0.4841229022f * in[30] + a * 0.6614378691f * in[32] + a * 0.5728219748f * in[34];
2991     out[30] = -a * 0.7015607357f * in[25] - a * 0.5229125023f * in[27] - a * 0.4841229022f * in[29];
2992     out[31] = 0.125f * in[31] + 0.4050463140f * in[33] + 0.9057110548f * in[35];
2993     out[32] = a * 0.6846531630f * in[25] - a * 0.3061861992f * in[27] - a * 0.6614378691f * in[29];
2994     out[33] = 0.4050463140f * in[31] + 0.8125f * in[33] - 0.4192627370f * in[35];
2995     out[34] = -a * 0.1976423711f * in[25] + a * 0.7954951525f * in[27] - a * 0.5728219748f * in[29];
2996     out[35] = 0.9057110548f * in[31] - 0.4192627370f * in[33] + 0.0624999329f * in[35];
2997 }
2998 
2999 FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, const D3DXMATRIX *matrix, const FLOAT *in)
3000 {
3001     FLOAT alpha, beta, gamma, sinb, temp[36], temp1[36];
3002 
3003     TRACE("out %p, order %u, matrix %p, in %p\n", out, order, matrix, in);
3004 
3005     out[0] = in[0];
3006 
3007     if ((order > D3DXSH_MAXORDER) || (order < D3DXSH_MINORDER))
3008         return out;
3009 
3010     if (order <= 3)
3011     {
3012         out[1] = matrix->u.m[1][1] * in[1] - matrix->u.m[2][1] * in[2] + matrix->u.m[0][1] * in[3];
3013         out[2] = -matrix->u.m[1][2] * in[1] + matrix->u.m[2][2] * in[2] - matrix->u.m[0][2] * in[3];
3014         out[3] = matrix->u.m[1][0] * in[1] - matrix->u.m[2][0] * in[2] + matrix->u.m[0][0] * in[3];
3015 
3016         if (order == 3)
3017         {
3018             FLOAT coeff[]={
3019                 matrix->u.m[1][0] * matrix->u.m[0][0], matrix->u.m[1][1] * matrix->u.m[0][1],
3020                 matrix->u.m[1][1] * matrix->u.m[2][1], matrix->u.m[1][0] * matrix->u.m[2][0],
3021                 matrix->u.m[2][0] * matrix->u.m[2][0], matrix->u.m[2][1] * matrix->u.m[2][1],
3022                 matrix->u.m[0][0] * matrix->u.m[2][0], matrix->u.m[0][1] * matrix->u.m[2][1],
3023                 matrix->u.m[0][1] * matrix->u.m[0][1], matrix->u.m[1][0] * matrix->u.m[1][0],
3024                 matrix->u.m[1][1] * matrix->u.m[1][1], matrix->u.m[0][0] * matrix->u.m[0][0], };
3025 
3026             out[4] = (matrix->u.m[1][1] * matrix->u.m[0][0] + matrix->u.m[0][1] * matrix->u.m[1][0]) * in[4];
3027             out[4] -= (matrix->u.m[1][0] * matrix->u.m[2][1] + matrix->u.m[1][1] * matrix->u.m[2][0]) * in[5];
3028             out[4] += 1.7320508076f * matrix->u.m[2][0] * matrix->u.m[2][1] * in[6];
3029             out[4] -= (matrix->u.m[0][1] * matrix->u.m[2][0] + matrix->u.m[0][0] * matrix->u.m[2][1]) * in[7];
3030             out[4] += (matrix->u.m[0][0] * matrix->u.m[0][1] - matrix->u.m[1][0] * matrix->u.m[1][1]) * in[8];
3031 
3032             out[5] = (matrix->u.m[1][1] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][1]) * in[5];
3033             out[5] -= (matrix->u.m[1][1] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][1]) * in[4];
3034             out[5] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][1] * in[6];
3035             out[5] += (matrix->u.m[0][2] * matrix->u.m[2][1] + matrix->u.m[0][1] * matrix->u.m[2][2]) * in[7];
3036             out[5] -= (matrix->u.m[0][1] * matrix->u.m[0][2] - matrix->u.m[1][1] * matrix->u.m[1][2]) * in[8];
3037 
3038             out[6] = (matrix->u.m[2][2] * matrix->u.m[2][2] - 0.5f * (coeff[4] + coeff[5])) * in[6];
3039             out[6] -= (0.5773502692f * (coeff[0] + coeff[1]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[0][2]) * in[4];
3040             out[6] += (0.5773502692f * (coeff[2] + coeff[3]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[2][2]) * in[5];
3041             out[6] += (0.5773502692f * (coeff[6] + coeff[7]) - 1.1547005384f * matrix->u.m[0][2] * matrix->u.m[2][2]) * in[7];
3042             out[6] += (0.2886751347f * (coeff[9] - coeff[8] + coeff[10] - coeff[11]) - 0.5773502692f *
3043                   (matrix->u.m[1][2] * matrix->u.m[1][2] - matrix->u.m[0][2] * matrix->u.m[0][2])) * in[8];
3044 
3045             out[7] = (matrix->u.m[0][0] * matrix->u.m[2][2] + matrix->u.m[0][2] * matrix->u.m[2][0]) * in[7];
3046             out[7] -= (matrix->u.m[1][0] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][0]) * in[4];
3047             out[7] += (matrix->u.m[1][0] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][0]) * in[5];
3048             out[7] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][0] * in[6];
3049             out[7] -= (matrix->u.m[0][0] * matrix->u.m[0][2] - matrix->u.m[1][0] * matrix->u.m[1][2]) * in[8];
3050 
3051             out[8] = 0.5f * (coeff[11] - coeff[8] - coeff[9] + coeff[10]) * in[8];
3052             out[8] += (coeff[0] - coeff[1]) * in[4];
3053             out[8] += (coeff[2] - coeff[3]) * in[5];
3054             out[8] += 0.86602540f * (coeff[4] - coeff[5]) * in[6];
3055             out[8] += (coeff[7] - coeff[6]) * in[7];
3056         }
3057 
3058         return out;
3059     }
3060 
3061     if (fabsf(matrix->u.m[2][2]) != 1.0f)
3062     {
3063         sinb = sqrtf(1.0f - matrix->u.m[2][2] * matrix->u.m[2][2]);
3064         alpha = atan2f(matrix->u.m[2][1] / sinb, matrix->u.m[2][0] / sinb);
3065         beta = atan2f(sinb, matrix->u.m[2][2]);
3066         gamma = atan2f(matrix->u.m[1][2] / sinb, -matrix->u.m[0][2] / sinb);
3067     }
3068     else
3069     {
3070         alpha = atan2f(matrix->u.m[0][1], matrix->u.m[0][0]);
3071         beta = 0.0f;
3072         gamma = 0.0f;
3073     }
3074 
3075     D3DXSHRotateZ(temp, order, gamma, in);
3076     rotate_X(temp1, order, 1.0f, temp);
3077     D3DXSHRotateZ(temp, order, beta, temp1);
3078     rotate_X(temp1, order, -1.0f, temp);
3079     D3DXSHRotateZ(out, order, alpha, temp1);
3080 
3081     return out;
3082 }
3083 
3084 FLOAT * WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, const FLOAT *in)
3085 {
3086     UINT i, sum = 0;
3087     FLOAT c[5], s[5];
3088 
3089     TRACE("out %p, order %u, angle %f, in %p\n", out, order, angle, in);
3090 
3091     order = min(max(order, D3DXSH_MINORDER), D3DXSH_MAXORDER);
3092 
3093     out[0] = in[0];
3094 
3095     for (i = 1; i < order; i++)
3096     {
3097         UINT j;
3098 
3099         c[i - 1] = cosf(i * angle);
3100         s[i - 1] = sinf(i * angle);
3101         sum += i * 2;
3102 
3103         out[sum - i] = c[i - 1] * in[sum - i];
3104         out[sum - i] += s[i - 1] * in[sum + i];
3105         for (j = i - 1; j > 0; j--)
3106         {
3107             out[sum - j] = 0.0f;
3108             out[sum - j] = c[j - 1] * in[sum - j];
3109             out[sum - j] += s[j - 1] * in[sum + j];
3110         }
3111 
3112         if (in == out)
3113             out[sum] = 0.0f;
3114         else
3115             out[sum] = in[sum];
3116 
3117         for (j = 1; j < i; j++)
3118         {
3119             out[sum + j] = 0.0f;
3120             out[sum + j] = -s[j - 1] * in[sum - j];
3121             out[sum + j] += c[j - 1] * in[sum + j];
3122         }
3123         out[sum + i] = -s[i - 1] * in[sum - i];
3124         out[sum + i] += c[i - 1] * in[sum + i];
3125     }
3126 
3127     return out;
3128 }
3129 
3130 FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, const FLOAT *a, const FLOAT scale)
3131 {
3132     UINT i;
3133 
3134     TRACE("out %p, order %u, a %p, scale %f\n", out, order, a, scale);
3135 
3136     for (i = 0; i < order * order; i++)
3137         out[i] = a[i] * scale;
3138 
3139     return out;
3140 }
3141