xref: /reactos/dll/directx/wine/d3dx9_36/math.c (revision 23373acb)
1 /*
2  * Mathematical operations specific to D3DX9.
3  *
4  * Copyright (C) 2008 David Adam
5  * Copyright (C) 2008 Luis Busquets
6  * Copyright (C) 2008 Jérôme Gardou
7  * Copyright (C) 2008 Philip Nilsson
8  * Copyright (C) 2008 Henri Verbeet
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #include "config.h"
26 #include "wine/port.h"
27 
28 #include "d3dx9_private.h"
29 
30 WINE_DEFAULT_DEBUG_CHANNEL(d3dx);
31 
32 struct ID3DXMatrixStackImpl
33 {
34   ID3DXMatrixStack ID3DXMatrixStack_iface;
35   LONG ref;
36 
37   unsigned int current;
38   unsigned int stack_size;
39   D3DXMATRIX *stack;
40 };
41 
/* Starting size for a matrix stack (presumably a count of matrices -- confirm at the allocation site). */
static const unsigned int INITIAL_STACK_SIZE = 32u;
43 
44 /*_________________D3DXColor____________________*/
45 
46 D3DXCOLOR* WINAPI D3DXColorAdjustContrast(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
47 {
48     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
49 
50     pout->r = 0.5f + s * (pc->r - 0.5f);
51     pout->g = 0.5f + s * (pc->g - 0.5f);
52     pout->b = 0.5f + s * (pc->b - 0.5f);
53     pout->a = pc->a;
54     return pout;
55 }
56 
57 D3DXCOLOR* WINAPI D3DXColorAdjustSaturation(D3DXCOLOR *pout, const D3DXCOLOR *pc, FLOAT s)
58 {
59     FLOAT grey;
60 
61     TRACE("pout %p, pc %p, s %f\n", pout, pc, s);
62 
63     grey = pc->r * 0.2125f + pc->g * 0.7154f + pc->b * 0.0721f;
64     pout->r = grey + s * (pc->r - grey);
65     pout->g = grey + s * (pc->g - grey);
66     pout->b = grey + s * (pc->b - grey);
67     pout->a = pc->a;
68     return pout;
69 }
70 
71 /*_________________Misc__________________________*/
72 
73 FLOAT WINAPI D3DXFresnelTerm(FLOAT costheta, FLOAT refractionindex)
74 {
75     FLOAT a, d, g, result;
76 
77     TRACE("costheta %f, refractionindex %f\n", costheta, refractionindex);
78 
79     g = sqrtf(refractionindex * refractionindex + costheta * costheta - 1.0f);
80     a = g + costheta;
81     d = g - costheta;
82     result = (costheta * a - 1.0f) * (costheta * a - 1.0f) / ((costheta * d + 1.0f) * (costheta * d + 1.0f)) + 1.0f;
83     result *= 0.5f * d * d / (a * a);
84 
85     return result;
86 }
87 
88 /*_________________D3DXMatrix____________________*/
89 
90 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation(D3DXMATRIX *out, FLOAT scaling, const D3DXVECTOR3 *rotationcenter,
91         const D3DXQUATERNION *rotation, const D3DXVECTOR3 *translation)
92 {
93     TRACE("out %p, scaling %f, rotationcenter %p, rotation %p, translation %p\n",
94             out, scaling, rotationcenter, rotation, translation);
95 
96     D3DXMatrixIdentity(out);
97 
98     if (rotation)
99     {
100         FLOAT temp00, temp01, temp02, temp10, temp11, temp12, temp20, temp21, temp22;
101 
102         temp00 = 1.0f - 2.0f * (rotation->y * rotation->y + rotation->z * rotation->z);
103         temp01 = 2.0f * (rotation->x * rotation->y + rotation->z * rotation->w);
104         temp02 = 2.0f * (rotation->x * rotation->z - rotation->y * rotation->w);
105         temp10 = 2.0f * (rotation->x * rotation->y - rotation->z * rotation->w);
106         temp11 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->z * rotation->z);
107         temp12 = 2.0f * (rotation->y * rotation->z + rotation->x * rotation->w);
108         temp20 = 2.0f * (rotation->x * rotation->z + rotation->y * rotation->w);
109         temp21 = 2.0f * (rotation->y * rotation->z - rotation->x * rotation->w);
110         temp22 = 1.0f - 2.0f * (rotation->x * rotation->x + rotation->y * rotation->y);
111 
112         out->u.m[0][0] = scaling * temp00;
113         out->u.m[0][1] = scaling * temp01;
114         out->u.m[0][2] = scaling * temp02;
115         out->u.m[1][0] = scaling * temp10;
116         out->u.m[1][1] = scaling * temp11;
117         out->u.m[1][2] = scaling * temp12;
118         out->u.m[2][0] = scaling * temp20;
119         out->u.m[2][1] = scaling * temp21;
120         out->u.m[2][2] = scaling * temp22;
121 
122         if (rotationcenter)
123         {
124             out->u.m[3][0] = rotationcenter->x * (1.0f - temp00) - rotationcenter->y * temp10
125                     - rotationcenter->z * temp20;
126             out->u.m[3][1] = rotationcenter->y * (1.0f - temp11) - rotationcenter->x * temp01
127                     - rotationcenter->z * temp21;
128             out->u.m[3][2] = rotationcenter->z * (1.0f - temp22) - rotationcenter->x * temp02
129                     - rotationcenter->y * temp12;
130         }
131     }
132     else
133     {
134         out->u.m[0][0] = scaling;
135         out->u.m[1][1] = scaling;
136         out->u.m[2][2] = scaling;
137     }
138 
139     if (translation)
140     {
141         out->u.m[3][0] += translation->x;
142         out->u.m[3][1] += translation->y;
143         out->u.m[3][2] += translation->z;
144     }
145 
146     return out;
147 }
148 
149 D3DXMATRIX * WINAPI D3DXMatrixAffineTransformation2D(D3DXMATRIX *out, FLOAT scaling,
150         const D3DXVECTOR2 *rotationcenter, FLOAT rotation, const D3DXVECTOR2 *translation)
151 {
152     FLOAT tmp1, tmp2, s;
153 
154     TRACE("out %p, scaling %f, rotationcenter %p, rotation %f, translation %p\n",
155             out, scaling, rotationcenter, rotation, translation);
156 
157     s = sinf(rotation / 2.0f);
158     tmp1 = 1.0f - 2.0f * s * s;
159     tmp2 = 2.0f * s * cosf(rotation / 2.0f);
160 
161     D3DXMatrixIdentity(out);
162     out->u.m[0][0] = scaling * tmp1;
163     out->u.m[0][1] = scaling * tmp2;
164     out->u.m[1][0] = -scaling * tmp2;
165     out->u.m[1][1] = scaling * tmp1;
166 
167     if (rotationcenter)
168     {
169         FLOAT x, y;
170 
171         x = rotationcenter->x;
172         y = rotationcenter->y;
173 
174         out->u.m[3][0] = y * tmp2 - x * tmp1 + x;
175         out->u.m[3][1] = -x * tmp2 - y * tmp1 + y;
176     }
177 
178     if (translation)
179     {
180         out->u.m[3][0] += translation->x;
181         out->u.m[3][1] += translation->y;
182     }
183 
184     return out;
185 }
186 
187 HRESULT WINAPI D3DXMatrixDecompose(D3DXVECTOR3 *poutscale, D3DXQUATERNION *poutrotation, D3DXVECTOR3 *pouttranslation, const D3DXMATRIX *pm)
188 {
189     D3DXMATRIX normalized;
190     D3DXVECTOR3 vec;
191 
192     TRACE("poutscale %p, poutrotation %p, pouttranslation %p, pm %p\n", poutscale, poutrotation, pouttranslation, pm);
193 
194     /*Compute the scaling part.*/
195     vec.x=pm->u.m[0][0];
196     vec.y=pm->u.m[0][1];
197     vec.z=pm->u.m[0][2];
198     poutscale->x=D3DXVec3Length(&vec);
199 
200     vec.x=pm->u.m[1][0];
201     vec.y=pm->u.m[1][1];
202     vec.z=pm->u.m[1][2];
203     poutscale->y=D3DXVec3Length(&vec);
204 
205     vec.x=pm->u.m[2][0];
206     vec.y=pm->u.m[2][1];
207     vec.z=pm->u.m[2][2];
208     poutscale->z=D3DXVec3Length(&vec);
209 
210     /*Compute the translation part.*/
211     pouttranslation->x=pm->u.m[3][0];
212     pouttranslation->y=pm->u.m[3][1];
213     pouttranslation->z=pm->u.m[3][2];
214 
215     /*Let's calculate the rotation now*/
216     if ( (poutscale->x == 0.0f) || (poutscale->y == 0.0f) || (poutscale->z == 0.0f) ) return D3DERR_INVALIDCALL;
217 
218     normalized.u.m[0][0]=pm->u.m[0][0]/poutscale->x;
219     normalized.u.m[0][1]=pm->u.m[0][1]/poutscale->x;
220     normalized.u.m[0][2]=pm->u.m[0][2]/poutscale->x;
221     normalized.u.m[1][0]=pm->u.m[1][0]/poutscale->y;
222     normalized.u.m[1][1]=pm->u.m[1][1]/poutscale->y;
223     normalized.u.m[1][2]=pm->u.m[1][2]/poutscale->y;
224     normalized.u.m[2][0]=pm->u.m[2][0]/poutscale->z;
225     normalized.u.m[2][1]=pm->u.m[2][1]/poutscale->z;
226     normalized.u.m[2][2]=pm->u.m[2][2]/poutscale->z;
227 
228     D3DXQuaternionRotationMatrix(poutrotation,&normalized);
229     return S_OK;
230 }
231 
232 FLOAT WINAPI D3DXMatrixDeterminant(const D3DXMATRIX *pm)
233 {
234     FLOAT t[3], v[4];
235 
236     TRACE("pm %p\n", pm);
237 
238     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
239     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
240     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
241     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
242     v[1] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
243 
244     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
245     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
246     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
247     v[2] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
248     v[3] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
249 
250     return pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[1] +
251         pm->u.m[0][2] * v[2] + pm->u.m[0][3] * v[3];
252 }
253 
254 D3DXMATRIX* WINAPI D3DXMatrixInverse(D3DXMATRIX *pout, FLOAT *pdeterminant, const D3DXMATRIX *pm)
255 {
256     FLOAT det, t[3], v[16];
257     UINT i, j;
258 
259     TRACE("pout %p, pdeterminant %p, pm %p\n", pout, pdeterminant, pm);
260 
261     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
262     t[1] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
263     t[2] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
264     v[0] = pm->u.m[1][1] * t[0] - pm->u.m[2][1] * t[1] + pm->u.m[3][1] * t[2];
265     v[4] = -pm->u.m[1][0] * t[0] + pm->u.m[2][0] * t[1] - pm->u.m[3][0] * t[2];
266 
267     t[0] = pm->u.m[1][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[1][1];
268     t[1] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
269     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
270     v[8] = pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1] + pm->u.m[1][3] * t[2];
271     v[12] = -pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] - pm->u.m[1][2] * t[2];
272 
273     det = pm->u.m[0][0] * v[0] + pm->u.m[0][1] * v[4] +
274         pm->u.m[0][2] * v[8] + pm->u.m[0][3] * v[12];
275     if (det == 0.0f)
276         return NULL;
277     if (pdeterminant)
278         *pdeterminant = det;
279 
280     t[0] = pm->u.m[2][2] * pm->u.m[3][3] - pm->u.m[2][3] * pm->u.m[3][2];
281     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
282     t[2] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
283     v[1] = -pm->u.m[0][1] * t[0] + pm->u.m[2][1] * t[1] - pm->u.m[3][1] * t[2];
284     v[5] = pm->u.m[0][0] * t[0] - pm->u.m[2][0] * t[1] + pm->u.m[3][0] * t[2];
285 
286     t[0] = pm->u.m[0][0] * pm->u.m[2][1] - pm->u.m[2][0] * pm->u.m[0][1];
287     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
288     t[2] = pm->u.m[2][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[2][1];
289     v[9] = -pm->u.m[3][3] * t[0] - pm->u.m[2][3] * t[1]- pm->u.m[0][3] * t[2];
290     v[13] = pm->u.m[3][2] * t[0] + pm->u.m[2][2] * t[1] + pm->u.m[0][2] * t[2];
291 
292     t[0] = pm->u.m[1][2] * pm->u.m[3][3] - pm->u.m[1][3] * pm->u.m[3][2];
293     t[1] = pm->u.m[0][2] * pm->u.m[3][3] - pm->u.m[0][3] * pm->u.m[3][2];
294     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
295     v[2] = pm->u.m[0][1] * t[0] - pm->u.m[1][1] * t[1] + pm->u.m[3][1] * t[2];
296     v[6] = -pm->u.m[0][0] * t[0] + pm->u.m[1][0] * t[1] - pm->u.m[3][0] * t[2];
297 
298     t[0] = pm->u.m[0][0] * pm->u.m[1][1] - pm->u.m[1][0] * pm->u.m[0][1];
299     t[1] = pm->u.m[3][0] * pm->u.m[0][1] - pm->u.m[0][0] * pm->u.m[3][1];
300     t[2] = pm->u.m[1][0] * pm->u.m[3][1] - pm->u.m[3][0] * pm->u.m[1][1];
301     v[10] = pm->u.m[3][3] * t[0] + pm->u.m[1][3] * t[1] + pm->u.m[0][3] * t[2];
302     v[14] = -pm->u.m[3][2] * t[0] - pm->u.m[1][2] * t[1] - pm->u.m[0][2] * t[2];
303 
304     t[0] = pm->u.m[1][2] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][2];
305     t[1] = pm->u.m[0][2] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][2];
306     t[2] = pm->u.m[0][2] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][2];
307     v[3] = -pm->u.m[0][1] * t[0] + pm->u.m[1][1] * t[1] - pm->u.m[2][1] * t[2];
308     v[7] = pm->u.m[0][0] * t[0] - pm->u.m[1][0] * t[1] + pm->u.m[2][0] * t[2];
309 
310     v[11] = -pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][3] - pm->u.m[1][3] * pm->u.m[2][1]) +
311         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][3] - pm->u.m[0][3] * pm->u.m[2][1]) -
312         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][3] - pm->u.m[0][3] * pm->u.m[1][1]);
313 
314     v[15] = pm->u.m[0][0] * (pm->u.m[1][1] * pm->u.m[2][2] - pm->u.m[1][2] * pm->u.m[2][1]) -
315         pm->u.m[1][0] * (pm->u.m[0][1] * pm->u.m[2][2] - pm->u.m[0][2] * pm->u.m[2][1]) +
316         pm->u.m[2][0] * (pm->u.m[0][1] * pm->u.m[1][2] - pm->u.m[0][2] * pm->u.m[1][1]);
317 
318     det = 1.0f / det;
319 
320     for (i = 0; i < 4; i++)
321         for (j = 0; j < 4; j++)
322             pout->u.m[i][j] = v[4 * i + j] * det;
323 
324     return pout;
325 }
326 
327 D3DXMATRIX * WINAPI D3DXMatrixLookAtLH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
328         const D3DXVECTOR3 *up)
329 {
330     D3DXVECTOR3 right, upn, vec;
331 
332     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
333 
334     D3DXVec3Subtract(&vec, at, eye);
335     D3DXVec3Normalize(&vec, &vec);
336     D3DXVec3Cross(&right, up, &vec);
337     D3DXVec3Cross(&upn, &vec, &right);
338     D3DXVec3Normalize(&right, &right);
339     D3DXVec3Normalize(&upn, &upn);
340     out->u.m[0][0] = right.x;
341     out->u.m[1][0] = right.y;
342     out->u.m[2][0] = right.z;
343     out->u.m[3][0] = -D3DXVec3Dot(&right, eye);
344     out->u.m[0][1] = upn.x;
345     out->u.m[1][1] = upn.y;
346     out->u.m[2][1] = upn.z;
347     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
348     out->u.m[0][2] = vec.x;
349     out->u.m[1][2] = vec.y;
350     out->u.m[2][2] = vec.z;
351     out->u.m[3][2] = -D3DXVec3Dot(&vec, eye);
352     out->u.m[0][3] = 0.0f;
353     out->u.m[1][3] = 0.0f;
354     out->u.m[2][3] = 0.0f;
355     out->u.m[3][3] = 1.0f;
356 
357     return out;
358 }
359 
360 D3DXMATRIX * WINAPI D3DXMatrixLookAtRH(D3DXMATRIX *out, const D3DXVECTOR3 *eye, const D3DXVECTOR3 *at,
361         const D3DXVECTOR3 *up)
362 {
363     D3DXVECTOR3 right, upn, vec;
364 
365     TRACE("out %p, eye %p, at %p, up %p\n", out, eye, at, up);
366 
367     D3DXVec3Subtract(&vec, at, eye);
368     D3DXVec3Normalize(&vec, &vec);
369     D3DXVec3Cross(&right, up, &vec);
370     D3DXVec3Cross(&upn, &vec, &right);
371     D3DXVec3Normalize(&right, &right);
372     D3DXVec3Normalize(&upn, &upn);
373     out->u.m[0][0] = -right.x;
374     out->u.m[1][0] = -right.y;
375     out->u.m[2][0] = -right.z;
376     out->u.m[3][0] = D3DXVec3Dot(&right, eye);
377     out->u.m[0][1] = upn.x;
378     out->u.m[1][1] = upn.y;
379     out->u.m[2][1] = upn.z;
380     out->u.m[3][1] = -D3DXVec3Dot(&upn, eye);
381     out->u.m[0][2] = -vec.x;
382     out->u.m[1][2] = -vec.y;
383     out->u.m[2][2] = -vec.z;
384     out->u.m[3][2] = D3DXVec3Dot(&vec, eye);
385     out->u.m[0][3] = 0.0f;
386     out->u.m[1][3] = 0.0f;
387     out->u.m[2][3] = 0.0f;
388     out->u.m[3][3] = 1.0f;
389 
390     return out;
391 }
392 
393 D3DXMATRIX* WINAPI D3DXMatrixMultiply(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
394 {
395     D3DXMATRIX out;
396     int i,j;
397 
398     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
399 
400     for (i=0; i<4; i++)
401     {
402         for (j=0; j<4; j++)
403         {
404             out.u.m[i][j] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
405         }
406     }
407 
408     *pout = out;
409     return pout;
410 }
411 
412 D3DXMATRIX* WINAPI D3DXMatrixMultiplyTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm1, const D3DXMATRIX *pm2)
413 {
414     D3DXMATRIX temp;
415     int i, j;
416 
417     TRACE("pout %p, pm1 %p, pm2 %p\n", pout, pm1, pm2);
418 
419     for (i = 0; i < 4; i++)
420         for (j = 0; j < 4; j++)
421             temp.u.m[j][i] = pm1->u.m[i][0] * pm2->u.m[0][j] + pm1->u.m[i][1] * pm2->u.m[1][j] + pm1->u.m[i][2] * pm2->u.m[2][j] + pm1->u.m[i][3] * pm2->u.m[3][j];
422 
423     *pout = temp;
424     return pout;
425 }
426 
427 D3DXMATRIX* WINAPI D3DXMatrixOrthoLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
428 {
429     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
430 
431     D3DXMatrixIdentity(pout);
432     pout->u.m[0][0] = 2.0f / w;
433     pout->u.m[1][1] = 2.0f / h;
434     pout->u.m[2][2] = 1.0f / (zf - zn);
435     pout->u.m[3][2] = zn / (zn - zf);
436     return pout;
437 }
438 
439 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
440 {
441     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
442 
443     D3DXMatrixIdentity(pout);
444     pout->u.m[0][0] = 2.0f / (r - l);
445     pout->u.m[1][1] = 2.0f / (t - b);
446     pout->u.m[2][2] = 1.0f / (zf -zn);
447     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
448     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
449     pout->u.m[3][2] = zn / (zn -zf);
450     return pout;
451 }
452 
453 D3DXMATRIX* WINAPI D3DXMatrixOrthoOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
454 {
455     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
456 
457     D3DXMatrixIdentity(pout);
458     pout->u.m[0][0] = 2.0f / (r - l);
459     pout->u.m[1][1] = 2.0f / (t - b);
460     pout->u.m[2][2] = 1.0f / (zn -zf);
461     pout->u.m[3][0] = -1.0f -2.0f *l / (r - l);
462     pout->u.m[3][1] = 1.0f + 2.0f * t / (b - t);
463     pout->u.m[3][2] = zn / (zn -zf);
464     return pout;
465 }
466 
467 D3DXMATRIX* WINAPI D3DXMatrixOrthoRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
468 {
469     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
470 
471     D3DXMatrixIdentity(pout);
472     pout->u.m[0][0] = 2.0f / w;
473     pout->u.m[1][1] = 2.0f / h;
474     pout->u.m[2][2] = 1.0f / (zn - zf);
475     pout->u.m[3][2] = zn / (zn - zf);
476     return pout;
477 }
478 
479 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovLH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
480 {
481     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
482 
483     D3DXMatrixIdentity(pout);
484     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
485     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
486     pout->u.m[2][2] = zf / (zf - zn);
487     pout->u.m[2][3] = 1.0f;
488     pout->u.m[3][2] = (zf * zn) / (zn - zf);
489     pout->u.m[3][3] = 0.0f;
490     return pout;
491 }
492 
493 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveFovRH(D3DXMATRIX *pout, FLOAT fovy, FLOAT aspect, FLOAT zn, FLOAT zf)
494 {
495     TRACE("pout %p, fovy %f, aspect %f, zn %f, zf %f\n", pout, fovy, aspect, zn, zf);
496 
497     D3DXMatrixIdentity(pout);
498     pout->u.m[0][0] = 1.0f / (aspect * tanf(fovy/2.0f));
499     pout->u.m[1][1] = 1.0f / tanf(fovy/2.0f);
500     pout->u.m[2][2] = zf / (zn - zf);
501     pout->u.m[2][3] = -1.0f;
502     pout->u.m[3][2] = (zf * zn) / (zn - zf);
503     pout->u.m[3][3] = 0.0f;
504     return pout;
505 }
506 
507 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveLH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
508 {
509     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
510 
511     D3DXMatrixIdentity(pout);
512     pout->u.m[0][0] = 2.0f * zn / w;
513     pout->u.m[1][1] = 2.0f * zn / h;
514     pout->u.m[2][2] = zf / (zf - zn);
515     pout->u.m[3][2] = (zn * zf) / (zn - zf);
516     pout->u.m[2][3] = 1.0f;
517     pout->u.m[3][3] = 0.0f;
518     return pout;
519 }
520 
521 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterLH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
522 {
523     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
524 
525     D3DXMatrixIdentity(pout);
526     pout->u.m[0][0] = 2.0f * zn / (r - l);
527     pout->u.m[1][1] = -2.0f * zn / (b - t);
528     pout->u.m[2][0] = -1.0f - 2.0f * l / (r - l);
529     pout->u.m[2][1] = 1.0f + 2.0f * t / (b - t);
530     pout->u.m[2][2] = - zf / (zn - zf);
531     pout->u.m[3][2] = (zn * zf) / (zn -zf);
532     pout->u.m[2][3] = 1.0f;
533     pout->u.m[3][3] = 0.0f;
534     return pout;
535 }
536 
537 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveOffCenterRH(D3DXMATRIX *pout, FLOAT l, FLOAT r, FLOAT b, FLOAT t, FLOAT zn, FLOAT zf)
538 {
539     TRACE("pout %p, l %f, r %f, b %f, t %f, zn %f, zf %f\n", pout, l, r, b, t, zn, zf);
540 
541     D3DXMatrixIdentity(pout);
542     pout->u.m[0][0] = 2.0f * zn / (r - l);
543     pout->u.m[1][1] = -2.0f * zn / (b - t);
544     pout->u.m[2][0] = 1.0f + 2.0f * l / (r - l);
545     pout->u.m[2][1] = -1.0f -2.0f * t / (b - t);
546     pout->u.m[2][2] = zf / (zn - zf);
547     pout->u.m[3][2] = (zn * zf) / (zn -zf);
548     pout->u.m[2][3] = -1.0f;
549     pout->u.m[3][3] = 0.0f;
550     return pout;
551 }
552 
553 D3DXMATRIX* WINAPI D3DXMatrixPerspectiveRH(D3DXMATRIX *pout, FLOAT w, FLOAT h, FLOAT zn, FLOAT zf)
554 {
555     TRACE("pout %p, w %f, h %f, zn %f, zf %f\n", pout, w, h, zn, zf);
556 
557     D3DXMatrixIdentity(pout);
558     pout->u.m[0][0] = 2.0f * zn / w;
559     pout->u.m[1][1] = 2.0f * zn / h;
560     pout->u.m[2][2] = zf / (zn - zf);
561     pout->u.m[3][2] = (zn * zf) / (zn - zf);
562     pout->u.m[2][3] = -1.0f;
563     pout->u.m[3][3] = 0.0f;
564     return pout;
565 }
566 
567 D3DXMATRIX* WINAPI D3DXMatrixReflect(D3DXMATRIX *pout, const D3DXPLANE *pplane)
568 {
569     D3DXPLANE Nplane;
570 
571     TRACE("pout %p, pplane %p\n", pout, pplane);
572 
573     D3DXPlaneNormalize(&Nplane, pplane);
574     D3DXMatrixIdentity(pout);
575     pout->u.m[0][0] = 1.0f - 2.0f * Nplane.a * Nplane.a;
576     pout->u.m[0][1] = -2.0f * Nplane.a * Nplane.b;
577     pout->u.m[0][2] = -2.0f * Nplane.a * Nplane.c;
578     pout->u.m[1][0] = -2.0f * Nplane.a * Nplane.b;
579     pout->u.m[1][1] = 1.0f - 2.0f * Nplane.b * Nplane.b;
580     pout->u.m[1][2] = -2.0f * Nplane.b * Nplane.c;
581     pout->u.m[2][0] = -2.0f * Nplane.c * Nplane.a;
582     pout->u.m[2][1] = -2.0f * Nplane.c * Nplane.b;
583     pout->u.m[2][2] = 1.0f - 2.0f * Nplane.c * Nplane.c;
584     pout->u.m[3][0] = -2.0f * Nplane.d * Nplane.a;
585     pout->u.m[3][1] = -2.0f * Nplane.d * Nplane.b;
586     pout->u.m[3][2] = -2.0f * Nplane.d * Nplane.c;
587     return pout;
588 }
589 
590 D3DXMATRIX * WINAPI D3DXMatrixRotationAxis(D3DXMATRIX *out, const D3DXVECTOR3 *v, FLOAT angle)
591 {
592     D3DXVECTOR3 nv;
593     FLOAT sangle, cangle, cdiff;
594 
595     TRACE("out %p, v %p, angle %f\n", out, v, angle);
596 
597     D3DXVec3Normalize(&nv, v);
598     sangle = sinf(angle);
599     cangle = cosf(angle);
600     cdiff = 1.0f - cangle;
601 
602     out->u.m[0][0] = cdiff * nv.x * nv.x + cangle;
603     out->u.m[1][0] = cdiff * nv.x * nv.y - sangle * nv.z;
604     out->u.m[2][0] = cdiff * nv.x * nv.z + sangle * nv.y;
605     out->u.m[3][0] = 0.0f;
606     out->u.m[0][1] = cdiff * nv.y * nv.x + sangle * nv.z;
607     out->u.m[1][1] = cdiff * nv.y * nv.y + cangle;
608     out->u.m[2][1] = cdiff * nv.y * nv.z - sangle * nv.x;
609     out->u.m[3][1] = 0.0f;
610     out->u.m[0][2] = cdiff * nv.z * nv.x - sangle * nv.y;
611     out->u.m[1][2] = cdiff * nv.z * nv.y + sangle * nv.x;
612     out->u.m[2][2] = cdiff * nv.z * nv.z + cangle;
613     out->u.m[3][2] = 0.0f;
614     out->u.m[0][3] = 0.0f;
615     out->u.m[1][3] = 0.0f;
616     out->u.m[2][3] = 0.0f;
617     out->u.m[3][3] = 1.0f;
618 
619     return out;
620 }
621 
622 D3DXMATRIX* WINAPI D3DXMatrixRotationQuaternion(D3DXMATRIX *pout, const D3DXQUATERNION *pq)
623 {
624     TRACE("pout %p, pq %p\n", pout, pq);
625 
626     D3DXMatrixIdentity(pout);
627     pout->u.m[0][0] = 1.0f - 2.0f * (pq->y * pq->y + pq->z * pq->z);
628     pout->u.m[0][1] = 2.0f * (pq->x *pq->y + pq->z * pq->w);
629     pout->u.m[0][2] = 2.0f * (pq->x * pq->z - pq->y * pq->w);
630     pout->u.m[1][0] = 2.0f * (pq->x * pq->y - pq->z * pq->w);
631     pout->u.m[1][1] = 1.0f - 2.0f * (pq->x * pq->x + pq->z * pq->z);
632     pout->u.m[1][2] = 2.0f * (pq->y *pq->z + pq->x *pq->w);
633     pout->u.m[2][0] = 2.0f * (pq->x * pq->z + pq->y * pq->w);
634     pout->u.m[2][1] = 2.0f * (pq->y *pq->z - pq->x *pq->w);
635     pout->u.m[2][2] = 1.0f - 2.0f * (pq->x * pq->x + pq->y * pq->y);
636     return pout;
637 }
638 
639 D3DXMATRIX* WINAPI D3DXMatrixRotationX(D3DXMATRIX *pout, FLOAT angle)
640 {
641     TRACE("pout %p, angle %f\n", pout, angle);
642 
643     D3DXMatrixIdentity(pout);
644     pout->u.m[1][1] = cosf(angle);
645     pout->u.m[2][2] = cosf(angle);
646     pout->u.m[1][2] = sinf(angle);
647     pout->u.m[2][1] = -sinf(angle);
648     return pout;
649 }
650 
651 D3DXMATRIX* WINAPI D3DXMatrixRotationY(D3DXMATRIX *pout, FLOAT angle)
652 {
653     TRACE("pout %p, angle %f\n", pout, angle);
654 
655     D3DXMatrixIdentity(pout);
656     pout->u.m[0][0] = cosf(angle);
657     pout->u.m[2][2] = cosf(angle);
658     pout->u.m[0][2] = -sinf(angle);
659     pout->u.m[2][0] = sinf(angle);
660     return pout;
661 }
662 
663 D3DXMATRIX * WINAPI D3DXMatrixRotationYawPitchRoll(D3DXMATRIX *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
664 {
665     FLOAT sroll, croll, spitch, cpitch, syaw, cyaw;
666 
667     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
668 
669     sroll = sinf(roll);
670     croll = cosf(roll);
671     spitch = sinf(pitch);
672     cpitch = cosf(pitch);
673     syaw = sinf(yaw);
674     cyaw = cosf(yaw);
675 
676     out->u.m[0][0] = sroll * spitch * syaw + croll * cyaw;
677     out->u.m[0][1] = sroll * cpitch;
678     out->u.m[0][2] = sroll * spitch * cyaw - croll * syaw;
679     out->u.m[0][3] = 0.0f;
680     out->u.m[1][0] = croll * spitch * syaw - sroll * cyaw;
681     out->u.m[1][1] = croll * cpitch;
682     out->u.m[1][2] = croll * spitch * cyaw + sroll * syaw;
683     out->u.m[1][3] = 0.0f;
684     out->u.m[2][0] = cpitch * syaw;
685     out->u.m[2][1] = -spitch;
686     out->u.m[2][2] = cpitch * cyaw;
687     out->u.m[2][3] = 0.0f;
688     out->u.m[3][0] = 0.0f;
689     out->u.m[3][1] = 0.0f;
690     out->u.m[3][2] = 0.0f;
691     out->u.m[3][3] = 1.0f;
692 
693     return out;
694 }
695 
696 D3DXMATRIX* WINAPI D3DXMatrixRotationZ(D3DXMATRIX *pout, FLOAT angle)
697 {
698     TRACE("pout %p, angle %f\n", pout, angle);
699 
700     D3DXMatrixIdentity(pout);
701     pout->u.m[0][0] = cosf(angle);
702     pout->u.m[1][1] = cosf(angle);
703     pout->u.m[0][1] = sinf(angle);
704     pout->u.m[1][0] = -sinf(angle);
705     return pout;
706 }
707 
708 D3DXMATRIX* WINAPI D3DXMatrixScaling(D3DXMATRIX *pout, FLOAT sx, FLOAT sy, FLOAT sz)
709 {
710     TRACE("pout %p, sx %f, sy %f, sz %f\n", pout, sx, sy, sz);
711 
712     D3DXMatrixIdentity(pout);
713     pout->u.m[0][0] = sx;
714     pout->u.m[1][1] = sy;
715     pout->u.m[2][2] = sz;
716     return pout;
717 }
718 
719 D3DXMATRIX* WINAPI D3DXMatrixShadow(D3DXMATRIX *pout, const D3DXVECTOR4 *plight, const D3DXPLANE *pplane)
720 {
721     D3DXPLANE Nplane;
722     FLOAT dot;
723 
724     TRACE("pout %p, plight %p, pplane %p\n", pout, plight, pplane);
725 
726     D3DXPlaneNormalize(&Nplane, pplane);
727     dot = D3DXPlaneDot(&Nplane, plight);
728     pout->u.m[0][0] = dot - Nplane.a * plight->x;
729     pout->u.m[0][1] = -Nplane.a * plight->y;
730     pout->u.m[0][2] = -Nplane.a * plight->z;
731     pout->u.m[0][3] = -Nplane.a * plight->w;
732     pout->u.m[1][0] = -Nplane.b * plight->x;
733     pout->u.m[1][1] = dot - Nplane.b * plight->y;
734     pout->u.m[1][2] = -Nplane.b * plight->z;
735     pout->u.m[1][3] = -Nplane.b * plight->w;
736     pout->u.m[2][0] = -Nplane.c * plight->x;
737     pout->u.m[2][1] = -Nplane.c * plight->y;
738     pout->u.m[2][2] = dot - Nplane.c * plight->z;
739     pout->u.m[2][3] = -Nplane.c * plight->w;
740     pout->u.m[3][0] = -Nplane.d * plight->x;
741     pout->u.m[3][1] = -Nplane.d * plight->y;
742     pout->u.m[3][2] = -Nplane.d * plight->z;
743     pout->u.m[3][3] = dot - Nplane.d * plight->w;
744     return pout;
745 }
746 
747 D3DXMATRIX* WINAPI D3DXMatrixTransformation(D3DXMATRIX *pout, const D3DXVECTOR3 *pscalingcenter, const D3DXQUATERNION *pscalingrotation, const D3DXVECTOR3 *pscaling, const D3DXVECTOR3 *protationcenter, const D3DXQUATERNION *protation, const D3DXVECTOR3 *ptranslation)
748 {
749     D3DXMATRIX m1, m2, m3, m4, m5, m6, m7;
750     D3DXQUATERNION prc;
751     D3DXVECTOR3 psc, pt;
752 
753     TRACE("pout %p, pscalingcenter %p, pscalingrotation %p, pscaling %p, protationcentr %p, protation %p, ptranslation %p\n",
754         pout, pscalingcenter, pscalingrotation, pscaling, protationcenter, protation, ptranslation);
755 
756     if ( !pscalingcenter )
757     {
758         psc.x = 0.0f;
759         psc.y = 0.0f;
760         psc.z = 0.0f;
761     }
762     else
763     {
764         psc.x = pscalingcenter->x;
765         psc.y = pscalingcenter->y;
766         psc.z = pscalingcenter->z;
767     }
768 
769     if ( !protationcenter )
770     {
771         prc.x = 0.0f;
772         prc.y = 0.0f;
773         prc.z = 0.0f;
774     }
775     else
776     {
777         prc.x = protationcenter->x;
778         prc.y = protationcenter->y;
779         prc.z = protationcenter->z;
780     }
781 
782     if ( !ptranslation )
783     {
784         pt.x = 0.0f;
785         pt.y = 0.0f;
786         pt.z = 0.0f;
787     }
788     else
789     {
790         pt.x = ptranslation->x;
791         pt.y = ptranslation->y;
792         pt.z = ptranslation->z;
793     }
794 
795     D3DXMatrixTranslation(&m1, -psc.x, -psc.y, -psc.z);
796 
797     if ( !pscalingrotation || !pscaling )
798     {
799         D3DXMatrixIdentity(&m2);
800         D3DXMatrixIdentity(&m4);
801     }
802     else
803     {
804         D3DXQUATERNION temp;
805 
806         D3DXMatrixRotationQuaternion(&m4, pscalingrotation);
807         temp.w =  pscalingrotation->w;
808         temp.x = -pscalingrotation->x;
809         temp.y = -pscalingrotation->y;
810         temp.z = -pscalingrotation->z;
811         D3DXMatrixRotationQuaternion(&m2, &temp);
812     }
813 
814     if ( !pscaling )
815         D3DXMatrixIdentity(&m3);
816     else
817         D3DXMatrixScaling(&m3, pscaling->x, pscaling->y, pscaling->z);
818 
819     if ( !protation )
820         D3DXMatrixIdentity(&m6);
821     else
822         D3DXMatrixRotationQuaternion(&m6, protation);
823 
824     D3DXMatrixTranslation(&m5, psc.x - prc.x, psc.y - prc.y, psc.z - prc.z);
825     D3DXMatrixTranslation(&m7, prc.x + pt.x, prc.y + pt.y, prc.z + pt.z);
826     D3DXMatrixMultiply(&m1, &m1, &m2);
827     D3DXMatrixMultiply(&m1, &m1, &m3);
828     D3DXMatrixMultiply(&m1, &m1, &m4);
829     D3DXMatrixMultiply(&m1, &m1, &m5);
830     D3DXMatrixMultiply(&m1, &m1, &m6);
831     D3DXMatrixMultiply(pout, &m1, &m7);
832     return pout;
833 }
834 
835 D3DXMATRIX* WINAPI D3DXMatrixTransformation2D(D3DXMATRIX *pout, const D3DXVECTOR2 *pscalingcenter, FLOAT scalingrotation, const D3DXVECTOR2 *pscaling, const D3DXVECTOR2 *protationcenter, FLOAT rotation, const D3DXVECTOR2 *ptranslation)
836 {
837     D3DXQUATERNION rot, sca_rot;
838     D3DXVECTOR3 rot_center, sca, sca_center, trans;
839 
840     TRACE("pout %p, pscalingcenter %p, scalingrotation %f, pscaling %p, protztioncenter %p, rotation %f, ptranslation %p\n",
841         pout, pscalingcenter, scalingrotation, pscaling, protationcenter, rotation, ptranslation);
842 
843     if ( pscalingcenter )
844     {
845         sca_center.x=pscalingcenter->x;
846         sca_center.y=pscalingcenter->y;
847         sca_center.z=0.0f;
848     }
849     else
850     {
851         sca_center.x=0.0f;
852         sca_center.y=0.0f;
853         sca_center.z=0.0f;
854     }
855 
856     if ( pscaling )
857     {
858         sca.x=pscaling->x;
859         sca.y=pscaling->y;
860         sca.z=1.0f;
861     }
862     else
863     {
864         sca.x=1.0f;
865         sca.y=1.0f;
866         sca.z=1.0f;
867     }
868 
869     if ( protationcenter )
870     {
871         rot_center.x=protationcenter->x;
872         rot_center.y=protationcenter->y;
873         rot_center.z=0.0f;
874     }
875     else
876     {
877         rot_center.x=0.0f;
878         rot_center.y=0.0f;
879         rot_center.z=0.0f;
880     }
881 
882     if ( ptranslation )
883     {
884         trans.x=ptranslation->x;
885         trans.y=ptranslation->y;
886         trans.z=0.0f;
887     }
888     else
889     {
890         trans.x=0.0f;
891         trans.y=0.0f;
892         trans.z=0.0f;
893     }
894 
895     rot.w=cosf(rotation/2.0f);
896     rot.x=0.0f;
897     rot.y=0.0f;
898     rot.z=sinf(rotation/2.0f);
899 
900     sca_rot.w=cosf(scalingrotation/2.0f);
901     sca_rot.x=0.0f;
902     sca_rot.y=0.0f;
903     sca_rot.z=sinf(scalingrotation/2.0f);
904 
905     D3DXMatrixTransformation(pout, &sca_center, &sca_rot, &sca, &rot_center, &rot, &trans);
906 
907     return pout;
908 }
909 
910 D3DXMATRIX* WINAPI D3DXMatrixTranslation(D3DXMATRIX *pout, FLOAT x, FLOAT y, FLOAT z)
911 {
912     TRACE("pout %p, x %f, y %f, z %f\n", pout, x, y, z);
913 
914     D3DXMatrixIdentity(pout);
915     pout->u.m[3][0] = x;
916     pout->u.m[3][1] = y;
917     pout->u.m[3][2] = z;
918     return pout;
919 }
920 
921 D3DXMATRIX* WINAPI D3DXMatrixTranspose(D3DXMATRIX *pout, const D3DXMATRIX *pm)
922 {
923     const D3DXMATRIX m = *pm;
924     int i,j;
925 
926     TRACE("pout %p, pm %p\n", pout, pm);
927 
928     for (i=0; i<4; i++)
929         for (j=0; j<4; j++) pout->u.m[i][j] = m.u.m[j][i];
930 
931     return pout;
932 }
933 
934 /*_________________D3DXMatrixStack____________________*/
935 
936 
937 static inline struct ID3DXMatrixStackImpl *impl_from_ID3DXMatrixStack(ID3DXMatrixStack *iface)
938 {
939   return CONTAINING_RECORD(iface, struct ID3DXMatrixStackImpl, ID3DXMatrixStack_iface);
940 }
941 
942 static HRESULT WINAPI ID3DXMatrixStackImpl_QueryInterface(ID3DXMatrixStack *iface, REFIID riid, void **out)
943 {
944     TRACE("iface %p, riid %s, out %p.\n", iface, debugstr_guid(riid), out);
945 
946     if (IsEqualGUID(riid, &IID_ID3DXMatrixStack)
947             || IsEqualGUID(riid, &IID_IUnknown))
948     {
949         ID3DXMatrixStack_AddRef(iface);
950         *out = iface;
951         return S_OK;
952     }
953 
954     WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(riid));
955 
956     *out = NULL;
957     return E_NOINTERFACE;
958 }
959 
960 static ULONG WINAPI ID3DXMatrixStackImpl_AddRef(ID3DXMatrixStack *iface)
961 {
962     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
963     ULONG ref = InterlockedIncrement(&This->ref);
964     TRACE("(%p) : AddRef from %d\n", This, ref - 1);
965     return ref;
966 }
967 
968 static ULONG WINAPI ID3DXMatrixStackImpl_Release(ID3DXMatrixStack *iface)
969 {
970     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
971     ULONG ref = InterlockedDecrement(&This->ref);
972     if (!ref)
973     {
974         HeapFree(GetProcessHeap(), 0, This->stack);
975         HeapFree(GetProcessHeap(), 0, This);
976     }
977     TRACE("(%p) : ReleaseRef to %d\n", This, ref);
978     return ref;
979 }
980 
981 static D3DXMATRIX* WINAPI ID3DXMatrixStackImpl_GetTop(ID3DXMatrixStack *iface)
982 {
983     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
984 
985     TRACE("iface %p\n", iface);
986 
987     return &This->stack[This->current];
988 }
989 
990 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadIdentity(ID3DXMatrixStack *iface)
991 {
992     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
993 
994     TRACE("iface %p\n", iface);
995 
996     D3DXMatrixIdentity(&This->stack[This->current]);
997 
998     return D3D_OK;
999 }
1000 
1001 static HRESULT WINAPI ID3DXMatrixStackImpl_LoadMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
1002 {
1003     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1004 
1005     TRACE("iface %p, pm %p\n", iface, pm);
1006 
1007     This->stack[This->current] = *pm;
1008 
1009     return D3D_OK;
1010 }
1011 
1012 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrix(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
1013 {
1014     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1015 
1016     TRACE("iface %p, pm %p\n", iface, pm);
1017 
1018     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], pm);
1019 
1020     return D3D_OK;
1021 }
1022 
1023 static HRESULT WINAPI ID3DXMatrixStackImpl_MultMatrixLocal(ID3DXMatrixStack *iface, const D3DXMATRIX *pm)
1024 {
1025     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1026 
1027     TRACE("iface %p, pm %p\n", iface, pm);
1028 
1029     D3DXMatrixMultiply(&This->stack[This->current], pm, &This->stack[This->current]);
1030 
1031     return D3D_OK;
1032 }
1033 
1034 static HRESULT WINAPI ID3DXMatrixStackImpl_Pop(ID3DXMatrixStack *iface)
1035 {
1036     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1037 
1038     TRACE("iface %p\n", iface);
1039 
1040     /* Popping the last element on the stack returns D3D_OK, but does nothing. */
1041     if (!This->current) return D3D_OK;
1042 
1043     if (This->current <= This->stack_size / 4 && This->stack_size >= INITIAL_STACK_SIZE * 2)
1044     {
1045         unsigned int new_size;
1046         D3DXMATRIX *new_stack;
1047 
1048         new_size = This->stack_size / 2;
1049         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1050         if (new_stack)
1051         {
1052             This->stack_size = new_size;
1053             This->stack = new_stack;
1054         }
1055     }
1056 
1057     --This->current;
1058 
1059     return D3D_OK;
1060 }
1061 
1062 static HRESULT WINAPI ID3DXMatrixStackImpl_Push(ID3DXMatrixStack *iface)
1063 {
1064     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1065 
1066     TRACE("iface %p\n", iface);
1067 
1068     if (This->current == This->stack_size - 1)
1069     {
1070         unsigned int new_size;
1071         D3DXMATRIX *new_stack;
1072 
1073         if (This->stack_size > UINT_MAX / 2) return E_OUTOFMEMORY;
1074 
1075         new_size = This->stack_size * 2;
1076         new_stack = HeapReAlloc(GetProcessHeap(), 0, This->stack, new_size * sizeof(*new_stack));
1077         if (!new_stack) return E_OUTOFMEMORY;
1078 
1079         This->stack_size = new_size;
1080         This->stack = new_stack;
1081     }
1082 
1083     ++This->current;
1084     This->stack[This->current] = This->stack[This->current - 1];
1085 
1086     return D3D_OK;
1087 }
1088 
1089 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxis(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1090 {
1091     D3DXMATRIX temp;
1092     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1093 
1094     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1095 
1096     D3DXMatrixRotationAxis(&temp, pv, angle);
1097     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1098 
1099     return D3D_OK;
1100 }
1101 
1102 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateAxisLocal(ID3DXMatrixStack *iface, const D3DXVECTOR3 *pv, FLOAT angle)
1103 {
1104     D3DXMATRIX temp;
1105     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1106 
1107     TRACE("iface %p, pv %p, angle %f\n", iface, pv, angle);
1108 
1109     D3DXMatrixRotationAxis(&temp, pv, angle);
1110     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1111 
1112     return D3D_OK;
1113 }
1114 
1115 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRoll(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1116 {
1117     D3DXMATRIX temp;
1118     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1119 
1120     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1121 
1122     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1123     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1124 
1125     return D3D_OK;
1126 }
1127 
1128 static HRESULT WINAPI ID3DXMatrixStackImpl_RotateYawPitchRollLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1129 {
1130     D3DXMATRIX temp;
1131     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1132 
1133     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1134 
1135     D3DXMatrixRotationYawPitchRoll(&temp, x, y, z);
1136     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1137 
1138     return D3D_OK;
1139 }
1140 
1141 static HRESULT WINAPI ID3DXMatrixStackImpl_Scale(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1142 {
1143     D3DXMATRIX temp;
1144     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1145 
1146     TRACE("iface %p,x %f, y %f, z %f\n", iface, x, y, z);
1147 
1148     D3DXMatrixScaling(&temp, x, y, z);
1149     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1150 
1151     return D3D_OK;
1152 }
1153 
1154 static HRESULT WINAPI ID3DXMatrixStackImpl_ScaleLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1155 {
1156     D3DXMATRIX temp;
1157     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1158 
1159     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1160 
1161     D3DXMatrixScaling(&temp, x, y, z);
1162     D3DXMatrixMultiply(&This->stack[This->current], &temp, &This->stack[This->current]);
1163 
1164     return D3D_OK;
1165 }
1166 
1167 static HRESULT WINAPI ID3DXMatrixStackImpl_Translate(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1168 {
1169     D3DXMATRIX temp;
1170     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1171 
1172     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1173 
1174     D3DXMatrixTranslation(&temp, x, y, z);
1175     D3DXMatrixMultiply(&This->stack[This->current], &This->stack[This->current], &temp);
1176 
1177     return D3D_OK;
1178 }
1179 
1180 static HRESULT WINAPI ID3DXMatrixStackImpl_TranslateLocal(ID3DXMatrixStack *iface, FLOAT x, FLOAT y, FLOAT z)
1181 {
1182     D3DXMATRIX temp;
1183     struct ID3DXMatrixStackImpl *This = impl_from_ID3DXMatrixStack(iface);
1184 
1185     TRACE("iface %p, x %f, y %f, z %f\n", iface, x, y, z);
1186 
1187     D3DXMatrixTranslation(&temp, x, y, z);
1188     D3DXMatrixMultiply(&This->stack[This->current], &temp,&This->stack[This->current]);
1189 
1190     return D3D_OK;
1191 }
1192 
1193 static const ID3DXMatrixStackVtbl ID3DXMatrixStack_Vtbl =
1194 {
1195     ID3DXMatrixStackImpl_QueryInterface,
1196     ID3DXMatrixStackImpl_AddRef,
1197     ID3DXMatrixStackImpl_Release,
1198     ID3DXMatrixStackImpl_Pop,
1199     ID3DXMatrixStackImpl_Push,
1200     ID3DXMatrixStackImpl_LoadIdentity,
1201     ID3DXMatrixStackImpl_LoadMatrix,
1202     ID3DXMatrixStackImpl_MultMatrix,
1203     ID3DXMatrixStackImpl_MultMatrixLocal,
1204     ID3DXMatrixStackImpl_RotateAxis,
1205     ID3DXMatrixStackImpl_RotateAxisLocal,
1206     ID3DXMatrixStackImpl_RotateYawPitchRoll,
1207     ID3DXMatrixStackImpl_RotateYawPitchRollLocal,
1208     ID3DXMatrixStackImpl_Scale,
1209     ID3DXMatrixStackImpl_ScaleLocal,
1210     ID3DXMatrixStackImpl_Translate,
1211     ID3DXMatrixStackImpl_TranslateLocal,
1212     ID3DXMatrixStackImpl_GetTop
1213 };
1214 
1215 HRESULT WINAPI D3DXCreateMatrixStack(DWORD flags, ID3DXMatrixStack **stack)
1216 {
1217     struct ID3DXMatrixStackImpl *object;
1218 
1219     TRACE("flags %#x, stack %p.\n", flags, stack);
1220 
1221     if (!(object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object))))
1222     {
1223         *stack = NULL;
1224         return E_OUTOFMEMORY;
1225     }
1226     object->ID3DXMatrixStack_iface.lpVtbl = &ID3DXMatrixStack_Vtbl;
1227     object->ref = 1;
1228 
1229     if (!(object->stack = HeapAlloc(GetProcessHeap(), 0, INITIAL_STACK_SIZE * sizeof(*object->stack))))
1230     {
1231         HeapFree(GetProcessHeap(), 0, object);
1232         *stack = NULL;
1233         return E_OUTOFMEMORY;
1234     }
1235 
1236     object->current = 0;
1237     object->stack_size = INITIAL_STACK_SIZE;
1238     D3DXMatrixIdentity(&object->stack[0]);
1239 
1240     TRACE("Created matrix stack %p.\n", object);
1241 
1242     *stack = &object->ID3DXMatrixStack_iface;
1243     return D3D_OK;
1244 }
1245 
1246 /*_________________D3DXPLANE________________*/
1247 
1248 D3DXPLANE* WINAPI D3DXPlaneFromPointNormal(D3DXPLANE *pout, const D3DXVECTOR3 *pvpoint, const D3DXVECTOR3 *pvnormal)
1249 {
1250     TRACE("pout %p, pvpoint %p, pvnormal %p\n", pout, pvpoint, pvnormal);
1251 
1252     pout->a = pvnormal->x;
1253     pout->b = pvnormal->y;
1254     pout->c = pvnormal->z;
1255     pout->d = -D3DXVec3Dot(pvpoint, pvnormal);
1256     return pout;
1257 }
1258 
1259 D3DXPLANE* WINAPI D3DXPlaneFromPoints(D3DXPLANE *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3)
1260 {
1261     D3DXVECTOR3 edge1, edge2, normal, Nnormal;
1262 
1263     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
1264 
1265     edge1.x = 0.0f; edge1.y = 0.0f; edge1.z = 0.0f;
1266     edge2.x = 0.0f; edge2.y = 0.0f; edge2.z = 0.0f;
1267     D3DXVec3Subtract(&edge1, pv2, pv1);
1268     D3DXVec3Subtract(&edge2, pv3, pv1);
1269     D3DXVec3Cross(&normal, &edge1, &edge2);
1270     D3DXVec3Normalize(&Nnormal, &normal);
1271     D3DXPlaneFromPointNormal(pout, pv1, &Nnormal);
1272     return pout;
1273 }
1274 
1275 D3DXVECTOR3* WINAPI D3DXPlaneIntersectLine(D3DXVECTOR3 *pout, const D3DXPLANE *pp, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2)
1276 {
1277     D3DXVECTOR3 direction, normal;
1278     FLOAT dot, temp;
1279 
1280     TRACE("pout %p, pp %p, pv1 %p, pv2 %p\n", pout, pp, pv1, pv2);
1281 
1282     normal.x = pp->a;
1283     normal.y = pp->b;
1284     normal.z = pp->c;
1285     direction.x = pv2->x - pv1->x;
1286     direction.y = pv2->y - pv1->y;
1287     direction.z = pv2->z - pv1->z;
1288     dot = D3DXVec3Dot(&normal, &direction);
1289     if ( !dot ) return NULL;
1290     temp = ( pp->d + D3DXVec3Dot(&normal, pv1) ) / dot;
1291     pout->x = pv1->x - temp * direction.x;
1292     pout->y = pv1->y - temp * direction.y;
1293     pout->z = pv1->z - temp * direction.z;
1294     return pout;
1295 }
1296 
1297 D3DXPLANE * WINAPI D3DXPlaneNormalize(D3DXPLANE *out, const D3DXPLANE *p)
1298 {
1299     FLOAT norm;
1300 
1301     TRACE("out %p, p %p\n", out, p);
1302 
1303     norm = sqrtf(p->a * p->a + p->b * p->b + p->c * p->c);
1304     if (norm)
1305     {
1306         out->a = p->a / norm;
1307         out->b = p->b / norm;
1308         out->c = p->c / norm;
1309         out->d = p->d / norm;
1310     }
1311     else
1312     {
1313         out->a = 0.0f;
1314         out->b = 0.0f;
1315         out->c = 0.0f;
1316         out->d = 0.0f;
1317     }
1318 
1319     return out;
1320 }
1321 
1322 D3DXPLANE* WINAPI D3DXPlaneTransform(D3DXPLANE *pout, const D3DXPLANE *pplane, const D3DXMATRIX *pm)
1323 {
1324     const D3DXPLANE plane = *pplane;
1325 
1326     TRACE("pout %p, pplane %p, pm %p\n", pout, pplane, pm);
1327 
1328     pout->a = pm->u.m[0][0] * plane.a + pm->u.m[1][0] * plane.b + pm->u.m[2][0] * plane.c + pm->u.m[3][0] * plane.d;
1329     pout->b = pm->u.m[0][1] * plane.a + pm->u.m[1][1] * plane.b + pm->u.m[2][1] * plane.c + pm->u.m[3][1] * plane.d;
1330     pout->c = pm->u.m[0][2] * plane.a + pm->u.m[1][2] * plane.b + pm->u.m[2][2] * plane.c + pm->u.m[3][2] * plane.d;
1331     pout->d = pm->u.m[0][3] * plane.a + pm->u.m[1][3] * plane.b + pm->u.m[2][3] * plane.c + pm->u.m[3][3] * plane.d;
1332     return pout;
1333 }
1334 
1335 D3DXPLANE* WINAPI D3DXPlaneTransformArray(D3DXPLANE* out, UINT outstride, const D3DXPLANE* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1336 {
1337     UINT i;
1338 
1339     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1340 
1341     for (i = 0; i < elements; ++i) {
1342         D3DXPlaneTransform(
1343             (D3DXPLANE*)((char*)out + outstride * i),
1344             (const D3DXPLANE*)((const char*)in + instride * i),
1345             matrix);
1346     }
1347     return out;
1348 }
1349 
1350 /*_________________D3DXQUATERNION________________*/
1351 
1352 D3DXQUATERNION* WINAPI D3DXQuaternionBaryCentric(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, FLOAT f, FLOAT g)
1353 {
1354     D3DXQUATERNION temp1, temp2;
1355 
1356      TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, f %f, g %f\n", pout, pq1, pq2, pq3, f, g);
1357 
1358     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq2, f + g), D3DXQuaternionSlerp(&temp2, pq1, pq3, f+g), g / (f + g));
1359     return pout;
1360 }
1361 
1362 D3DXQUATERNION * WINAPI D3DXQuaternionExp(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1363 {
1364     FLOAT norm;
1365 
1366     TRACE("out %p, q %p\n", out, q);
1367 
1368     norm = sqrtf(q->x * q->x + q->y * q->y + q->z * q->z);
1369     if (norm)
1370     {
1371         out->x = sinf(norm) * q->x / norm;
1372         out->y = sinf(norm) * q->y / norm;
1373         out->z = sinf(norm) * q->z / norm;
1374         out->w = cosf(norm);
1375     }
1376     else
1377     {
1378         out->x = 0.0f;
1379         out->y = 0.0f;
1380         out->z = 0.0f;
1381         out->w = 1.0f;
1382     }
1383 
1384     return out;
1385 }
1386 
1387 D3DXQUATERNION* WINAPI D3DXQuaternionInverse(D3DXQUATERNION *pout, const D3DXQUATERNION *pq)
1388 {
1389     FLOAT norm;
1390 
1391     TRACE("pout %p, pq %p\n", pout, pq);
1392 
1393     norm = D3DXQuaternionLengthSq(pq);
1394 
1395     pout->x = -pq->x / norm;
1396     pout->y = -pq->y / norm;
1397     pout->z = -pq->z / norm;
1398     pout->w = pq->w / norm;
1399     return pout;
1400 }
1401 
1402 D3DXQUATERNION * WINAPI D3DXQuaternionLn(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1403 {
1404     FLOAT t;
1405 
1406     TRACE("out %p, q %p\n", out, q);
1407 
1408     if ((q->w >= 1.0f) || (q->w == -1.0f))
1409         t = 1.0f;
1410     else
1411         t = acosf(q->w) / sqrtf(1.0f - q->w * q->w);
1412 
1413     out->x = t * q->x;
1414     out->y = t * q->y;
1415     out->z = t * q->z;
1416     out->w = 0.0f;
1417 
1418     return out;
1419 }
1420 
1421 D3DXQUATERNION* WINAPI D3DXQuaternionMultiply(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2)
1422 {
1423     D3DXQUATERNION out;
1424 
1425     TRACE("pout %p, pq1 %p, pq2 %p\n", pout, pq1, pq2);
1426 
1427     out.x = pq2->w * pq1->x + pq2->x * pq1->w + pq2->y * pq1->z - pq2->z * pq1->y;
1428     out.y = pq2->w * pq1->y - pq2->x * pq1->z + pq2->y * pq1->w + pq2->z * pq1->x;
1429     out.z = pq2->w * pq1->z + pq2->x * pq1->y - pq2->y * pq1->x + pq2->z * pq1->w;
1430     out.w = pq2->w * pq1->w - pq2->x * pq1->x - pq2->y * pq1->y - pq2->z * pq1->z;
1431     *pout = out;
1432     return pout;
1433 }
1434 
1435 D3DXQUATERNION * WINAPI D3DXQuaternionNormalize(D3DXQUATERNION *out, const D3DXQUATERNION *q)
1436 {
1437     FLOAT norm;
1438 
1439     TRACE("out %p, q %p\n", out, q);
1440 
1441     norm = D3DXQuaternionLength(q);
1442 
1443     out->x = q->x / norm;
1444     out->y = q->y / norm;
1445     out->z = q->z / norm;
1446     out->w = q->w / norm;
1447 
1448     return out;
1449 }
1450 
1451 D3DXQUATERNION * WINAPI D3DXQuaternionRotationAxis(D3DXQUATERNION *out, const D3DXVECTOR3 *v, FLOAT angle)
1452 {
1453     D3DXVECTOR3 temp;
1454 
1455     TRACE("out %p, v %p, angle %f\n", out, v, angle);
1456 
1457     D3DXVec3Normalize(&temp, v);
1458 
1459     out->x = sinf(angle / 2.0f) * temp.x;
1460     out->y = sinf(angle / 2.0f) * temp.y;
1461     out->z = sinf(angle / 2.0f) * temp.z;
1462     out->w = cosf(angle / 2.0f);
1463 
1464     return out;
1465 }
1466 
1467 D3DXQUATERNION * WINAPI D3DXQuaternionRotationMatrix(D3DXQUATERNION *out, const D3DXMATRIX *m)
1468 {
1469     FLOAT s, trace;
1470 
1471     TRACE("out %p, m %p\n", out, m);
1472 
1473     trace = m->u.m[0][0] + m->u.m[1][1] + m->u.m[2][2] + 1.0f;
1474     if (trace > 1.0f)
1475     {
1476         s = 2.0f * sqrtf(trace);
1477         out->x = (m->u.m[1][2] - m->u.m[2][1]) / s;
1478         out->y = (m->u.m[2][0] - m->u.m[0][2]) / s;
1479         out->z = (m->u.m[0][1] - m->u.m[1][0]) / s;
1480         out->w = 0.25f * s;
1481     }
1482     else
1483     {
1484         int i, maxi = 0;
1485 
1486         for (i = 1; i < 3; i++)
1487         {
1488             if (m->u.m[i][i] > m->u.m[maxi][maxi])
1489                 maxi = i;
1490         }
1491 
1492         switch (maxi)
1493         {
1494             case 0:
1495                 s = 2.0f * sqrtf(1.0f + m->u.m[0][0] - m->u.m[1][1] - m->u.m[2][2]);
1496                 out->x = 0.25f * s;
1497                 out->y = (m->u.m[0][1] + m->u.m[1][0]) / s;
1498                 out->z = (m->u.m[0][2] + m->u.m[2][0]) / s;
1499                 out->w = (m->u.m[1][2] - m->u.m[2][1]) / s;
1500                 break;
1501 
1502             case 1:
1503                 s = 2.0f * sqrtf(1.0f + m->u.m[1][1] - m->u.m[0][0] - m->u.m[2][2]);
1504                 out->x = (m->u.m[0][1] + m->u.m[1][0]) / s;
1505                 out->y = 0.25f * s;
1506                 out->z = (m->u.m[1][2] + m->u.m[2][1]) / s;
1507                 out->w = (m->u.m[2][0] - m->u.m[0][2]) / s;
1508                 break;
1509 
1510             case 2:
1511                 s = 2.0f * sqrtf(1.0f + m->u.m[2][2] - m->u.m[0][0] - m->u.m[1][1]);
1512                 out->x = (m->u.m[0][2] + m->u.m[2][0]) / s;
1513                 out->y = (m->u.m[1][2] + m->u.m[2][1]) / s;
1514                 out->z = 0.25f * s;
1515                 out->w = (m->u.m[0][1] - m->u.m[1][0]) / s;
1516                 break;
1517         }
1518     }
1519 
1520     return out;
1521 }
1522 
1523 D3DXQUATERNION * WINAPI D3DXQuaternionRotationYawPitchRoll(D3DXQUATERNION *out, FLOAT yaw, FLOAT pitch, FLOAT roll)
1524 {
1525     FLOAT syaw, cyaw, spitch, cpitch, sroll, croll;
1526 
1527     TRACE("out %p, yaw %f, pitch %f, roll %f\n", out, yaw, pitch, roll);
1528 
1529     syaw = sinf(yaw / 2.0f);
1530     cyaw = cosf(yaw / 2.0f);
1531     spitch = sinf(pitch / 2.0f);
1532     cpitch = cosf(pitch / 2.0f);
1533     sroll = sinf(roll / 2.0f);
1534     croll = cosf(roll / 2.0f);
1535 
1536     out->x = syaw * cpitch * sroll + cyaw * spitch * croll;
1537     out->y = syaw * cpitch * croll - cyaw * spitch * sroll;
1538     out->z = cyaw * cpitch * sroll - syaw * spitch * croll;
1539     out->w = cyaw * cpitch * croll + syaw * spitch * sroll;
1540 
1541     return out;
1542 }
1543 
1544 D3DXQUATERNION * WINAPI D3DXQuaternionSlerp(D3DXQUATERNION *out, const D3DXQUATERNION *q1,
1545         const D3DXQUATERNION *q2, FLOAT t)
1546 {
1547     FLOAT dot, temp;
1548 
1549     TRACE("out %p, q1 %p, q2 %p, t %f\n", out, q1, q2, t);
1550 
1551     temp = 1.0f - t;
1552     dot = D3DXQuaternionDot(q1, q2);
1553     if (dot < 0.0f)
1554     {
1555         t = -t;
1556         dot = -dot;
1557     }
1558 
1559     if (1.0f - dot > 0.001f)
1560     {
1561         FLOAT theta = acosf(dot);
1562 
1563         temp = sinf(theta * temp) / sinf(theta);
1564         t = sinf(theta * t) / sinf(theta);
1565     }
1566 
1567     out->x = temp * q1->x + t * q2->x;
1568     out->y = temp * q1->y + t * q2->y;
1569     out->z = temp * q1->z + t * q2->z;
1570     out->w = temp * q1->w + t * q2->w;
1571 
1572     return out;
1573 }
1574 
1575 D3DXQUATERNION* WINAPI D3DXQuaternionSquad(D3DXQUATERNION *pout, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3, const D3DXQUATERNION *pq4, FLOAT t)
1576 {
1577     D3DXQUATERNION temp1, temp2;
1578 
1579     TRACE("pout %p, pq1 %p, pq2 %p, pq3 %p, pq4 %p, t %f\n", pout, pq1, pq2, pq3, pq4, t);
1580 
1581     D3DXQuaternionSlerp(pout, D3DXQuaternionSlerp(&temp1, pq1, pq4, t), D3DXQuaternionSlerp(&temp2, pq2, pq3, t), 2.0f * t * (1.0f - t));
1582     return pout;
1583 }
1584 
1585 static D3DXQUATERNION add_diff(const D3DXQUATERNION *q1, const D3DXQUATERNION *q2, const FLOAT add)
1586 {
1587     D3DXQUATERNION temp;
1588 
1589     temp.x = q1->x + add * q2->x;
1590     temp.y = q1->y + add * q2->y;
1591     temp.z = q1->z + add * q2->z;
1592     temp.w = q1->w + add * q2->w;
1593 
1594     return temp;
1595 }
1596 
1597 void WINAPI D3DXQuaternionSquadSetup(D3DXQUATERNION *paout, D3DXQUATERNION *pbout, D3DXQUATERNION *pcout, const D3DXQUATERNION *pq0, const D3DXQUATERNION *pq1, const D3DXQUATERNION *pq2, const D3DXQUATERNION *pq3)
1598 {
1599     D3DXQUATERNION q, temp1, temp2, temp3, zero;
1600     D3DXQUATERNION aout, cout;
1601 
1602     TRACE("paout %p, pbout %p, pcout %p, pq0 %p, pq1 %p, pq2 %p, pq3 %p\n", paout, pbout, pcout, pq0, pq1, pq2, pq3);
1603 
1604     zero.x = 0.0f;
1605     zero.y = 0.0f;
1606     zero.z = 0.0f;
1607     zero.w = 0.0f;
1608 
1609     if (D3DXQuaternionDot(pq0, pq1) < 0.0f)
1610         temp2 = add_diff(&zero, pq0, -1.0f);
1611     else
1612         temp2 = *pq0;
1613 
1614     if (D3DXQuaternionDot(pq1, pq2) < 0.0f)
1615         cout = add_diff(&zero, pq2, -1.0f);
1616     else
1617         cout = *pq2;
1618 
1619     if (D3DXQuaternionDot(&cout, pq3) < 0.0f)
1620         temp3 = add_diff(&zero, pq3, -1.0f);
1621     else
1622         temp3 = *pq3;
1623 
1624     D3DXQuaternionInverse(&temp1, pq1);
1625     D3DXQuaternionMultiply(&temp2, &temp1, &temp2);
1626     D3DXQuaternionLn(&temp2, &temp2);
1627     D3DXQuaternionMultiply(&q, &temp1, &cout);
1628     D3DXQuaternionLn(&q, &q);
1629     temp1 = add_diff(&temp2, &q, 1.0f);
1630     temp1.x *= -0.25f;
1631     temp1.y *= -0.25f;
1632     temp1.z *= -0.25f;
1633     temp1.w *= -0.25f;
1634     D3DXQuaternionExp(&temp1, &temp1);
1635     D3DXQuaternionMultiply(&aout, pq1, &temp1);
1636 
1637     D3DXQuaternionInverse(&temp1, &cout);
1638     D3DXQuaternionMultiply(&temp2, &temp1, pq1);
1639     D3DXQuaternionLn(&temp2, &temp2);
1640     D3DXQuaternionMultiply(&q, &temp1, &temp3);
1641     D3DXQuaternionLn(&q, &q);
1642     temp1 = add_diff(&temp2, &q, 1.0f);
1643     temp1.x *= -0.25f;
1644     temp1.y *= -0.25f;
1645     temp1.z *= -0.25f;
1646     temp1.w *= -0.25f;
1647     D3DXQuaternionExp(&temp1, &temp1);
1648     D3DXQuaternionMultiply(pbout, &cout, &temp1);
1649     *paout = aout;
1650     *pcout = cout;
1651 }
1652 
1653 void WINAPI D3DXQuaternionToAxisAngle(const D3DXQUATERNION *pq, D3DXVECTOR3 *paxis, FLOAT *pangle)
1654 {
1655     TRACE("pq %p, paxis %p, pangle %p\n", pq, paxis, pangle);
1656 
1657     if (paxis)
1658     {
1659         paxis->x = pq->x;
1660         paxis->y = pq->y;
1661         paxis->z = pq->z;
1662     }
1663     if (pangle)
1664         *pangle = 2.0f * acosf(pq->w);
1665 }
1666 
1667 /*_________________D3DXVec2_____________________*/
1668 
1669 D3DXVECTOR2* WINAPI D3DXVec2BaryCentric(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT f, FLOAT g)
1670 {
1671     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1672 
1673     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1674     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1675     return pout;
1676 }
1677 
1678 D3DXVECTOR2* WINAPI D3DXVec2CatmullRom(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv0, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pv3, FLOAT s)
1679 {
1680     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1681 
1682     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1683     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1684     return pout;
1685 }
1686 
1687 D3DXVECTOR2* WINAPI D3DXVec2Hermite(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv1, const D3DXVECTOR2 *pt1, const D3DXVECTOR2 *pv2, const D3DXVECTOR2 *pt2, FLOAT s)
1688 {
1689     FLOAT h1, h2, h3, h4;
1690 
1691     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1692 
1693     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1694     h2 = s * s * s - 2.0f * s * s + s;
1695     h3 = -2.0f * s * s * s + 3.0f * s * s;
1696     h4 = s * s * s - s * s;
1697 
1698     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1699     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1700     return pout;
1701 }
1702 
1703 D3DXVECTOR2* WINAPI D3DXVec2Normalize(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv)
1704 {
1705     FLOAT norm;
1706 
1707     TRACE("pout %p, pv %p\n", pout, pv);
1708 
1709     norm = D3DXVec2Length(pv);
1710     if ( !norm )
1711     {
1712         pout->x = 0.0f;
1713         pout->y = 0.0f;
1714     }
1715     else
1716     {
1717         pout->x = pv->x / norm;
1718         pout->y = pv->y / norm;
1719     }
1720 
1721     return pout;
1722 }
1723 
1724 D3DXVECTOR4* WINAPI D3DXVec2Transform(D3DXVECTOR4 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1725 {
1726     D3DXVECTOR4 out;
1727 
1728     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1729 
1730     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y  + pm->u.m[3][0];
1731     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y  + pm->u.m[3][1];
1732     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y  + pm->u.m[3][2];
1733     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y  + pm->u.m[3][3];
1734     *pout = out;
1735     return pout;
1736 }
1737 
1738 D3DXVECTOR4* WINAPI D3DXVec2TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1739 {
1740     UINT i;
1741 
1742     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1743 
1744     for (i = 0; i < elements; ++i) {
1745         D3DXVec2Transform(
1746             (D3DXVECTOR4*)((char*)out + outstride * i),
1747             (const D3DXVECTOR2*)((const char*)in + instride * i),
1748             matrix);
1749     }
1750     return out;
1751 }
1752 
1753 D3DXVECTOR2* WINAPI D3DXVec2TransformCoord(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1754 {
1755     D3DXVECTOR2 v;
1756     FLOAT norm;
1757 
1758     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1759 
1760     v = *pv;
1761     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[3][3];
1762 
1763     pout->x = (pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[3][0]) / norm;
1764     pout->y = (pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[3][1]) / norm;
1765 
1766     return pout;
1767 }
1768 
1769 D3DXVECTOR2* WINAPI D3DXVec2TransformCoordArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1770 {
1771     UINT i;
1772 
1773     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1774 
1775     for (i = 0; i < elements; ++i) {
1776         D3DXVec2TransformCoord(
1777             (D3DXVECTOR2*)((char*)out + outstride * i),
1778             (const D3DXVECTOR2*)((const char*)in + instride * i),
1779             matrix);
1780     }
1781     return out;
1782 }
1783 
1784 D3DXVECTOR2* WINAPI D3DXVec2TransformNormal(D3DXVECTOR2 *pout, const D3DXVECTOR2 *pv, const D3DXMATRIX *pm)
1785 {
1786     const D3DXVECTOR2 v = *pv;
1787 
1788     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1789 
1790     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y;
1791     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y;
1792     return pout;
1793 }
1794 
1795 D3DXVECTOR2* WINAPI D3DXVec2TransformNormalArray(D3DXVECTOR2* out, UINT outstride, const D3DXVECTOR2 *in, UINT instride, const D3DXMATRIX *matrix, UINT elements)
1796 {
1797     UINT i;
1798 
1799     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1800 
1801     for (i = 0; i < elements; ++i) {
1802         D3DXVec2TransformNormal(
1803             (D3DXVECTOR2*)((char*)out + outstride * i),
1804             (const D3DXVECTOR2*)((const char*)in + instride * i),
1805             matrix);
1806     }
1807     return out;
1808 }
1809 
1810 /*_________________D3DXVec3_____________________*/
1811 
1812 D3DXVECTOR3* WINAPI D3DXVec3BaryCentric(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT f, FLOAT g)
1813 {
1814     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
1815 
1816     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
1817     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
1818     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
1819     return pout;
1820 }
1821 
1822 D3DXVECTOR3* WINAPI D3DXVec3CatmullRom( D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv0, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pv3, FLOAT s)
1823 {
1824     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
1825 
1826     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
1827     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
1828     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
1829     return pout;
1830 }
1831 
1832 D3DXVECTOR3* WINAPI D3DXVec3Hermite(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv1, const D3DXVECTOR3 *pt1, const D3DXVECTOR3 *pv2, const D3DXVECTOR3 *pt2, FLOAT s)
1833 {
1834     FLOAT h1, h2, h3, h4;
1835 
1836     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
1837 
1838     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
1839     h2 = s * s * s - 2.0f * s * s + s;
1840     h3 = -2.0f * s * s * s + 3.0f * s * s;
1841     h4 = s * s * s - s * s;
1842 
1843     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
1844     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
1845     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
1846     return pout;
1847 }
1848 
1849 D3DXVECTOR3* WINAPI D3DXVec3Normalize(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv)
1850 {
1851     FLOAT norm;
1852 
1853     TRACE("pout %p, pv %p\n", pout, pv);
1854 
1855     norm = D3DXVec3Length(pv);
1856     if ( !norm )
1857     {
1858         pout->x = 0.0f;
1859         pout->y = 0.0f;
1860         pout->z = 0.0f;
1861     }
1862     else
1863     {
1864         pout->x = pv->x / norm;
1865         pout->y = pv->y / norm;
1866         pout->z = pv->z / norm;
1867     }
1868 
1869     return pout;
1870 }
1871 
1872 D3DXVECTOR3* WINAPI D3DXVec3Project(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DVIEWPORT9 *pviewport, const D3DXMATRIX *pprojection, const D3DXMATRIX *pview, const D3DXMATRIX *pworld)
1873 {
1874     D3DXMATRIX m;
1875 
1876     TRACE("pout %p, pv %p, pviewport %p, pprojection %p, pview %p, pworld %p\n", pout, pv, pviewport, pprojection, pview, pworld);
1877 
1878     D3DXMatrixIdentity(&m);
1879     if (pworld) D3DXMatrixMultiply(&m, &m, pworld);
1880     if (pview) D3DXMatrixMultiply(&m, &m, pview);
1881     if (pprojection) D3DXMatrixMultiply(&m, &m, pprojection);
1882 
1883     D3DXVec3TransformCoord(pout, pv, &m);
1884 
1885     if (pviewport)
1886     {
1887         pout->x = pviewport->X +  ( 1.0f + pout->x ) * pviewport->Width / 2.0f;
1888         pout->y = pviewport->Y +  ( 1.0f - pout->y ) * pviewport->Height / 2.0f;
1889         pout->z = pviewport->MinZ + pout->z * ( pviewport->MaxZ - pviewport->MinZ );
1890     }
1891     return pout;
1892 }
1893 
1894 D3DXVECTOR3* WINAPI D3DXVec3ProjectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
1895 {
1896     UINT i;
1897 
1898     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
1899         out, outstride, in, instride, viewport, projection, view, world, elements);
1900 
1901     for (i = 0; i < elements; ++i) {
1902         D3DXVec3Project(
1903             (D3DXVECTOR3*)((char*)out + outstride * i),
1904             (const D3DXVECTOR3*)((const char*)in + instride * i),
1905             viewport, projection, view, world);
1906     }
1907     return out;
1908 }
1909 
1910 D3DXVECTOR4* WINAPI D3DXVec3Transform(D3DXVECTOR4 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1911 {
1912     D3DXVECTOR4 out;
1913 
1914     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1915 
1916     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0];
1917     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1];
1918     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2];
1919     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3];
1920     *pout = out;
1921     return pout;
1922 }
1923 
1924 D3DXVECTOR4* WINAPI D3DXVec3TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1925 {
1926     UINT i;
1927 
1928     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1929 
1930     for (i = 0; i < elements; ++i) {
1931         D3DXVec3Transform(
1932             (D3DXVECTOR4*)((char*)out + outstride * i),
1933             (const D3DXVECTOR3*)((const char*)in + instride * i),
1934             matrix);
1935     }
1936     return out;
1937 }
1938 
1939 D3DXVECTOR3* WINAPI D3DXVec3TransformCoord(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1940 {
1941     D3DXVECTOR3 out;
1942     FLOAT norm;
1943 
1944     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1945 
1946     norm = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] *pv->z + pm->u.m[3][3];
1947 
1948     out.x = (pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0]) / norm;
1949     out.y = (pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1]) / norm;
1950     out.z = (pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2]) / norm;
1951 
1952     *pout = out;
1953 
1954     return pout;
1955 }
1956 
1957 D3DXVECTOR3* WINAPI D3DXVec3TransformCoordArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1958 {
1959     UINT i;
1960 
1961     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1962 
1963     for (i = 0; i < elements; ++i) {
1964         D3DXVec3TransformCoord(
1965             (D3DXVECTOR3*)((char*)out + outstride * i),
1966             (const D3DXVECTOR3*)((const char*)in + instride * i),
1967             matrix);
1968     }
1969     return out;
1970 }
1971 
1972 D3DXVECTOR3* WINAPI D3DXVec3TransformNormal(D3DXVECTOR3 *pout, const D3DXVECTOR3 *pv, const D3DXMATRIX *pm)
1973 {
1974     const D3DXVECTOR3 v = *pv;
1975 
1976     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
1977 
1978     pout->x = pm->u.m[0][0] * v.x + pm->u.m[1][0] * v.y + pm->u.m[2][0] * v.z;
1979     pout->y = pm->u.m[0][1] * v.x + pm->u.m[1][1] * v.y + pm->u.m[2][1] * v.z;
1980     pout->z = pm->u.m[0][2] * v.x + pm->u.m[1][2] * v.y + pm->u.m[2][2] * v.z;
1981     return pout;
1982 
1983 }
1984 
1985 D3DXVECTOR3* WINAPI D3DXVec3TransformNormalArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
1986 {
1987     UINT i;
1988 
1989     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
1990 
1991     for (i = 0; i < elements; ++i) {
1992         D3DXVec3TransformNormal(
1993             (D3DXVECTOR3*)((char*)out + outstride * i),
1994             (const D3DXVECTOR3*)((const char*)in + instride * i),
1995             matrix);
1996     }
1997     return out;
1998 }
1999 
2000 D3DXVECTOR3 * WINAPI D3DXVec3Unproject(D3DXVECTOR3 *out, const D3DXVECTOR3 *v,
2001         const D3DVIEWPORT9 *viewport, const D3DXMATRIX *projection, const D3DXMATRIX *view,
2002         const D3DXMATRIX *world)
2003 {
2004     D3DXMATRIX m;
2005 
2006     TRACE("out %p, v %p, viewport %p, projection %p, view %p, world %p.\n",
2007             out, v, viewport, projection, view, world);
2008 
2009     D3DXMatrixIdentity(&m);
2010     if (world)
2011         D3DXMatrixMultiply(&m, &m, world);
2012     if (view)
2013         D3DXMatrixMultiply(&m, &m, view);
2014     if (projection)
2015         D3DXMatrixMultiply(&m, &m, projection);
2016     D3DXMatrixInverse(&m, NULL, &m);
2017 
2018     *out = *v;
2019     if (viewport)
2020     {
2021         out->x = 2.0f * (out->x - viewport->X) / viewport->Width - 1.0f;
2022         out->y = 1.0f - 2.0f * (out->y - viewport->Y) / viewport->Height;
2023         out->z = (out->z - viewport->MinZ) / (viewport->MaxZ - viewport->MinZ);
2024     }
2025     D3DXVec3TransformCoord(out, out, &m);
2026     return out;
2027 }
2028 
2029 D3DXVECTOR3* WINAPI D3DXVec3UnprojectArray(D3DXVECTOR3* out, UINT outstride, const D3DXVECTOR3* in, UINT instride, const D3DVIEWPORT9* viewport, const D3DXMATRIX* projection, const D3DXMATRIX* view, const D3DXMATRIX* world, UINT elements)
2030 {
2031     UINT i;
2032 
2033     TRACE("out %p, outstride %u, in %p, instride %u, viewport %p, projection %p, view %p, world %p, elements %u\n",
2034         out, outstride, in, instride, viewport, projection, view, world, elements);
2035 
2036     for (i = 0; i < elements; ++i) {
2037         D3DXVec3Unproject(
2038             (D3DXVECTOR3*)((char*)out + outstride * i),
2039             (const D3DXVECTOR3*)((const char*)in + instride * i),
2040             viewport, projection, view, world);
2041     }
2042     return out;
2043 }
2044 
2045 /*_________________D3DXVec4_____________________*/
2046 
2047 D3DXVECTOR4* WINAPI D3DXVec4BaryCentric(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT f, FLOAT g)
2048 {
2049     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p, f %f, g %f\n", pout, pv1, pv2, pv3, f, g);
2050 
2051     pout->x = (1.0f-f-g) * (pv1->x) + f * (pv2->x) + g * (pv3->x);
2052     pout->y = (1.0f-f-g) * (pv1->y) + f * (pv2->y) + g * (pv3->y);
2053     pout->z = (1.0f-f-g) * (pv1->z) + f * (pv2->z) + g * (pv3->z);
2054     pout->w = (1.0f-f-g) * (pv1->w) + f * (pv2->w) + g * (pv3->w);
2055     return pout;
2056 }
2057 
2058 D3DXVECTOR4* WINAPI D3DXVec4CatmullRom(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv0, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3, FLOAT s)
2059 {
2060     TRACE("pout %p, pv0 %p, pv1 %p, pv2 %p, pv3 %p, s %f\n", pout, pv0, pv1, pv2, pv3, s);
2061 
2062     pout->x = 0.5f * (2.0f * pv1->x + (pv2->x - pv0->x) *s + (2.0f *pv0->x - 5.0f * pv1->x + 4.0f * pv2->x - pv3->x) * s * s + (pv3->x -3.0f * pv2->x + 3.0f * pv1->x - pv0->x) * s * s * s);
2063     pout->y = 0.5f * (2.0f * pv1->y + (pv2->y - pv0->y) *s + (2.0f *pv0->y - 5.0f * pv1->y + 4.0f * pv2->y - pv3->y) * s * s + (pv3->y -3.0f * pv2->y + 3.0f * pv1->y - pv0->y) * s * s * s);
2064     pout->z = 0.5f * (2.0f * pv1->z + (pv2->z - pv0->z) *s + (2.0f *pv0->z - 5.0f * pv1->z + 4.0f * pv2->z - pv3->z) * s * s + (pv3->z -3.0f * pv2->z + 3.0f * pv1->z - pv0->z) * s * s * s);
2065     pout->w = 0.5f * (2.0f * pv1->w + (pv2->w - pv0->w) *s + (2.0f *pv0->w - 5.0f * pv1->w + 4.0f * pv2->w - pv3->w) * s * s + (pv3->w -3.0f * pv2->w + 3.0f * pv1->w - pv0->w) * s * s * s);
2066     return pout;
2067 }
2068 
2069 D3DXVECTOR4* WINAPI D3DXVec4Cross(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pv3)
2070 {
2071     D3DXVECTOR4 out;
2072 
2073     TRACE("pout %p, pv1 %p, pv2 %p, pv3 %p\n", pout, pv1, pv2, pv3);
2074 
2075     out.x = pv1->y * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->y * pv3->w - pv3->y * pv2->w) + pv1->w * (pv2->y * pv3->z - pv2->z *pv3->y);
2076     out.y = -(pv1->x * (pv2->z * pv3->w - pv3->z * pv2->w) - pv1->z * (pv2->x * pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->z - pv3->x * pv2->z));
2077     out.z = pv1->x * (pv2->y * pv3->w - pv3->y * pv2->w) - pv1->y * (pv2->x *pv3->w - pv3->x * pv2->w) + pv1->w * (pv2->x * pv3->y - pv3->x * pv2->y);
2078     out.w = -(pv1->x * (pv2->y * pv3->z - pv3->y * pv2->z) - pv1->y * (pv2->x * pv3->z - pv3->x *pv2->z) + pv1->z * (pv2->x * pv3->y - pv3->x * pv2->y));
2079     *pout = out;
2080     return pout;
2081 }
2082 
2083 D3DXVECTOR4* WINAPI D3DXVec4Hermite(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv1, const D3DXVECTOR4 *pt1, const D3DXVECTOR4 *pv2, const D3DXVECTOR4 *pt2, FLOAT s)
2084 {
2085     FLOAT h1, h2, h3, h4;
2086 
2087     TRACE("pout %p, pv1 %p, pt1 %p, pv2 %p, pt2 %p, s %f\n", pout, pv1, pt1, pv2, pt2, s);
2088 
2089     h1 = 2.0f * s * s * s - 3.0f * s * s + 1.0f;
2090     h2 = s * s * s - 2.0f * s * s + s;
2091     h3 = -2.0f * s * s * s + 3.0f * s * s;
2092     h4 = s * s * s - s * s;
2093 
2094     pout->x = h1 * (pv1->x) + h2 * (pt1->x) + h3 * (pv2->x) + h4 * (pt2->x);
2095     pout->y = h1 * (pv1->y) + h2 * (pt1->y) + h3 * (pv2->y) + h4 * (pt2->y);
2096     pout->z = h1 * (pv1->z) + h2 * (pt1->z) + h3 * (pv2->z) + h4 * (pt2->z);
2097     pout->w = h1 * (pv1->w) + h2 * (pt1->w) + h3 * (pv2->w) + h4 * (pt2->w);
2098     return pout;
2099 }
2100 
2101 D3DXVECTOR4* WINAPI D3DXVec4Normalize(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv)
2102 {
2103     FLOAT norm;
2104 
2105     TRACE("pout %p, pv %p\n", pout, pv);
2106 
2107     norm = D3DXVec4Length(pv);
2108 
2109     pout->x = pv->x / norm;
2110     pout->y = pv->y / norm;
2111     pout->z = pv->z / norm;
2112     pout->w = pv->w / norm;
2113 
2114     return pout;
2115 }
2116 
2117 D3DXVECTOR4* WINAPI D3DXVec4Transform(D3DXVECTOR4 *pout, const D3DXVECTOR4 *pv, const D3DXMATRIX *pm)
2118 {
2119     D3DXVECTOR4 out;
2120 
2121     TRACE("pout %p, pv %p, pm %p\n", pout, pv, pm);
2122 
2123     out.x = pm->u.m[0][0] * pv->x + pm->u.m[1][0] * pv->y + pm->u.m[2][0] * pv->z + pm->u.m[3][0] * pv->w;
2124     out.y = pm->u.m[0][1] * pv->x + pm->u.m[1][1] * pv->y + pm->u.m[2][1] * pv->z + pm->u.m[3][1] * pv->w;
2125     out.z = pm->u.m[0][2] * pv->x + pm->u.m[1][2] * pv->y + pm->u.m[2][2] * pv->z + pm->u.m[3][2] * pv->w;
2126     out.w = pm->u.m[0][3] * pv->x + pm->u.m[1][3] * pv->y + pm->u.m[2][3] * pv->z + pm->u.m[3][3] * pv->w;
2127     *pout = out;
2128     return pout;
2129 }
2130 
2131 D3DXVECTOR4* WINAPI D3DXVec4TransformArray(D3DXVECTOR4* out, UINT outstride, const D3DXVECTOR4* in, UINT instride, const D3DXMATRIX* matrix, UINT elements)
2132 {
2133     UINT i;
2134 
2135     TRACE("out %p, outstride %u, in %p, instride %u, matrix %p, elements %u\n", out, outstride, in, instride, matrix, elements);
2136 
2137     for (i = 0; i < elements; ++i) {
2138         D3DXVec4Transform(
2139             (D3DXVECTOR4*)((char*)out + outstride * i),
2140             (const D3DXVECTOR4*)((const char*)in + instride * i),
2141             matrix);
2142     }
2143     return out;
2144 }
2145 
/* Converts a single precision float to a 16-bit half float bit pattern
 * (1 sign bit, 5 exponent bits with excess 15, 10 fraction bits).
 *
 * Special inputs: infinities and NaNs map to 0x7fff (0xffff when negative),
 * zeros keep their sign. Values whose exponent does not fit also map to
 * 0x7fff plus sign; values too small for a denormalized half become a signed
 * zero. Rounding is to nearest, with ties going to the even significand. */
unsigned short float_32_to_16(const float in)
{
    const int negative = (copysignf(1, in) < 0);
    const unsigned short sign_bit = negative ? 0x8000 : 0x0000;
    float scaled = fabsf(in);
    float fraction;
    int biased_exp = 0, unrounded_exp;
    unsigned int significand;
    unsigned short half;

    /* Deal with special numbers */
    if (isinf(in) || isnan(in))
        return negative ? 0xffff : 0x7fff;
    if (in == 0.0f)
        return sign_bit;

    /* Scale the magnitude into [1024, 2048) so that its integer part is an
     * 11-bit significand, tracking the power of two that was applied. */
    while (scaled < 1024.0f)
    {
        scaled *= 2.0f;
        biased_exp--;
    }
    while (scaled >= 2048.0f)
    {
        scaled /= 2.0f;
        biased_exp++;
    }

    /* +10 accounts for the significand scaling, +15 is the exponent bias. */
    biased_exp += 10 + 15;
    unrounded_exp = biased_exp;

    significand = (unsigned int)scaled;
    fraction = scaled - significand;
    if (fraction > 0.5f || (fraction == 0.5f && (significand & 1)))
        significand++; /* round to nearest, ties to even */
    if (significand == 2048)
    {
        /* Rounding carried into the next power of two. */
        significand = 1024;
        biased_exp++;
    }

    if (biased_exp > 31)
    {
        /* Too big for the exponent field. */
        half = 0x7fff;
    }
    else if (biased_exp <= 0)
    {
        /* Denormalized half float; anything below the smallest one is zero. */
        if (biased_exp < -11)
            return sign_bit;

        /* Redo the rounding from the unrounded value: keep 13 extra bits of
         * precision, shift into denormal position, then round half to even. */
        significand = (unsigned int)(scaled * 8192.0f);
        significand >>= 1 - unrounded_exp;
        significand -= ~(significand >> 13) & 1;
        significand >>= 12;

        half = (significand >> 1) + (significand & 1);
    }
    else
    {
        half = (biased_exp << 10) | (significand & 0x3ff);
    }

    half |= sign_bit;
    return half;
}
2230 
2231 D3DXFLOAT16 *WINAPI D3DXFloat32To16Array(D3DXFLOAT16 *pout, const FLOAT *pin, UINT n)
2232 {
2233     unsigned int i;
2234 
2235     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2236 
2237     for (i = 0; i < n; ++i)
2238     {
2239         pout[i].value = float_32_to_16(pin[i]);
2240     }
2241 
2242     return pout;
2243 }
2244 
2245 /* Native d3dx9's D3DXFloat16to32Array lacks support for NaN and Inf. Specifically, e = 16 is treated as a
2246  * regular number - e.g., 0x7fff is converted to 131008.0 and 0xffff to -131008.0. */
2247 float float_16_to_32(const unsigned short in)
2248 {
2249     const unsigned short s = (in & 0x8000);
2250     const unsigned short e = (in & 0x7C00) >> 10;
2251     const unsigned short m = in & 0x3FF;
2252     const float sgn = (s ? -1.0f : 1.0f);
2253 
2254     if (e == 0)
2255     {
2256         if (m == 0) return sgn * 0.0f; /* +0.0 or -0.0 */
2257         else return sgn * powf(2, -14.0f) * (m / 1024.0f);
2258     }
2259     else
2260     {
2261         return sgn * powf(2, e - 15.0f) * (1.0f + (m / 1024.0f));
2262     }
2263 }
2264 
2265 FLOAT *WINAPI D3DXFloat16To32Array(FLOAT *pout, const D3DXFLOAT16 *pin, UINT n)
2266 {
2267     unsigned int i;
2268 
2269     TRACE("pout %p, pin %p, n %u\n", pout, pin, n);
2270 
2271     for (i = 0; i < n; ++i)
2272     {
2273         pout[i] = float_16_to_32(pin[i].value);
2274     }
2275 
2276     return pout;
2277 }
2278 
2279 /*_________________D3DXSH________________*/
2280 
2281 FLOAT* WINAPI D3DXSHAdd(FLOAT *out, UINT order, const FLOAT *a, const FLOAT *b)
2282 {
2283     UINT i;
2284 
2285     TRACE("out %p, order %u, a %p, b %p\n", out, order, a, b);
2286 
2287     for (i = 0; i < order * order; i++)
2288         out[i] = a[i] + b[i];
2289 
2290     return out;
2291 }
2292 
2293 FLOAT WINAPI D3DXSHDot(UINT order, const FLOAT *a, const FLOAT *b)
2294 {
2295     FLOAT s;
2296     UINT i;
2297 
2298     TRACE("order %u, a %p, b %p\n", order, a, b);
2299 
2300     s = a[0] * b[0];
2301     for (i = 1; i < order * order; i++)
2302         s += a[i] * b[i];
2303 
2304     return s;
2305 }
2306 
2307 static void weightedcapintegrale(FLOAT *out, UINT order, FLOAT angle)
2308 {
2309     FLOAT coeff[3];
2310 
2311     coeff[0] = cosf(angle);
2312 
2313     out[0] = 2.0f * D3DX_PI * (1.0f - coeff[0]);
2314     out[1] = D3DX_PI * sinf(angle) * sinf(angle);
2315     if (order <= 2)
2316         return;
2317 
2318     out[2] = coeff[0] * out[1];
2319     if (order == 3)
2320         return;
2321 
2322     coeff[1] = coeff[0] * coeff[0];
2323     coeff[2] = coeff[1] * coeff[1];
2324 
2325     out[3] = D3DX_PI * (-1.25f * coeff[2] + 1.5f * coeff[1] - 0.25f);
2326     if (order == 4)
2327         return;
2328 
2329     out[4] = -0.25f * D3DX_PI * coeff[0] * (7.0f * coeff[2] - 10.0f * coeff[1] + 3.0f);
2330     if (order == 5)
2331         return;
2332 
2333     out[5] = D3DX_PI * (-2.625f * coeff[2] * coeff[1] + 4.375f * coeff[2] - 1.875f * coeff[1] + 0.125f);
2334 }
2335 
2336 HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2337     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2338 {
2339     FLOAT cap[6], clamped_angle, norm, scale, temp;
2340     UINT i, index, j;
2341 
2342     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2343         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2344 
2345     if (radius <= 0.0f)
2346         return D3DXSHEvalDirectionalLight(order, dir, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2347 
2348     clamped_angle = (radius > D3DX_PI / 2.0f) ? (D3DX_PI / 2.0f) : radius;
2349     norm = sinf(clamped_angle) * sinf(clamped_angle);
2350 
2351     if (order > D3DXSH_MAXORDER)
2352     {
2353         WARN("Order clamped at D3DXSH_MAXORDER\n");
2354         order = D3DXSH_MAXORDER;
2355     }
2356 
2357     weightedcapintegrale(cap, order, radius);
2358     D3DXSHEvalDirection(rout, order, dir);
2359 
2360     for (i = 0; i < order; i++)
2361     {
2362         scale = cap[i] / norm;
2363 
2364         for (j = 0; j < 2 * i + 1; j++)
2365         {
2366             index = i * i + j;
2367             temp = rout[index] * scale;
2368 
2369             rout[index] = temp * Rintensity;
2370             if (gout)
2371                 gout[index] = temp * Gintensity;
2372             if (bout)
2373                 bout[index] = temp * Bintensity;
2374         }
2375     }
2376 
2377     return D3D_OK;
2378 }
2379 
2380 FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
2381 {
2382     const FLOAT dirxx = dir->x * dir->x;
2383     const FLOAT dirxy = dir->x * dir->y;
2384     const FLOAT dirxz = dir->x * dir->z;
2385     const FLOAT diryy = dir->y * dir->y;
2386     const FLOAT diryz = dir->y * dir->z;
2387     const FLOAT dirzz = dir->z * dir->z;
2388     const FLOAT dirxxxx = dirxx * dirxx;
2389     const FLOAT diryyyy = diryy * diryy;
2390     const FLOAT dirzzzz = dirzz * dirzz;
2391     const FLOAT dirxyxy = dirxy * dirxy;
2392 
2393     TRACE("out %p, order %u, dir %p\n", out, order, dir);
2394 
2395     if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
2396         return out;
2397 
2398     out[0] = 0.5f / sqrtf(D3DX_PI);
2399     out[1] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->y;
2400     out[2] = 0.5f / sqrtf(D3DX_PI / 3.0f) * dir->z;
2401     out[3] = -0.5f / sqrtf(D3DX_PI / 3.0f) * dir->x;
2402     if (order == 2)
2403         return out;
2404 
2405     out[4] = 0.5f / sqrtf(D3DX_PI / 15.0f) * dirxy;
2406     out[5] = -0.5f / sqrtf(D3DX_PI / 15.0f) * diryz;
2407     out[6] = 0.25f / sqrtf(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
2408     out[7] = -0.5f / sqrtf(D3DX_PI / 15.0f) * dirxz;
2409     out[8] = 0.25f / sqrtf(D3DX_PI / 15.0f) * (dirxx - diryy);
2410     if (order == 3)
2411         return out;
2412 
2413     out[9] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
2414     out[10] = sqrtf(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
2415     out[11] = -sqrtf(42.0f / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
2416     out[12] = sqrtf(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
2417     out[13] = sqrtf(42.0f / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
2418     out[14] = sqrtf(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
2419     out[15] = -sqrtf(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
2420     if (order == 4)
2421         return out;
2422 
2423     out[16] = 0.75f * sqrtf(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
2424     out[17] = 3.0f * dir->z * out[9];
2425     out[18] = 0.75f * sqrtf(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
2426     out[19] = 0.375f * sqrtf(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
2427     out[20] = 3.0f / (16.0f * sqrtf(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
2428     out[21] = 0.375f * sqrtf(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
2429     out[22] = 0.375f * sqrtf(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
2430     out[23] = 3.0f * dir->z * out[15];
2431     out[24] = 3.0f / 16.0f * sqrtf(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2432     if (order == 5)
2433         return out;
2434 
2435     out[25] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
2436     out[26] = 0.75f * sqrtf(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
2437     out[27] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
2438     out[28] = sqrtf(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
2439     out[29] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2440     out[30] = sqrtf(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
2441     out[31] = sqrtf(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
2442     out[32] = sqrtf(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
2443     out[33] = sqrtf(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
2444     out[34] = 3.0f / 16.0f * sqrtf(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0f * dirxyxy + diryyyy);
2445     out[35] = -3.0f/ 32.0f * sqrtf(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);
2446 
2447     return out;
2448 }
2449 
2450 HRESULT WINAPI D3DXSHEvalDirectionalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *Rout, FLOAT *Gout, FLOAT *Bout)
2451 {
2452     FLOAT s, temp;
2453     UINT j;
2454 
2455     TRACE("Order %u, Vector %p, Red %f, Green %f, Blue %f, Rout %p, Gout %p, Bout %p\n", order, dir, Rintensity, Gintensity, Bintensity, Rout, Gout, Bout);
2456 
2457     s = 0.75f;
2458     if ( order > 2 )
2459         s += 5.0f / 16.0f;
2460     if ( order > 4 )
2461         s -= 3.0f / 32.0f;
2462     s /= D3DX_PI;
2463 
2464     D3DXSHEvalDirection(Rout, order, dir);
2465     for (j = 0; j < order * order; j++)
2466     {
2467         temp = Rout[j] / s;
2468 
2469         Rout[j] = Rintensity * temp;
2470         if ( Gout )
2471             Gout[j] = Gintensity * temp;
2472         if ( Bout )
2473             Bout[j] = Bintensity * temp;
2474     }
2475 
2476     return D3D_OK;
2477 }
2478 
2479 HRESULT WINAPI D3DXSHEvalHemisphereLight(UINT order, const D3DXVECTOR3 *dir, D3DXCOLOR top, D3DXCOLOR bottom,
2480     FLOAT *rout, FLOAT *gout, FLOAT *bout)
2481 {
2482     FLOAT a[2], temp[4];
2483     UINT i, j;
2484 
2485     TRACE("order %u, dir %p, rout %p, gout %p, bout %p\n", order, dir, rout, gout, bout);
2486 
2487     D3DXSHEvalDirection(temp, 2, dir);
2488 
2489     a[0] = (top.r + bottom.r) * 3.0f * D3DX_PI;
2490     a[1] = (top.r - bottom.r) * D3DX_PI;
2491     for (i = 0; i < order; i++)
2492         for (j = 0; j < 2 * i + 1; j++)
2493             if (i < 2)
2494                 rout[i * i + j] = temp[i * i + j] * a[i];
2495             else
2496                 rout[i * i + j] = 0.0f;
2497 
2498     if (gout)
2499     {
2500         a[0] = (top.g + bottom.g) * 3.0f * D3DX_PI;
2501         a[1] = (top.g - bottom.g) * D3DX_PI;
2502         for (i = 0; i < order; i++)
2503             for (j = 0; j < 2 * i + 1; j++)
2504                 if (i < 2)
2505                     gout[i * i + j] = temp[i * i + j] * a[i];
2506                 else
2507                     gout[i * i + j] = 0.0f;
2508     }
2509 
2510     if (bout)
2511     {
2512         a[0] = (top.b + bottom.b) * 3.0f * D3DX_PI;
2513         a[1] = (top.b - bottom.b) * D3DX_PI;
2514         for (i = 0; i < order; i++)
2515             for (j = 0; j < 2 * i + 1; j++)
2516                 if (i < 2)
2517                     bout[i * i + j] = temp[i * i + j] * a[i];
2518                 else
2519                     bout[i * i + j] = 0.0f;
2520     }
2521 
2522     return D3D_OK;
2523 }
2524 
2525 HRESULT WINAPI D3DXSHEvalSphericalLight(UINT order, const D3DXVECTOR3 *dir, FLOAT radius,
2526     FLOAT Rintensity, FLOAT Gintensity, FLOAT Bintensity, FLOAT *rout, FLOAT *gout, FLOAT *bout)
2527 {
2528     D3DXVECTOR3 normal;
2529     FLOAT cap[6], clamped_angle, dist, temp;
2530     UINT i, index, j;
2531 
2532     TRACE("order %u, dir %p, radius %f, red %f, green %f, blue %f, rout %p, gout %p, bout %p\n",
2533         order, dir, radius, Rintensity, Gintensity, Bintensity, rout, gout, bout);
2534 
2535     if (order > D3DXSH_MAXORDER)
2536     {
2537         WARN("Order clamped at D3DXSH_MAXORDER\n");
2538         order = D3DXSH_MAXORDER;
2539     }
2540 
2541     if (radius < 0.0f)
2542         radius = -radius;
2543 
2544     dist = D3DXVec3Length(dir);
2545     clamped_angle = (dist <= radius) ? D3DX_PI / 2.0f : asinf(radius / dist);
2546 
2547     weightedcapintegrale(cap, order, clamped_angle);
2548     D3DXVec3Normalize(&normal, dir);
2549     D3DXSHEvalDirection(rout, order, &normal);
2550 
2551     for (i = 0; i < order; i++)
2552         for (j = 0; j < 2 * i + 1; j++)
2553         {
2554             index = i * i + j;
2555             temp = rout[index] * cap[i];
2556 
2557             rout[index] = temp * Rintensity;
2558             if (gout)
2559                 gout[index] = temp * Gintensity;
2560             if (bout)
2561                 bout[index] = temp * Bintensity;
2562         }
2563 
2564     return D3D_OK;
2565 }
2566 
2567 FLOAT * WINAPI D3DXSHMultiply2(FLOAT *out, const FLOAT *a, const FLOAT *b)
2568 {
2569     FLOAT ta, tb;
2570 
2571     TRACE("out %p, a %p, b %p\n", out, a, b);
2572 
2573     ta = 0.28209479f * a[0];
2574     tb = 0.28209479f * b[0];
2575 
2576     out[0] = 0.28209479f * D3DXSHDot(2, a, b);
2577     out[1] = ta * b[1] + tb * a[1];
2578     out[2] = ta * b[2] + tb * a[2];
2579     out[3] = ta * b[3] + tb * a[3];
2580 
2581     return out;
2582 }
2583 
/*
 * D3DXSHMultiply3
 *
 * Combines the coefficient vectors a and b into out. Only indices 0..8 of
 * a, b and out are accessed (presumably the 9 coefficients of an order-3
 * spherical-harmonics expansion, with out holding the product truncated to
 * the same order -- confirm against the D3DX documentation).
 *
 * The body is a list of groups, one per index pair (i, j). Each group
 *   - builds ta / tb: the same weighted sum applied to a[] and to b[],
 *   - adds the cross terms ta * b[.] + tb * a[.] to out[i] (and out[j]),
 *   - forms t: a[i] * b[i] for a diagonal pair, or the symmetric sum
 *     a[i] * b[j] + a[j] * b[i] otherwise,
 *   - scatters weighted copies of t into further entries of out.
 * Every out[k] is written with '=' before it is first updated with
 * '+=' / '-=', so the caller does not have to initialise out.
 *
 * a[] and b[] are re-read after entries of out[] have been stored, so
 * passing out == a or out == b changes what the later groups read.
 *
 * Returns out.
 */
2584 FLOAT * WINAPI D3DXSHMultiply3(FLOAT *out, const FLOAT *a, const FLOAT *b)
2585 {
2586     FLOAT t, ta, tb;
2587 
2588     TRACE("out %p, a %p, b %p\n", out, a, b);
2589 
         /* pair (0, 0); 0.28209479 is presumably 1 / (2 * sqrt(pi)) -- TODO confirm */
2590     out[0] = 0.28209479f * a[0] * b[0];
2591 
         /* pair (1, 1): first write of out[1], out[6] and out[8] */
2592     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2593     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2594     out[1] = ta * b[1] + tb * a[1];
2595     t = a[1] * b[1];
2596     out[0] += 0.28209479f * t;
2597     out[6] = -0.12615663f * t;
2598     out[8] = -0.21850969f * t;
2599 
         /* pair (1, 2): first write of out[2] and out[5] */
2600     ta = 0.21850969f * a[5];
2601     tb = 0.21850969f * b[5];
2602     out[1] += ta * b[2] + tb * a[2];
2603     out[2] = ta * b[1] + tb * a[1];
2604     t = a[1] * b[2] +a[2] * b[1];
2605     out[5] = 0.21850969f * t;
2606 
         /* pair (1, 3): first write of out[3] and out[4] */
2607     ta = 0.21850969f * a[4];
2608     tb = 0.21850969f * b[4];
2609     out[1] += ta * b[3] + tb * a[3];
2610     out[3]  = ta * b[1] + tb * a[1];
2611     t = a[1] * b[3] + a[3] * b[1];
2612     out[4] = 0.21850969f * t;
2613 
         /* pair (2, 2) */
2614     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2615     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2616     out[2] += ta * b[2] + tb * a[2];
2617     t = a[2] * b[2];
2618     out[0] += 0.28209480f * t;
2619     out[6] += 0.25231326f * t;
2620 
         /* pair (2, 3): first write of out[7] */
2621     ta = 0.21850969f * a[7];
2622     tb = 0.21850969f * b[7];
2623     out[2] += ta * b[3] + tb * a[3];
2624     out[3] += ta * b[2] + tb * a[2];
2625     t = a[2] * b[3] + a[3] * b[2];
2626     out[7] = 0.21850969f * t;
2627 
         /* pair (3, 3) */
2628     ta = 0.28209479f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2629     tb = 0.28209479f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2630     out[3] += ta * b[3] + tb * a[3];
2631     t = a[3] * b[3];
2632     out[0] += 0.28209479f * t;
2633     out[6] -= 0.12615663f * t;
2634     out[8] += 0.21850969f * t;
2635 
         /* pair (4, 4) */
2636     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2637     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2638     out[4] += ta * b[4] + tb * a[4];
2639     t = a[4] * b[4];
2640     out[0] += 0.28209479f * t;
2641     out[6] -= 0.18022375f * t;
2642 
         /* pair (4, 5) */
2643     ta = 0.15607835f * a[7];
2644     tb = 0.15607835f * b[7];
2645     out[4] += ta * b[5] + tb * a[5];
2646     out[5] += ta * b[4] + tb * a[4];
2647     t = a[4] * b[5] + a[5] * b[4];
2648     out[7] += 0.15607835f * t;
2649 
         /* pair (5, 5) */
2650     ta = 0.28209479f * a[0] + 0.09011188f * a[6] - 0.15607835f * a[8];
2651     tb = 0.28209479f * b[0] + 0.09011188f * b[6] - 0.15607835f * b[8];
2652     out[5] += ta * b[5] + tb * a[5];
2653     t = a[5] * b[5];
2654     out[0] += 0.28209479f * t;
2655     out[6] += 0.09011188f * t;
2656     out[8] -= 0.15607835f * t;
2657 
         /* pair (6, 6) */
2658     ta = 0.28209480f * a[0];
2659     tb = 0.28209480f * b[0];
2660     out[6] += ta * b[6] + tb * a[6];
2661     t = a[6] * b[6];
2662     out[0] += 0.28209480f * t;
2663     out[6] += 0.18022376f * t;
2664 
         /* pair (7, 7) */
2665     ta = 0.28209479f * a[0] + 0.09011188f * a[6] + 0.15607835f * a[8];
2666     tb = 0.28209479f * b[0] + 0.09011188f * b[6] + 0.15607835f * b[8];
2667     out[7] += ta * b[7] + tb * a[7];
2668     t = a[7] * b[7];
2669     out[0] += 0.28209479f * t;
2670     out[6] += 0.09011188f * t;
2671     out[8] += 0.15607835f * t;
2672 
         /* pair (8, 8) */
2673     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2674     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2675     out[8] += ta * b[8] + tb * a[8];
2676     t = a[8] * b[8];
2677     out[0] += 0.28209479f * t;
2678     out[6] -= 0.18022375f * t;
2679 
2680     return out;
2681 }
2682 
/*
 * D3DXSHMultiply4
 *
 * Combines the coefficient vectors a and b into out. Only indices 0..15 of
 * a, b and out are accessed (presumably the 16 coefficients of an order-4
 * spherical-harmonics expansion, with out holding the product truncated to
 * the same order -- confirm against the D3DX documentation).
 *
 * The body is a list of groups, one per index pair (i, j). Each group
 *   - builds ta / tb: the same weighted sum applied to a[] and to b[],
 *   - adds the cross terms ta * b[.] + tb * a[.] to out[i] (and out[j]),
 *   - forms t: a[i] * b[i] for a diagonal pair, or the symmetric sum
 *     a[i] * b[j] + a[j] * b[i] otherwise,
 *   - scatters weighted copies of t into further entries of out.
 * Every out[k] is written with '=' before it is first updated with
 * '+=' / '-=', so the caller does not have to initialise out. Because of
 * that, the order of the groups must not be changed.
 *
 * a[] and b[] are re-read after entries of out[] have been stored, so
 * passing out == a or out == b changes what the later groups read.
 *
 * Returns out.
 */
2683 FLOAT * WINAPI D3DXSHMultiply4(FLOAT *out, const FLOAT *a, const FLOAT *b)
2684 {
2685     FLOAT ta, tb, t;
2686 
2687     TRACE("out %p, a %p, b %p\n", out, a, b);
2688 
         /* pair (0, 0) */
2689     out[0] = 0.28209479f * a[0] * b[0];
2690 
         /* pair (1, 1): first write of out[1], out[6] and out[8] */
2691     ta = 0.28209479f * a[0] - 0.12615663f * a[6] - 0.21850969f * a[8];
2692     tb = 0.28209479f * b[0] - 0.12615663f * b[6] - 0.21850969f * b[8];
2693     out[1] = ta * b[1] + tb * a[1];
2694     t = a[1] * b[1];
2695     out[0] += 0.28209479f * t;
2696     out[6] = -0.12615663f * t;
2697     out[8] = -0.21850969f * t;
2698 
         /* pair (1, 4): first write of out[4], out[3], out[13] and out[15] */
2699     ta = 0.21850969f * a[3] - 0.05839917f * a[13] - 0.22617901f * a[15];
2700     tb = 0.21850969f * b[3] - 0.05839917f * b[13] - 0.22617901f * b[15];
2701     out[1] += ta * b[4] + tb * a[4];
2702     out[4] = ta * b[1] + tb * a[1];
2703     t = a[1] * b[4] + a[4] * b[1];
2704     out[3] = 0.21850969f * t;
2705     out[13] = -0.05839917f * t;
2706     out[15] = -0.22617901f * t;
2707 
         /* pair (1, 5): first write of out[5], out[2], out[12] and out[14] */
2708     ta = 0.21850969f * a[2] - 0.14304817f * a[12] - 0.18467439f * a[14];
2709     tb = 0.21850969f * b[2] - 0.14304817f * b[12] - 0.18467439f * b[14];
2710     out[1] += ta * b[5] + tb * a[5];
2711     out[5] = ta * b[1] + tb * a[1];
2712     t = a[1] * b[5] + a[5] * b[1];
2713     out[2] = 0.21850969f * t;
2714     out[12] = -0.14304817f * t;
2715     out[14] = -0.18467439f * t;
2716 
         /* pair (1, 6): first write of out[11] */
2717     ta = 0.20230066f * a[11];
2718     tb = 0.20230066f * b[11];
2719     out[1] += ta * b[6] + tb * a[6];
2720     out[6] += ta * b[1] + tb * a[1];
2721     t = a[1] * b[6] + a[6] * b[1];
2722     out[11] = 0.20230066f * t;
2723 
         /* pair (1, 8): first write of out[9] */
2724     ta = 0.22617901f * a[9] + 0.05839917f * a[11];
2725     tb = 0.22617901f * b[9] + 0.05839917f * b[11];
2726     out[1] += ta * b[8] + tb * a[8];
2727     out[8] += ta * b[1] + tb * a[1];
2728     t = a[1] * b[8] + a[8] * b[1];
2729     out[9] = 0.22617901f * t;
2730     out[11] += 0.05839917f * t;
2731 
         /* pair (2, 2) */
2732     ta = 0.28209480f * a[0] + 0.25231326f * a[6];
2733     tb = 0.28209480f * b[0] + 0.25231326f * b[6];
2734     out[2] += ta * b[2] + tb * a[2];
2735     t = a[2] * b[2];
2736     out[0] += 0.28209480f * t;
2737     out[6] += 0.25231326f * t;
2738 
         /* pair (2, 6) */
2739     ta = 0.24776671f * a[12];
2740     tb = 0.24776671f * b[12];
2741     out[2] += ta * b[6] + tb * a[6];
2742     out[6] += ta * b[2] + tb * a[2];
2743     t = a[2] * b[6] + a[6] * b[2];
2744     out[12] += 0.24776671f * t;
2745 
         /* pair (3, 3) */
2746     ta = 0.28209480f * a[0] - 0.12615663f * a[6] + 0.21850969f * a[8];
2747     tb = 0.28209480f * b[0] - 0.12615663f * b[6] + 0.21850969f * b[8];
2748     out[3] += ta * b[3] + tb * a[3];
2749     t = a[3] * b[3];
2750     out[0] += 0.28209480f * t;
2751     out[6] -= 0.12615663f * t;
2752     out[8] += 0.21850969f * t;
2753 
         /* pair (3, 6) */
2754     ta = 0.20230066f * a[13];
2755     tb = 0.20230066f * b[13];
2756     out[3] += ta * b[6] + tb * a[6];
2757     out[6] += ta * b[3] + tb * a[3];
2758     t = a[3] * b[6] + a[6] * b[3];
2759     out[13] += 0.20230066f * t;
2760 
         /* pair (3, 7): first write of out[7] */
2761     ta = 0.21850969f * a[2] - 0.14304817f * a[12] + 0.18467439f * a[14];
2762     tb = 0.21850969f * b[2] - 0.14304817f * b[12] + 0.18467439f * b[14];
2763     out[3] += ta * b[7] + tb * a[7];
2764     out[7] = ta * b[3] + tb * a[3];
2765     t = a[3] * b[7] + a[7] * b[3];
2766     out[2] += 0.21850969f * t;
2767     out[12] -= 0.14304817f * t;
2768     out[14] += 0.18467439f * t;
2769 
         /* pair (3, 8) */
2770     ta = -0.05839917f * a[13] + 0.22617901f * a[15];
2771     tb = -0.05839917f * b[13] + 0.22617901f * b[15];
2772     out[3] += ta * b[8] + tb * a[8];
2773     out[8] += ta * b[3] + tb * a[3];
2774     t = a[3] * b[8] + a[8] * b[3];
2775     out[13] -= 0.05839917f * t;
2776     out[15] += 0.22617901f * t;
2777 
         /* pair (4, 4) */
2778     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2779     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2780     out[4] += ta * b[4] + tb * a[4];
2781     t = a[4] * b[4];
2782     out[0] += 0.28209479f * t;
2783     out[6] -= 0.18022375f * t;
2784 
         /* pair (4, 5) */
2785     ta = 0.15607835f * a[7];
2786     tb = 0.15607835f * b[7];
2787     out[4] += ta * b[5] + tb * a[5];
2788     out[5] += ta * b[4] + tb * a[4];
2789     t = a[4] * b[5] + a[5] * b[4];
2790     out[7] += 0.15607835f * t;
2791 
         /* pair (4, 9) */
2792     ta = 0.22617901f * a[3] - 0.09403160f * a[13];
2793     tb = 0.22617901f * b[3] - 0.09403160f * b[13];
2794     out[4] += ta * b[9] + tb * a[9];
2795     out[9] += ta * b[4] + tb * a[4];
2796     t = a[4] * b[9] + a[9] * b[4];
2797     out[3] += 0.22617901f * t;
2798     out[13] -= 0.09403160f * t;
2799 
         /* pair (4, 10): first write of out[10] */
2800     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2801     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2802     out[4] += ta * b[10] + tb * a [10];
2803     out[10] = ta * b[4] + tb * a[4];
2804     t = a[4] * b[10] + a[10] * b[4];
2805     out[2] += 0.18467439f * t;
2806     out[12] -= 0.18806319f * t;
2807 
         /* pair (4, 11) */
2808     ta = -0.05839917f * a[3] + 0.14567312f * a[13] + 0.09403160f * a[15];
2809     tb = -0.05839917f * b[3] + 0.14567312f * b[13] + 0.09403160f * b[15];
2810     out[4] += ta * b[11] + tb * a[11];
2811     out[11] += ta * b[4] + tb * a[4];
2812     t = a[4] * b[11] + a[11] * b[4];
2813     out[3] -= 0.05839917f * t;
2814     out[13] += 0.14567312f * t;
2815     out[15] += 0.09403160f * t;
2816 
         /* pair (5, 5) */
2817     ta = 0.28209479f * a[0] + 0.09011186f * a[6] - 0.15607835f * a[8];
2818     tb = 0.28209479f * b[0] + 0.09011186f * b[6] - 0.15607835f * b[8];
2819     out[5] += ta * b[5] + tb * a[5];
2820     t = a[5] * b[5];
2821     out[0] += 0.28209479f * t;
2822     out[6] += 0.09011186f * t;
2823     out[8] -= 0.15607835f * t;
2824 
         /* pair (5, 9) */
2825     ta = 0.14867701f * a[14];
2826     tb = 0.14867701f * b[14];
2827     out[5] += ta * b[9] + tb * a[9];
2828     out[9] += ta * b[5] + tb * a[5];
2829     t = a[5] * b[9] + a[9] * b[5];
2830     out[14] += 0.14867701f * t;
2831 
         /* pair (5, 10) */
2832     ta = 0.18467439f * a[3] + 0.11516472f * a[13] - 0.14867701f * a[15];
2833     tb = 0.18467439f * b[3] + 0.11516472f * b[13] - 0.14867701f * b[15];
2834     out[5] += ta * b[10] + tb * a[10];
2835     out[10] += ta * b[5] + tb * a[5];
2836     t = a[5] * b[10] + a[10] * b[5];
2837     out[3] += 0.18467439f * t;
2838     out[13] += 0.11516472f * t;
2839     out[15] -= 0.14867701f * t;
2840 
         /* pair (5, 11) */
2841     ta = 0.23359668f * a[2] + 0.05947080f * a[12] - 0.11516472f * a[14];
2842     tb = 0.23359668f * b[2] + 0.05947080f * b[12] - 0.11516472f * b[14];
2843     out[5] += ta * b[11] + tb * a[11];
2844     out[11] += ta * b[5] + tb * a[5];
2845     t = a[5] * b[11] + a[11] * b[5];
2846     out[2] += 0.23359668f * t;
2847     out[12] += 0.05947080f * t;
2848     out[14] -= 0.11516472f * t;
2849 
         /* pair (6, 6) */
2850     ta = 0.28209479f * a[0];
2851     tb = 0.28209479f * b[0];
2852     out[6] += ta * b[6] + tb * a[6];
2853     t = a[6] * b[6];
2854     out[0] += 0.28209479f * t;
2855     out[6] += 0.18022376f * t;
2856 
         /* pair (7, 7) */
2857     ta = 0.09011186f * a[6] + 0.28209479f * a[0] + 0.15607835f * a[8];
2858     tb = 0.09011186f * b[6] + 0.28209479f * b[0] + 0.15607835f * b[8];
2859     out[7] += ta * b[7] + tb * a[7];
2860     t = a[7] * b[7];
2861     out[6] += 0.09011186f * t;
2862     out[0] += 0.28209479f * t;
2863     out[8] += 0.15607835f * t;
2864 
         /* pair (7, 10) */
2865     ta = 0.14867701f * a[9] + 0.18467439f * a[1] + 0.11516472f * a[11];
2866     tb = 0.14867701f * b[9] + 0.18467439f * b[1] + 0.11516472f * b[11];
2867     out[7] += ta * b[10] + tb * a[10];
2868     out[10] += ta * b[7] + tb * a[7];
2869     t = a[7] * b[10] + a[10] * b[7];
2870     out[9] += 0.14867701f * t;
2871     out[1] += 0.18467439f * t;
2872     out[11] += 0.11516472f * t;
2873 
         /* pair (7, 13) */
2874     ta = 0.05947080f * a[12] + 0.23359668f * a[2] + 0.11516472f * a[14];
2875     tb = 0.05947080f * b[12] + 0.23359668f * b[2] + 0.11516472f * b[14];
2876     out[7] += ta * b[13] + tb * a[13];
2877     out[13] += ta * b[7]+ tb * a[7];
2878     t = a[7] * b[13] + a[13] * b[7];
2879     out[12] += 0.05947080f * t;
2880     out[2] += 0.23359668f * t;
2881     out[14] += 0.11516472f * t;
2882 
         /* pair (7, 14) */
2883     ta = 0.14867701f * a[15];
2884     tb = 0.14867701f * b[15];
2885     out[7] += ta * b[14] + tb * a[14];
2886     out[14] += ta * b[7] + tb * a[7];
2887     t = a[7] * b[14] + a[14] * b[7];
2888     out[15] += 0.14867701f * t;
2889 
         /* pair (8, 8) */
2890     ta = 0.28209479f * a[0] - 0.18022375f * a[6];
2891     tb = 0.28209479f * b[0] - 0.18022375f * b[6];
2892     out[8] += ta * b[8] + tb * a[8];
2893     t = a[8] * b[8];
2894     out[0] += 0.28209479f * t;
2895     out[6] -= 0.18022375f * t;
2896 
         /* pair (8, 9) */
2897     ta = -0.09403160f * a[11];
2898     tb = -0.09403160f * b[11];
2899     out[8] += ta * b[9] + tb * a[9];
2900     out[9] += ta * b[8] + tb * a[8];
2901     t = a[8] * b[9] + a[9] * b[8];
2902     out[11] -= 0.09403160f * t;
2903 
         /* pair (8, 13) */
2904     ta = -0.09403160f * a[15];
2905     tb = -0.09403160f * b[15];
2906     out[8] += ta * b[13] + tb * a[13];
2907     out[13] += ta * b[8] + tb * a[8];
2908     t = a[8] * b[13] + a[13] * b[8];
2909     out[15] -= 0.09403160f * t;
2910 
         /* pair (8, 14) */
2911     ta = 0.18467439f * a[2] - 0.18806319f * a[12];
2912     tb = 0.18467439f * b[2] - 0.18806319f * b[12];
2913     out[8] += ta * b[14] + tb * a[14];
2914     out[14] += ta * b[8] + tb * a[8];
2915     t = a[8] * b[14] + a[14] * b[8];
2916     out[2] += 0.18467439f * t;
2917     out[12] -= 0.18806319f * t;
2918 
         /* pair (9, 9) */
2919     ta = -0.21026104f * a[6] + 0.28209479f * a[0];
2920     tb = -0.21026104f * b[6] + 0.28209479f * b[0];
2921     out[9] += ta * b[9] + tb * a[9];
2922     t = a[9] * b[9];
2923     out[6] -= 0.21026104f * t;
2924     out[0] += 0.28209479f * t;
2925 
         /* pair (10, 10) */
2926     ta = 0.28209479f * a[0];
2927     tb = 0.28209479f * b[0];
2928     out[10] += ta * b[10] + tb * a[10];
2929     t = a[10] * b[10];
2930     out[0] += 0.28209479f * t;
2931 
         /* pair (11, 11) */
2932     ta = 0.28209479f * a[0] + 0.12615663f * a[6] - 0.14567312f * a[8];
2933     tb = 0.28209479f * b[0] + 0.12615663f * b[6] - 0.14567312f * b[8];
2934     out[11] += ta * b[11] + tb * a[11];
2935     t = a[11] * b[11];
2936     out[0] += 0.28209479f * t;
2937     out[6] += 0.12615663f * t;
2938     out[8] -= 0.14567312f * t;
2939 
         /* pair (12, 12) */
2940     ta = 0.28209479f * a[0] + 0.16820885f * a[6];
2941     tb = 0.28209479f * b[0] + 0.16820885f * b[6];
2942     out[12] += ta * b[12] + tb * a[12];
2943     t = a[12] * b[12];
2944     out[0] += 0.28209479f * t;
2945     out[6] += 0.16820885f * t;
2946 
         /* pair (13, 13) */
2947     ta =0.28209479f * a[0] + 0.14567312f * a[8] + 0.12615663f * a[6];
2948     tb =0.28209479f * b[0] + 0.14567312f * b[8] + 0.12615663f * b[6];
2949     out[13] += ta * b[13] + tb * a[13];
2950     t = a[13] * b[13];
2951     out[0] += 0.28209479f * t;
2952     out[8] += 0.14567312f * t;
2953     out[6] += 0.12615663f * t;
2954 
         /* pair (14, 14) */
2955     ta = 0.28209479f * a[0];
2956     tb = 0.28209479f * b[0];
2957     out[14] += ta * b[14] + tb * a[14];
2958     t = a[14] * b[14];
2959     out[0] += 0.28209479f * t;
2960 
         /* pair (15, 15) */
2961     ta = 0.28209479f * a[0] - 0.21026104f * a[6];
2962     tb = 0.28209479f * b[0] - 0.21026104f * b[6];
2963     out[15] += ta * b[15] + tb * a[15];
2964     t = a[15] * b[15];
2965     out[0] += 0.28209479f * t;
2966     out[6] -= 0.21026104f * t;
2967 
2968     return out;
2969 }
2970 
2971 static void rotate_X(FLOAT *out, UINT order, FLOAT a, FLOAT *in)
2972 {
2973     out[0] = in[0];
2974 
2975     out[1] = a * in[2];
2976     out[2] = -a * in[1];
2977     out[3] = in[3];
2978 
2979     out[4] = a * in[7];
2980     out[5] = -in[5];
2981     out[6] = -0.5f * in[6] - 0.8660253882f * in[8];
2982     out[7] = -a * in[4];
2983     out[8] = -0.8660253882f * in[6] + 0.5f * in[8];
2984     out[9] = -a * 0.7905694842f * in[12] + a * 0.6123724580f * in[14];
2985 
2986     out[10] = -in[10];
2987     out[11] = -a * 0.6123724580f * in[12] - a * 0.7905694842f * in[14];
2988     out[12] = a * 0.7905694842f * in[9] + a * 0.6123724580f * in[11];
2989     out[13] = -0.25f * in[13] - 0.9682458639f * in[15];
2990     out[14] = -a * 0.6123724580f * in[9] + a * 0.7905694842f * in[11];
2991     out[15] = -0.9682458639f * in[13] + 0.25f * in[15];
2992     if (order == 4)
2993         return;
2994 
2995     out[16] = -a * 0.9354143739f * in[21] + a * 0.3535533845f * in[23];
2996     out[17] = -0.75f * in[17] + 0.6614378095f * in[19];
2997     out[18] = -a * 0.3535533845f * in[21] - a * 0.9354143739f * in[23];
2998     out[19] = 0.6614378095f * in[17] + 0.75f * in[19];
2999     out[20] = 0.375f * in[20] + 0.5590170026f * in[22] + 0.7395099998f * in[24];
3000     out[21] = a * 0.9354143739f * in[16] + a * 0.3535533845f * in[18];
3001     out[22] = 0.5590170026f * in[20] + 0.5f * in[22] - 0.6614378691f * in[24];
3002     out[23] = -a * 0.3535533845f * in[16] + a * 0.9354143739f * in[18];
3003     out[24] = 0.7395099998f * in[20] - 0.6614378691f * in[22] + 0.125f * in[24];
3004     if (order == 5)
3005         return;
3006 
3007     out[25] = a * 0.7015607357f * in[30] - a * 0.6846531630f * in[32] + a * 0.1976423711f * in[34];
3008     out[26] = -0.5f * in[26] + 0.8660253882f * in[28];
3009     out[27] = a * 0.5229125023f * in[30] + a * 0.3061861992f * in[32] - a * 0.7954951525f * in[34];
3010     out[28] = 0.8660253882f * in[26] + 0.5f * in[28];
3011     out[29] = a * 0.4841229022f * in[30] + a * 0.6614378691f * in[32] + a * 0.5728219748f * in[34];
3012     out[30] = -a * 0.7015607357f * in[25] - a * 0.5229125023f * in[27] - a * 0.4841229022f * in[29];
3013     out[31] = 0.125f * in[31] + 0.4050463140f * in[33] + 0.9057110548f * in[35];
3014     out[32] = a * 0.6846531630f * in[25] - a * 0.3061861992f * in[27] - a * 0.6614378691f * in[29];
3015     out[33] = 0.4050463140f * in[31] + 0.8125f * in[33] - 0.4192627370f * in[35];
3016     out[34] = -a * 0.1976423711f * in[25] + a * 0.7954951525f * in[27] - a * 0.5728219748f * in[29];
3017     out[35] = 0.9057110548f * in[31] - 0.4192627370f * in[33] + 0.0624999329f * in[35];
3018 }
3019 
3020 FLOAT* WINAPI D3DXSHRotate(FLOAT *out, UINT order, const D3DXMATRIX *matrix, const FLOAT *in)
3021 {
3022     FLOAT alpha, beta, gamma, sinb, temp[36], temp1[36];
3023 
3024     TRACE("out %p, order %u, matrix %p, in %p\n", out, order, matrix, in);
3025 
3026     out[0] = in[0];
3027 
3028     if ((order > D3DXSH_MAXORDER) || (order < D3DXSH_MINORDER))
3029         return out;
3030 
3031     if (order <= 3)
3032     {
3033         out[1] = matrix->u.m[1][1] * in[1] - matrix->u.m[2][1] * in[2] + matrix->u.m[0][1] * in[3];
3034         out[2] = -matrix->u.m[1][2] * in[1] + matrix->u.m[2][2] * in[2] - matrix->u.m[0][2] * in[3];
3035         out[3] = matrix->u.m[1][0] * in[1] - matrix->u.m[2][0] * in[2] + matrix->u.m[0][0] * in[3];
3036 
3037         if (order == 3)
3038         {
3039             FLOAT coeff[]={
3040                 matrix->u.m[1][0] * matrix->u.m[0][0], matrix->u.m[1][1] * matrix->u.m[0][1],
3041                 matrix->u.m[1][1] * matrix->u.m[2][1], matrix->u.m[1][0] * matrix->u.m[2][0],
3042                 matrix->u.m[2][0] * matrix->u.m[2][0], matrix->u.m[2][1] * matrix->u.m[2][1],
3043                 matrix->u.m[0][0] * matrix->u.m[2][0], matrix->u.m[0][1] * matrix->u.m[2][1],
3044                 matrix->u.m[0][1] * matrix->u.m[0][1], matrix->u.m[1][0] * matrix->u.m[1][0],
3045                 matrix->u.m[1][1] * matrix->u.m[1][1], matrix->u.m[0][0] * matrix->u.m[0][0], };
3046 
3047             out[4] = (matrix->u.m[1][1] * matrix->u.m[0][0] + matrix->u.m[0][1] * matrix->u.m[1][0]) * in[4];
3048             out[4] -= (matrix->u.m[1][0] * matrix->u.m[2][1] + matrix->u.m[1][1] * matrix->u.m[2][0]) * in[5];
3049             out[4] += 1.7320508076f * matrix->u.m[2][0] * matrix->u.m[2][1] * in[6];
3050             out[4] -= (matrix->u.m[0][1] * matrix->u.m[2][0] + matrix->u.m[0][0] * matrix->u.m[2][1]) * in[7];
3051             out[4] += (matrix->u.m[0][0] * matrix->u.m[0][1] - matrix->u.m[1][0] * matrix->u.m[1][1]) * in[8];
3052 
3053             out[5] = (matrix->u.m[1][1] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][1]) * in[5];
3054             out[5] -= (matrix->u.m[1][1] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][1]) * in[4];
3055             out[5] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][1] * in[6];
3056             out[5] += (matrix->u.m[0][2] * matrix->u.m[2][1] + matrix->u.m[0][1] * matrix->u.m[2][2]) * in[7];
3057             out[5] -= (matrix->u.m[0][1] * matrix->u.m[0][2] - matrix->u.m[1][1] * matrix->u.m[1][2]) * in[8];
3058 
3059             out[6] = (matrix->u.m[2][2] * matrix->u.m[2][2] - 0.5f * (coeff[4] + coeff[5])) * in[6];
3060             out[6] -= (0.5773502692f * (coeff[0] + coeff[1]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[0][2]) * in[4];
3061             out[6] += (0.5773502692f * (coeff[2] + coeff[3]) - 1.1547005384f * matrix->u.m[1][2] * matrix->u.m[2][2]) * in[5];
3062             out[6] += (0.5773502692f * (coeff[6] + coeff[7]) - 1.1547005384f * matrix->u.m[0][2] * matrix->u.m[2][2]) * in[7];
3063             out[6] += (0.2886751347f * (coeff[9] - coeff[8] + coeff[10] - coeff[11]) - 0.5773502692f *
3064                   (matrix->u.m[1][2] * matrix->u.m[1][2] - matrix->u.m[0][2] * matrix->u.m[0][2])) * in[8];
3065 
3066             out[7] = (matrix->u.m[0][0] * matrix->u.m[2][2] + matrix->u.m[0][2] * matrix->u.m[2][0]) * in[7];
3067             out[7] -= (matrix->u.m[1][0] * matrix->u.m[0][2] + matrix->u.m[1][2] * matrix->u.m[0][0]) * in[4];
3068             out[7] += (matrix->u.m[1][0] * matrix->u.m[2][2] + matrix->u.m[1][2] * matrix->u.m[2][0]) * in[5];
3069             out[7] -= 1.7320508076f * matrix->u.m[2][2] * matrix->u.m[2][0] * in[6];
3070             out[7] -= (matrix->u.m[0][0] * matrix->u.m[0][2] - matrix->u.m[1][0] * matrix->u.m[1][2]) * in[8];
3071 
3072             out[8] = 0.5f * (coeff[11] - coeff[8] - coeff[9] + coeff[10]) * in[8];
3073             out[8] += (coeff[0] - coeff[1]) * in[4];
3074             out[8] += (coeff[2] - coeff[3]) * in[5];
3075             out[8] += 0.86602540f * (coeff[4] - coeff[5]) * in[6];
3076             out[8] += (coeff[7] - coeff[6]) * in[7];
3077         }
3078 
3079         return out;
3080     }
3081 
3082     if (fabsf(matrix->u.m[2][2]) != 1.0f)
3083     {
3084         sinb = sqrtf(1.0f - matrix->u.m[2][2] * matrix->u.m[2][2]);
3085         alpha = atan2f(matrix->u.m[2][1] / sinb, matrix->u.m[2][0] / sinb);
3086         beta = atan2f(sinb, matrix->u.m[2][2]);
3087         gamma = atan2f(matrix->u.m[1][2] / sinb, -matrix->u.m[0][2] / sinb);
3088     }
3089     else
3090     {
3091         alpha = atan2f(matrix->u.m[0][1], matrix->u.m[0][0]);
3092         beta = 0.0f;
3093         gamma = 0.0f;
3094     }
3095 
3096     D3DXSHRotateZ(temp, order, gamma, in);
3097     rotate_X(temp1, order, 1.0f, temp);
3098     D3DXSHRotateZ(temp, order, beta, temp1);
3099     rotate_X(temp1, order, -1.0f, temp);
3100     D3DXSHRotateZ(out, order, alpha, temp1);
3101 
3102     return out;
3103 }
3104 
3105 FLOAT * WINAPI D3DXSHRotateZ(FLOAT *out, UINT order, FLOAT angle, const FLOAT *in)
3106 {
3107     UINT i, sum = 0;
3108     FLOAT c[5], s[5];
3109 
3110     TRACE("out %p, order %u, angle %f, in %p\n", out, order, angle, in);
3111 
3112     order = min(max(order, D3DXSH_MINORDER), D3DXSH_MAXORDER);
3113 
3114     out[0] = in[0];
3115 
3116     for (i = 1; i < order; i++)
3117     {
3118         UINT j;
3119 
3120         c[i - 1] = cosf(i * angle);
3121         s[i - 1] = sinf(i * angle);
3122         sum += i * 2;
3123 
3124         out[sum - i] = c[i - 1] * in[sum - i];
3125         out[sum - i] += s[i - 1] * in[sum + i];
3126         for (j = i - 1; j > 0; j--)
3127         {
3128             out[sum - j] = 0.0f;
3129             out[sum - j] = c[j - 1] * in[sum - j];
3130             out[sum - j] += s[j - 1] * in[sum + j];
3131         }
3132 
3133         if (in == out)
3134             out[sum] = 0.0f;
3135         else
3136             out[sum] = in[sum];
3137 
3138         for (j = 1; j < i; j++)
3139         {
3140             out[sum + j] = 0.0f;
3141             out[sum + j] = -s[j - 1] * in[sum - j];
3142             out[sum + j] += c[j - 1] * in[sum + j];
3143         }
3144         out[sum + i] = -s[i - 1] * in[sum - i];
3145         out[sum + i] += c[i - 1] * in[sum + i];
3146     }
3147 
3148     return out;
3149 }
3150 
3151 FLOAT* WINAPI D3DXSHScale(FLOAT *out, UINT order, const FLOAT *a, const FLOAT scale)
3152 {
3153     UINT i;
3154 
3155     TRACE("out %p, order %u, a %p, scale %f\n", out, order, a, scale);
3156 
3157     for (i = 0; i < order * order; i++)
3158         out[i] = a[i] * scale;
3159 
3160     return out;
3161 }
3162