xref: /reactos/sdk/lib/crt/math/libm_sse2/simd.h (revision 9e8ed3f8)
1 /***********************************************************************************/
2 /** MIT License **/
3 /** ----------- **/
4 /** **/
5 /** Copyright (c) 2002-2019 Advanced Micro Devices, Inc. **/
6 /** **/
7 /** Permission is hereby granted, free of charge, to any person obtaining a copy **/
/** of this Software and associated documentation files (the "Software"), to deal **/
9 /** in the Software without restriction, including without limitation the rights **/
10 /** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell **/
11 /** copies of the Software, and to permit persons to whom the Software is **/
12 /** furnished to do so, subject to the following conditions: **/
13 /** **/
14 /** The above copyright notice and this permission notice shall be included in **/
15 /** all copies or substantial portions of the Software. **/
16 /** **/
17 /** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR **/
18 /** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, **/
19 /** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE **/
20 /** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER **/
21 /** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, **/
22 /** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN **/
23 /** THE SOFTWARE. **/
24 /***********************************************************************************/
25 
26 /*
27 ******************************************************************************
28  * Source File  : simd.h
29  * Archive File : $Archive: $
30  * Date		    : 6/04/01
31  * Description  : The include file for  the AMD SIMD exception filter routine
32  *                for  Microsoft Structured Exception Handling
33  *
34  *
35 $Revision:$
36 $Name:$
37 $Date:$
38 $Author:$
39 $History: simd.h $
40  *
41  */
42 
43 #include <emmintrin.h>
44 
45 // simd.h
46 // This file contains structure definitions to provide
47 // convenient access to SIMD and MMX data as unsigned
48 // integer data.
49 
// Build-time switches.
//
// Set DO_PRINT to 1 to print terse output.
#define DO_PRINT 0

// The 3DNOW SDK cannot be used as written with 64-bit tools, so
// USE_3DNOW_SDK and SUPPORTS_FTZ are only defined when _AMD64_ is not.
#ifndef _AMD64_
#define USE_3DNOW_SDK 1
#define SUPPORTS_FTZ 1
#endif
58 
59 
60 /*****************************************************************/
61 
62 // Basic type definitions
63 
64 typedef UINT_PTR AWORD; //  x86-64 safe
65 
/*
 * Single-precision value together with its raw bit pattern.
 *
 * The integer view must be exactly as wide as the float it overlays:
 * arrays of LFLOAT are laid over the four 32-bit lanes of an XMM
 * register (see ML128), so a wider integer member would double the
 * element stride and misalign every lane after the first.
 */
typedef union
{
    float f;            /* value viewed as an IEEE single            */
    unsigned int l;     /* same 32 bits viewed as an unsigned integer */
} LFLOAT;
71 
/*
 * 64-bit unsigned quadword.
 *
 * Spelled with the standard `unsigned long long` (already used elsewhere
 * in this header) instead of the compiler-specific `_int64` keyword, so
 * the typedef is accepted by non-Microsoft compilers as well.
 */
typedef unsigned long long QWORD;
77 
/*
 * Double-precision value together with its raw bit pattern, exposed as
 * two 32-bit halves.
 *
 * The two halves together must be exactly as wide as the double they
 * overlay: arrays of LDOUBLE are laid over the two 64-bit lanes of an
 * XMM register (see ML128), so wider elements would overrun the register.
 */
typedef union
{
    double f;           /* value viewed as an IEEE double              */
    unsigned int l[2];  /* same 64 bits viewed as two 32-bit integers  */
} LDOUBLE;
83 
84 typedef __declspec(align(16)) struct
85 {
86 	LFLOAT f0,f1,f2,f3;
87 } SSESINGLE;
88 
89 typedef __declspec(align(16)) struct
90 {
91 	LDOUBLE d0,d1;
92 } SSEDOUBLE;
93 
94 
95 // this is the key data structure type used by the filter
96 // and the test program.  It will be aligned, since
97 // the __m128 types are all aligned.  It allows the
98 // use of one variable to carry all the needed data
99 // types.
100 typedef union
101 {
102 	__m128  m;
103 	__m128d md;
104 	__m128i mi;
105 	__m64   m64[2];
106 	DWORD	l[4];
107 	int		i[4];
108 	LFLOAT	f[4];
109 	QWORD	q[2];
110 	LDOUBLE d[2];
111 } ML128;
112 
// MMX/x87 register overlay used by the FXSTOR structure.  Both views
// start at the same address; the union is as large as the wider one.
typedef union
{
    unsigned short mmx[4];  // MMX view: 4 x 16 bits = 64 bits
    unsigned short fp[5];   // floating point view: 5 x 16 bits = 80 bits
} MMX80;
119 
120 /*****************************************************************/
121 
122 // define  constants used by SIMD
123 
// MXCSR rounding control: SDIMCW_RC masks the field, the SDIRC_*
// values are the individual settings within that field.
#define SDIMCW_RC   0x6000
#define SDIRC_NEAR  0x0000
#define SDIRC_DOWN  0x2000
#define SDIRC_UP    0x4000
#define SDIRC_CHOP  0x6000

// Other MXCSR control bits.
#define SDDAZ       0x0040
#define SDFTZ       0x8000
134 
// Opcode byte values for the SIMD instructions recognised by the filter.
#define opADD        0x58
#define opAND        0x54
#define opANDN       0x55
#define opCMP        0xC2
#define opCOMISS     0x2F
#define opCVTPI2PS   0x2A
#define opCVTTPS2PI  0x2C
#define opCVTPS2PI   0x2D
#define opCVTPS2PD   0x5A
#define opCVTDQ2PS   0x5B
#define opCVTTPD2DQ  0xE6
#define opDIV        0x5E
#define opMAX        0x5F
#define opMIN        0x5D
#define opMUL        0x59
#define opSQRT       0x51
#define opSUB        0x5C
#define opUCOMISS    0x2E
153 
// EFlags bits, each given as a single-bit mask.
#define ZF  (1 << 6)    // bit 6
#define PF  (1 << 2)    // bit 2
#define CF  (1 << 0)    // bit 0
158 
// REX prefix: REX_PREFIX is the base value, REX_W/R/X/B are the
// individual low-nibble bits.
#define REX_PREFIX  0x40
#define REX_W       0x8
#define REX_R       0x4
#define REX_X       0x2
#define REX_B       0x1
165 
166 
167 // define the exception information record
168 
// Exception status bits; IEM_MASK is the OR of all six.
#define IEM_INEXACT     0x20
#define IEM_UNDERFLOW   0x10
#define IEM_OVERFLOW    0x08
#define IEM_ZERODIVIDE  0x04
#define IEM_DENORMAL    0x02
#define IEM_INVALID     0x01
#define IEM_MASK        0x3F

// The IMM_* constants follow the same order as the IEM_* bits, each
// shifted left by seven positions; IMM_MASK is the OR of all six.
#define IMM_INEXACT     0x1000
#define IMM_UNDERFLOW   0x0800
#define IMM_OVERFLOW    0x0400
#define IMM_ZERODIVIDE  0x0200
#define IMM_DENORMAL    0x0100
#define IMM_INVALID     0x0080
#define IMM_MASK        0x1F80
185 
186 /*****************************************************************/
187 
188 // Instruction forms
189 
190 // Type enumerations
191 //
192 
/*
 * Instruction forms.  Each enumerator name is "f" followed by the
 * operand-type suffix of the first operand, then of the second operand
 * (see OpType), with a trailing "Ib" on the forms that carry one.
 * Values are consecutive starting at 0.
 */
typedef enum
{
    /* general-register destination */
    fGdWsd = 0,
    fGdWss,

    /* Qq destination */
    fQqWpd,
    fQqWps,

    /* Vpd destination */
    fVpdQq,
    fVpdWpd,
    fVpdWpdIb,
    fVpdWpdi,
    fVpdWps,

    /* Vpdi destination */
    fVpdiWpd,
    fVpdiWps,

    /* Vps destination */
    fVpsQq,
    fVpsWpd,
    fVpsWpdi,
    fVpsWps,
    fVpsWpsIb,

    /* Vsd destination */
    fVsdEd,
    fVsdWsd,
    fVsdWsdIb,
    fVsdWss,

    /* Vss destination */
    fVssEd,
    fVssWsd,
    fVssWss,
    fVssWssIb
} InstType;
220 
// Operand types.  Values are consecutive starting at 0.
typedef enum
{
    oEd = 0,    // general register dword, mod R/M
    oGd,        // general register dword
    oQq,        // MMX quadword, mod R/M

    // XMM register operands
    oVpd,
    oVpdi,
    oVps,
    oVsd,
    oVss,

    // XMM mod R/M operands
    oWpd,
    oWpdi,
    oWps,
    oWsd,
    oWss
} OpType;
238 
// Operand class: which register file an operand belongs to, and whether
// it is a plain register ("reg") or a mod R/M operand ("mrm").
typedef enum
{
    oXMMreg = 0,
    oXMMmrm,
    oMMXreg,
    oMMXmrm,
    oGENreg,
    oGENmrm,
} OpClass;
249 
// Operand data formats.  Values are consecutive starting at 0.
typedef enum
{
    dDW = 0,    // integer DWORD
    dPD,        // packed double precision
    dPDI,       // packed integer DWORD
    dPS,        // packed single precision
    dQ,         // integer quadword
    dSD,        // scalar double precision
    dSS         // scalar single precision
} DataType;
261 
262 /*****************************************************************/
263 
264 // Structure definitions
265 //
266 
267 
268 // define the format of the data used by
269 // the FXSAVE and FXRSTOR commands
270 typedef struct
271 {
272     MMX80 mmx;              // the mmx/fp register
273     unsigned short reserved[3]; // floating point regs are 80 bits
274 } FPMMX;
275 
276 #if defined (_AMD64_)
277 // x86-64 version
278 typedef struct _FXMM_SAVE_AREA {
279     WORD    ControlWord;
280     WORD    StatusWord;
281     WORD    TagWord;
282     WORD    OpCode;
283     QWORD   ErrorOffset;
284     QWORD   DataOffset;
285     DWORD   Mxcsr;
286     DWORD   reserved3;
287     FPMMX   FMMXreg[8];
288     ML128   XMMreg[16];
289 } FXMM_SAVE_AREA;
290 #else
291 // 32 bit x86 version
292 typedef struct _FXMM_SAVE_AREA {
293     WORD    ControlWord;
294     WORD    StatusWord;
295     WORD    TagWord;
296     WORD    OpCode;
297     DWORD   ErrorOffset;
298     WORD    ErrorSelector;
299     WORD    reserved1;
300     DWORD   DataOffset;
301     WORD    DataSelector;
302     WORD    reserved2;
303     DWORD   Mxcsr;
304     DWORD   reserved3;
305     FPMMX   FMMXreg[8];
306     ML128   XMMreg[8];
307 } FXMM_SAVE_AREA;
308 #endif
309 typedef FXMM_SAVE_AREA *PFXMM_SAVE_AREA;
310 
/*
 * Used to access the excepting opcode: the opcode byte, the mod R/M
 * byte, and then either an offset or an 8-bit immediate.
 * NOTE(review): the original author flagged the offset member as
 * needing further work for x86-64.
 */
typedef struct {
    unsigned char opcode;
    unsigned char rmbyte;
    union {
        unsigned long long offset;  /* this will need work for x86-64 */
        unsigned char imm8;
    } data;
} SIMD_OP;

typedef SIMD_OP *PSIMD_OP;
321 
// SIMD exception flag set: one single-bit field per exception.
// Same as _FPIEEE_EXCEPTION_FLAGS, with a Denormal field added at the end.
typedef struct {
    unsigned int Inexact          : 1;
    unsigned int Underflow        : 1;
    unsigned int Overflow         : 1;
    unsigned int ZeroDivide       : 1;
    unsigned int InvalidOperation : 1;
    unsigned int Denormal         : 1;  // the field added for SIMD
} _SIMD_EXCEPTION_FLAGS;
333 
334 
335 /* define the local simd record structures */
336 typedef struct {
337     unsigned int RoundingMode;
338     _SIMD_EXCEPTION_FLAGS Cause;
339     _SIMD_EXCEPTION_FLAGS Enable;
340     _SIMD_EXCEPTION_FLAGS Status;
341     PSIMD_OP opaddress;         // points to 0F xx opcode
342     int curAddr;                // used when parsing mod R/M byte
343     unsigned char prefix;
344     unsigned char opcode;
345     unsigned char rmbyte;
346     unsigned char immediate8;
347     // add a rex field for x86-64
348     unsigned char rex;
349     int eopcode;                // encoded opcode (index for tables)
350     int op_form;
351     int op1_class;              // XMM, MMX, or gen register
352     int op1_type;               // data format
353     int op2_class;
354     int op2_type;
355     int is_commiss;
356     int commiss_val;
357     unsigned int mxcsr;         // value of mscsr from context record.
358     ML128 op1_value;
359     ML128 op2_value;
360     ML128 *op2_ptr;
361 
362 } _SIMD_RECORD, *_PSIMD_RECORD;
363 
/* Entry of the operand form table: the forms of an instruction's two operands. */
typedef struct {
    int op1;    /* form of operand 1 */
    int op2;    /* form of operand 2 */
} _OPERAND_RECORD;
369 
370