1 /* This file is part of the Spring engine (GPL v2 or later), see LICENSE.html */
2
3 #ifdef USE_VALGRIND
4 #include <valgrind/valgrind.h>
5 #endif
6
7 #include "FPUCheck.h"
8 #include "lib/streflop/streflop_cond.h"
9 #include "System/Exceptions.h"
10 #include "System/ThreadPool.h"
11 #include "System/Log/ILog.h"
12 #include "System/Platform/CpuID.h"
13
// Compile-time sanity check: when the streflop header has been pulled in,
// one of the two FP-math back-ends handled by this file (SSE or X87) must
// have been selected. The macros are tested with defined() so that the
// check works no matter what value (if any) they were defined with, which
// matches how they are tested further down in this file.
#ifdef STREFLOP_H
	#if !defined(STREFLOP_SSE) && !defined(STREFLOP_X87)
		#error "streflop FP-math mode must be either SSE or X87"
	#endif
#endif
21
22 /**
@brief checks FPU control registers.
Checks the FPU control registers MXCSR and FPUCW against the accepted
(sync-safe) values listed below, and re-initializes streflop if they differ.
25
26 For reference, the layout of the MXCSR register:
27 FZ:RC:RC:PM:UM:OM:ZM:DM:IM: Rsvd:PE:UE:OE:ZE:DE:IE
28 15 14 13 12 11 10 9 8 7| 6 5 4 3 2 1 0
29 Spring1: 0 0 0 1 1 1 0 1 0| 0 0 0 0 0 0 0 = 0x1D00 = 7424
30 Spring2: 0 0 0 1 1 1 1 1 1| 0 0 0 0 0 0 0 = 0x1F80 = 8064
31 Spring3: 0 0 0 1 1 0 0 1 0| 0 0 0 0 0 0 0 = 0x1900 = 6400 (signan)
32 Default: 0 0 0 1 1 1 1 1 1| 0 0 0 0 0 0 0 = 0x1F80 = 8064
33 MaskRsvd: 1 1 1 1 1 1 1 1 1| 0 0 0 0 0 0 0 = 0xFF80
34
35 And the layout of the 387 FPU control word register:
36 Rsvd:Rsvd:Rsvd:X:RC:RC:PC:PC: Rsvd:Rsvd:PM:UM:OM:ZM:DM:IM
37 15 14 13 12 11 10 9 8| 7 6 5 4 3 2 1 0
38 Spring1: 0 0 0 0 0 0 0 0| 0 0 1 1 1 0 1 0 = 0x003A = 58
39 Spring2: 0 0 0 0 0 0 0 0| 0 0 1 1 1 1 1 1 = 0x003F = 63
40 Spring3: 0 0 0 0 0 0 0 0| 0 0 1 1 0 0 1 0 = 0x0032 = 50 (signan)
41 Default: 0 0 0 0 0 0 1 1| 0 0 1 1 1 1 1 1 = 0x033F = 831
42 MaskRsvd: 0 0 0 1 1 1 1 1| 0 0 1 1 1 1 1 1 = 0x0F3F
43
44 Where:
45 Rsvd - Reserved
46 FZ - Flush to Zero
47 RC - Rounding Control
48 PM - Precision Mask
49 UM - Underflow Mask
50 OM - Overflow Mask
51 ZM - Zerodivide Mask
52 DM - Denormal Mask
53 IM - Invalid Mask
54 PE - Precision Exception
55 UE - Underflow Exception
56 OE - Overflow Exception
57 ZE - Zerodivide Exception
58 DE - Denormal Exception
59 IE - Invalid Exception
60 X - Infinity control (unused on 387 and higher)
61 PC - Precision Control
62
Spring1 - Control word used by Spring in CGame::SimFrame().
Spring2 - Control word used by Spring everywhere else.
Spring3 - Control word with the invalid, zero-divide and overflow exceptions unmasked (signan).
Default - Default control word according to Intel.
MaskRsvd - Masks out the reserved bits.
67
68 Source: Intel Architecture Software Development Manual, Volume 1, Basic Architecture
69 */
void good_fpu_control_registers(const char* text)
{
	// text: caller-supplied tag that is echoed in the warning messages.

#ifdef USE_VALGRIND
	// The FPU cannot be programmed while running under Valgrind, so there
	// is nothing meaningful to verify (and syncing is impossible anyway).
	static const bool valgrindRunning = RUNNING_ON_VALGRIND;

	if (valgrindRunning)
		return;
#endif

	// The accepted (sync-safe) control-word values, after masking.
	const int sse_a = 0x1D00;
	const int sse_b = 0x1F80;
	const int sse_c = 0x1900; // signan
	const int x87_a = 0x003A;
	const int x87_b = 0x003F;
	const int x87_c = 0x0032; // signan

#if defined(STREFLOP_SSE)
	// in this mode the environment is a struct carrying both control words
	streflop::fpenv_t fenv;
	streflop::fegetenv(&fenv);

	// strip the bits that are not part of the comparison
	const int fsse = fenv.sse_mode & 0xFF80;
	const int fx87 = fenv.x87_mode & 0x0F3F;

	const bool sseAccepted = (fsse == sse_a) || (fsse == sse_b) || (fsse == sse_c);
	const bool x87Accepted = (fx87 == x87_a) || (fx87 == x87_b) || (fx87 == x87_c);

	if (sseAccepted && x87Accepted)
		return;

	// either register being off triggers both reports
	LOG_L(L_WARNING, "[%s] Sync warning: (env.sse_mode) MXCSR 0x%04X instead of 0x%04X or 0x%04X (\"%s\")", __FUNCTION__, fsse, sse_a, sse_b, text);
	LOG_L(L_WARNING, "[%s] Sync warning: (env.x87_mode) FPUCW 0x%04X instead of 0x%04X or 0x%04X (\"%s\")", __FUNCTION__, fx87, x87_a, x87_b, text);

	// Put the FPU back into single-precision mode.
	streflop::streflop_init<streflop::Simple>();
	#if defined(__SUPPORT_SNAN__)
	streflop::feraiseexcept(streflop::FPU_Exceptions(streflop::FE_INVALID | streflop::FE_DIVBYZERO | streflop::FE_OVERFLOW));
	#endif

#elif defined(STREFLOP_X87)
	// in this mode the environment is a plain short int (the FPUCW itself)
	streflop::fpenv_t fenv;
	streflop::fegetenv(&fenv);

	const int fx87 = fenv & 0x0F3F;

	if ((fx87 == x87_a) || (fx87 == x87_b) || (fx87 == x87_c))
		return;

	// note: the unmasked control word is what gets reported here
	LOG_L(L_WARNING, "[%s] Sync warning: FPUCW 0x%04X instead of 0x%04X or 0x%04X (\"%s\")", __FUNCTION__, fenv, x87_a, x87_b, text);

	// Put the FPU back into single-precision mode.
	streflop::streflop_init<streflop::Simple>();
	#if defined(__SUPPORT_SNAN__)
	streflop::feraiseexcept(streflop::FPU_Exceptions(streflop::FE_INVALID | streflop::FE_DIVBYZERO | streflop::FE_OVERFLOW));
	#endif
#endif
}
131
good_fpu_init()132 void good_fpu_init()
133 {
134 const unsigned int sseBits = springproc::GetProcSSEBits();
135 LOG("[CMyMath::Init] CPU SSE mask: %u, flags:", sseBits);
136 LOG("\tSSE 1.0: %d, SSE 2.0: %d", (sseBits >> 5) & 1, (sseBits >> 4) & 1);
137 LOG("\tSSE 3.0: %d, SSSE 3.0: %d", (sseBits >> 3) & 1, (sseBits >> 2) & 1);
138 LOG("\tSSE 4.1: %d, SSE 4.2: %d", (sseBits >> 1) & 1, (sseBits >> 0) & 1);
139 LOG("\tSSE 4.0A: %d, SSE 5.0A: %d", (sseBits >> 8) & 1, (sseBits >> 7) & 1);
140
141 #ifdef STREFLOP_H
142 const bool hasSSE1 = (sseBits >> 5) & 1;
143
144 #ifdef STREFLOP_SSE
145 if (hasSSE1) {
146 LOG("\tusing streflop SSE FP-math mode, CPU supports SSE instructions");
147 } else {
148 throw unsupported_error("CPU is missing SSE instruction support");
149 }
150 #else
151 if (hasSSE1) {
152 LOG_L(L_WARNING, "\tStreflop floating-point math is set to X87 mode");
153 LOG_L(L_WARNING, "\tThis may cause desyncs during multi-player games");
154 LOG_L(L_WARNING, "\tThis CPU is SSE-capable; consider recompiling");
155 } else {
156 LOG_L(L_WARNING, "\tStreflop floating-point math is not SSE-enabled");
157 LOG_L(L_WARNING, "\tThis may cause desyncs during multi-player games");
158 LOG_L(L_WARNING, "\tThis CPU is not SSE-capable; it can only use X87 mode");
159 }
160 #endif
161
162 // Set single precision floating point math.
163 streflop::streflop_init<streflop::Simple>();
164 #if defined(__SUPPORT_SNAN__)
165 streflop::feraiseexcept(streflop::FPU_Exceptions(streflop::FE_INVALID | streflop::FE_DIVBYZERO | streflop::FE_OVERFLOW));
166 #endif
167
168 #else
169 // probably should check if SSE was enabled during
170 // compilation and issue a warning about illegal
171 // instructions if so (or just die with an error)
172 LOG_L(L_WARNING, "Floating-point math is not controlled by streflop");
173 LOG_L(L_WARNING, "This makes keeping multi-player sync 99% impossible");
174 #endif
175 }
176
streflop_init_omp()177 void streflop_init_omp() {
178 // Initialize FPU in all worker threads, too
179 // Note: It's not needed for sync'ness cause all precision relevant
180 // mode flags are shared across the process!
181 // But the exception ones aren't (but are copied from the calling thread).
182 parallel([&]{
183 streflop::streflop_init<streflop::Simple>();
184 #if defined(__SUPPORT_SNAN__)
185 streflop::feraiseexcept(streflop::FPU_Exceptions(streflop::FE_INVALID | streflop::FE_DIVBYZERO | streflop::FE_OVERFLOW));
186 #endif
187 });
188 }
189
190 namespace springproc {
GetProcMaxStandardLevel()191 unsigned int GetProcMaxStandardLevel()
192 {
193 unsigned int rEAX = 0x00000000;
194 unsigned int rEBX = 0;
195 unsigned int rECX = 0;
196 unsigned int rEDX = 0;
197
198 ExecCPUID(&rEAX, &rEBX, &rECX, &rEDX);
199
200 return rEAX;
201 }
202
GetProcMaxExtendedLevel()203 unsigned int GetProcMaxExtendedLevel()
204 {
205 unsigned int rEAX = 0x80000000;
206 unsigned int rEBX = 0;
207 unsigned int rECX = 0;
208 unsigned int rEDX = 0;
209
210 ExecCPUID(&rEAX, &rEBX, &rECX, &rEDX);
211
212 return rEAX;
213 }
214
GetProcSSEBits()215 unsigned int GetProcSSEBits()
216 {
217 unsigned int rEAX = 0;
218 unsigned int rEBX = 0;
219 unsigned int rECX = 0;
220 unsigned int rEDX = 0;
221 unsigned int bits = 0;
222
223 if (GetProcMaxStandardLevel() >= 0x00000001U) {
224 rEAX = 0x00000001U; ExecCPUID(&rEAX, &rEBX, &rECX, &rEDX);
225
226 int SSE42 = (rECX >> 20) & 1; bits |= ( SSE42 << 0); // SSE 4.2
227 int SSE41 = (rECX >> 19) & 1; bits |= ( SSE41 << 1); // SSE 4.1
228 int SSSE30 = (rECX >> 9) & 1; bits |= (SSSE30 << 2); // Supplemental SSE 3.0
229 int SSE30 = (rECX >> 0) & 1; bits |= ( SSE30 << 3); // SSE 3.0
230
231 int SSE20 = (rEDX >> 26) & 1; bits |= ( SSE20 << 4); // SSE 2.0
232 int SSE10 = (rEDX >> 25) & 1; bits |= ( SSE10 << 5); // SSE 1.0
233 int MMX = (rEDX >> 23) & 1; bits |= ( MMX << 6); // MMX
234 }
235
236 if (GetProcMaxExtendedLevel() >= 0x80000001U) {
237 rEAX = 0x80000001U; ExecCPUID(&rEAX, &rEBX, &rECX, &rEDX);
238
239 int SSE50A = (rECX >> 11) & 1; bits |= (SSE50A << 7); // SSE 5.0A
240 int SSE40A = (rECX >> 6) & 1; bits |= (SSE40A << 8); // SSE 4.0A
241 int MSSE = (rECX >> 7) & 1; bits |= (MSSE << 9); // Misaligned SSE
242 }
243
244 return bits;
245 }
246 }
247