1 // Copyright 2021 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "src/base/overflowing-math.h"
6 #include "src/wasm/compilation-environment.h"
7 #include "test/cctest/cctest.h"
8 #include "test/cctest/wasm/wasm-run-utils.h"
9 #include "test/cctest/wasm/wasm-simd-utils.h"
10 #include "test/common/wasm/flag-utils.h"
11 #include "test/common/wasm/wasm-macro-gen.h"
12 
13 namespace v8 {
14 namespace internal {
15 namespace wasm {
16 namespace test_run_wasm_relaxed_simd {
17 
// Use this for experimental relaxed-simd opcodes.
//
// WASM_RELAXED_SIMD_TEST(name) { ... } turns the braces that follow it into
// the body of RunWasm_<name>_Impl(TestExecutionTier execution_tier) and
// registers two tests that call it:
//   - RunWasm_<name>_turbofan:    returns early, without running the body,
//                                 when CpuFeatures::SupportsWasmSimd128() is
//                                 false; otherwise runs with kTurbofan.
//   - RunWasm_<name>_interpreter: runs with kInterpreter and has no
//                                 CPU-feature guard.
// Both tests wrap the call in EXPERIMENTAL_FLAG_SCOPE(relaxed_simd).
#define WASM_RELAXED_SIMD_TEST(name)                            \
  void RunWasm_##name##_Impl(TestExecutionTier execution_tier); \
  TEST(RunWasm_##name##_turbofan) {                             \
    if (!CpuFeatures::SupportsWasmSimd128()) return;            \
    EXPERIMENTAL_FLAG_SCOPE(relaxed_simd);                      \
    RunWasm_##name##_Impl(TestExecutionTier::kTurbofan);        \
  }                                                             \
  TEST(RunWasm_##name##_interpreter) {                          \
    EXPERIMENTAL_FLAG_SCOPE(relaxed_simd);                      \
    RunWasm_##name##_Impl(TestExecutionTier::kInterpreter);     \
  }                                                             \
  void RunWasm_##name##_Impl(TestExecutionTier execution_tier)
31 
32 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
33     V8_TARGET_ARCH_PPC64
34 // Only used for qfma and qfms tests below.
35 
// FMOperation holds the params (a, b, c) for a Multiply-Add or
// Multiply-Subtract operation, and the expected result if the operation was
// fused (rounded only once for the entire operation) or unfused (rounded after
// the multiply and again after the add/subtract).
template <typename T>
struct FMOperation {
  // Operand that the product is added to / subtracted from.
  const T a;
  // First factor of the product.
  const T b;
  // Second factor of the product.
  const T c;
  // Expected value when the whole operation is rounded once.
  const T fused_result;
  // Expected value when the product is rounded before the add/subtract.
  const T unfused_result;
};
48 
// large_n<T> is a large finite number that overflows T when multiplied by
// itself; this makes it a useful constant for telling fused and unfused
// behavior apart. The primary template is a zero placeholder; only the float
// and double specializations carry a meaningful value.
template <typename T>
constexpr T large_n = T(0);

template <>
constexpr double large_n<double> = 1e200;

// Float literal: same value as converting the double literal 1e20, without
// the implicit double-to-float conversion.
template <>
constexpr float large_n<float> = 1e20f;
59 
60 // Fused Multiply-Add performs a + b * c.
61 template <typename T>
62 static constexpr FMOperation<T> qfma_array[] = {
63     {1.0f, 2.0f, 3.0f, 7.0f, 7.0f},
64     // fused: a + b * c = -inf + (positive overflow) = -inf
65     // unfused: a + b * c = -inf + inf = NaN
66     {-std::numeric_limits<T>::infinity(), large_n<T>, large_n<T>,
67      -std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
68     // fused: a + b * c = inf + (negative overflow) = inf
69     // unfused: a + b * c = inf + -inf = NaN
70     {std::numeric_limits<T>::infinity(), -large_n<T>, large_n<T>,
71      std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
72     // NaN
73     {std::numeric_limits<T>::quiet_NaN(), 2.0f, 3.0f,
74      std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()},
75     // -NaN
76     {-std::numeric_limits<T>::quiet_NaN(), 2.0f, 3.0f,
77      std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()}};
78 
79 template <typename T>
qfma_vector()80 static constexpr base::Vector<const FMOperation<T>> qfma_vector() {
81   return base::ArrayVector(qfma_array<T>);
82 }
83 
84 // Fused Multiply-Subtract performs a - b * c.
85 template <typename T>
86 static constexpr FMOperation<T> qfms_array[]{
87     {1.0f, 2.0f, 3.0f, -5.0f, -5.0f},
88     // fused: a - b * c = inf - (positive overflow) = inf
89     // unfused: a - b * c = inf - inf = NaN
90     {std::numeric_limits<T>::infinity(), large_n<T>, large_n<T>,
91      std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
92     // fused: a - b * c = -inf - (negative overflow) = -inf
93     // unfused: a - b * c = -inf - -inf = NaN
94     {-std::numeric_limits<T>::infinity(), -large_n<T>, large_n<T>,
95      -std::numeric_limits<T>::infinity(), std::numeric_limits<T>::quiet_NaN()},
96     // NaN
97     {std::numeric_limits<T>::quiet_NaN(), 2.0f, 3.0f,
98      std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()},
99     // -NaN
100     {-std::numeric_limits<T>::quiet_NaN(), 2.0f, 3.0f,
101      std::numeric_limits<T>::quiet_NaN(), std::numeric_limits<T>::quiet_NaN()}};
102 
103 template <typename T>
qfms_vector()104 static constexpr base::Vector<const FMOperation<T>> qfms_vector() {
105   return base::ArrayVector(qfms_array<T>);
106 }
107 
108 // Fused results only when fma3 feature is enabled, and running on TurboFan or
109 // Liftoff (which can fall back to TurboFan if FMA is not implemented).
ExpectFused(TestExecutionTier tier)110 bool ExpectFused(TestExecutionTier tier) {
111 #ifdef V8_TARGET_ARCH_X64
112   return CpuFeatures::IsSupported(FMA3) &&
113          (tier == TestExecutionTier::kTurbofan ||
114           tier == TestExecutionTier::kLiftoff);
115 #else
116   return (tier == TestExecutionTier::kTurbofan ||
117           tier == TestExecutionTier::kLiftoff);
118 #endif
119 }
120 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
121         // V8_TARGET_ARCH_PPC64
122 
123 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
124     V8_TARGET_ARCH_PPC64
// Runs f32x4 qfma on splatted (a, b, c) for every entry of qfma_vector<float>
// and checks each of the four result lanes against the fused or unfused
// expectation, as selected by ExpectFused(execution_tier).
WASM_RELAXED_SIMD_TEST(F32x4Qfma) {
  WasmRunner<int32_t, float, float, float> r(execution_tier);
  // Set up an S128 global to hold the qfma result.
  float* g = r.builder().AddGlobal<float>(kWasmS128);
  // Build fn to splat the three params, perform qfma, and write the result.
  byte value1 = 0, value2 = 1, value3 = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F32x4_QFMA(
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value1)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value2)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value3)))),
        WASM_ONE);

  // The tier does not change between cases, so decide once.
  const bool expect_fused = ExpectFused(execution_tier);
  for (const FMOperation<float>& x : qfma_vector<float>()) {
    r.Call(x.a, x.b, x.c);
    const float expected = expect_fused ? x.fused_result : x.unfused_result;
    for (int i = 0; i < 4; i++) {
      float actual = LANE(g, i);
      CheckFloatResult(x.a, x.b, expected, actual, true /* exact */);
    }
  }
}
148 
// Runs f32x4 qfms on splatted (a, b, c) for every entry of qfms_vector<float>
// and checks each of the four result lanes against the fused or unfused
// expectation, as selected by ExpectFused(execution_tier).
WASM_RELAXED_SIMD_TEST(F32x4Qfms) {
  WasmRunner<int32_t, float, float, float> r(execution_tier);
  // Set up an S128 global to hold the qfms result.
  float* g = r.builder().AddGlobal<float>(kWasmS128);
  // Build fn to splat the three params, perform qfms, and write the result.
  byte value1 = 0, value2 = 1, value3 = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F32x4_QFMS(
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value1)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value2)),
                               WASM_SIMD_F32x4_SPLAT(WASM_LOCAL_GET(value3)))),
        WASM_ONE);

  // The tier does not change between cases, so decide once.
  const bool expect_fused = ExpectFused(execution_tier);
  for (const FMOperation<float>& x : qfms_vector<float>()) {
    r.Call(x.a, x.b, x.c);
    const float expected = expect_fused ? x.fused_result : x.unfused_result;
    for (int i = 0; i < 4; i++) {
      float actual = LANE(g, i);
      CheckFloatResult(x.a, x.b, expected, actual, true /* exact */);
    }
  }
}
172 
// Runs f64x2 qfma on splatted (a, b, c) for every entry of qfma_vector<double>
// and checks both result lanes against the fused or unfused expectation, as
// selected by ExpectFused(execution_tier).
WASM_RELAXED_SIMD_TEST(F64x2Qfma) {
  WasmRunner<int32_t, double, double, double> r(execution_tier);
  // Set up an S128 global to hold the qfma result.
  double* g = r.builder().AddGlobal<double>(kWasmS128);
  // Build fn to splat the three params, perform qfma, and write the result.
  byte value1 = 0, value2 = 1, value3 = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F64x2_QFMA(
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value1)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value2)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value3)))),
        WASM_ONE);

  // The tier does not change between cases, so decide once.
  const bool expect_fused = ExpectFused(execution_tier);
  for (const FMOperation<double>& x : qfma_vector<double>()) {
    r.Call(x.a, x.b, x.c);
    const double expected = expect_fused ? x.fused_result : x.unfused_result;
    for (int i = 0; i < 2; i++) {
      double actual = LANE(g, i);
      CheckDoubleResult(x.a, x.b, expected, actual, true /* exact */);
    }
  }
}
196 
// Runs f64x2 qfms on splatted (a, b, c) for every entry of qfms_vector<double>
// and checks both result lanes against the fused or unfused expectation, as
// selected by ExpectFused(execution_tier).
WASM_RELAXED_SIMD_TEST(F64x2Qfms) {
  WasmRunner<int32_t, double, double, double> r(execution_tier);
  // Set up an S128 global to hold the qfms result.
  double* g = r.builder().AddGlobal<double>(kWasmS128);
  // Build fn to splat the three params, perform qfms, and write the result.
  byte value1 = 0, value2 = 1, value3 = 2;
  BUILD(r,
        WASM_GLOBAL_SET(0, WASM_SIMD_F64x2_QFMS(
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value1)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value2)),
                               WASM_SIMD_F64x2_SPLAT(WASM_LOCAL_GET(value3)))),
        WASM_ONE);

  // The tier does not change between cases, so decide once.
  const bool expect_fused = ExpectFused(execution_tier);
  for (const FMOperation<double>& x : qfms_vector<double>()) {
    r.Call(x.a, x.b, x.c);
    const double expected = expect_fused ? x.fused_result : x.unfused_result;
    for (int i = 0; i < 2; i++) {
      double actual = LANE(g, i);
      CheckDoubleResult(x.a, x.b, expected, actual, true /* exact */);
    }
  }
}
220 #endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
221         // V8_TARGET_ARCH_PPC64
222 
// Checks kExprF32x4RecipApprox against base::Recip via RunF32x4UnOpTest.
// The op is an approximation, so an exact match is not requested.
WASM_RELAXED_SIMD_TEST(F32x4RecipApprox) {
  RunF32x4UnOpTest(execution_tier, kExprF32x4RecipApprox, base::Recip,
                   false /* !exact */);
}
227 
// Checks kExprF32x4RecipSqrtApprox against base::RecipSqrt via
// RunF32x4UnOpTest. The op is an approximation, so an exact match is not
// requested.
WASM_RELAXED_SIMD_TEST(F32x4RecipSqrtApprox) {
  RunF32x4UnOpTest(execution_tier, kExprF32x4RecipSqrtApprox, base::RecipSqrt,
                   false /* !exact */);
}
232 
233 #undef WASM_RELAXED_SIMD_TEST
234 }  // namespace test_run_wasm_relaxed_simd
235 }  // namespace wasm
236 }  // namespace internal
237 }  // namespace v8
238