1 // Copyright (c) 2015-2016 The Khronos Group Inc.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <cfloat>
16 #include <cmath>
17 #include <cstdio>
18 #include <limits>
19 #include <sstream>
20 #include <string>
21 #include <tuple>
22 #include <utility>
23 #include <vector>
24 
25 #include "gmock/gmock.h"
26 #include "source/util/hex_float.h"
27 #include "test/unit_spirv.h"
28 
29 namespace spvtools {
30 namespace utils {
31 namespace {
32 
33 using ::testing::Eq;
34 
35 // In this file "encode" means converting a number into a string,
36 // and "decode" means converting a string into a number.
37 
// Fixture for 32-bit cases given as (value, hex-float text); the same
// parameters drive both the encode and the decode tests below.
using HexFloatTest =
    ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
// Fixture for decode-only 32-bit cases given as (hex-float text, value).
using DecodeHexFloatTest =
    ::testing::TestWithParam<std::pair<std::string, FloatProxy<float>>>;
// Fixture for 64-bit cases given as (value, hex-float text); the same
// parameters drive both the encode and the decode tests below.
using HexDoubleTest =
    ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
// Fixture for decode-only 64-bit cases given as (hex-float text, value).
using DecodeHexDoubleTest =
    ::testing::TestWithParam<std::pair<std::string, FloatProxy<double>>>;
// Fixtures parameterized on a plain float/double that is written to a stream
// and read back.
using RoundTripFloatTest = ::testing::TestWithParam<float>;
using RoundTripDoubleTest = ::testing::TestWithParam<double>;
48 
49 // Hex-encodes a float value.
50 template <typename T>
EncodeViaHexFloat(const T & value)51 std::string EncodeViaHexFloat(const T& value) {
52   std::stringstream ss;
53   ss << HexFloat<T>(value);
54   return ss.str();
55 }
56 
57 // The following two tests can't be DRY because they take different parameter
58 // types.
59 
TEST_P(HexFloatTest, EncodeCorrectly) {
  // The parameter is (value to encode, expected hex-float text).
  const auto& param = GetParam();
  const std::string encoded = EncodeViaHexFloat(param.first);
  EXPECT_THAT(encoded, Eq(param.second));
}
63 
TEST_P(HexDoubleTest, EncodeCorrectly) {
  // The parameter is (value to encode, expected hex-float text).
  const auto& param = GetParam();
  const std::string encoded = EncodeViaHexFloat(param.first);
  EXPECT_THAT(encoded, Eq(param.second));
}
67 
68 // Decodes a hex-float string.
69 template <typename T>
Decode(const std::string & str)70 FloatProxy<T> Decode(const std::string& str) {
71   HexFloat<FloatProxy<T>> decoded(0.f);
72   EXPECT_TRUE((std::stringstream(str) >> decoded).eof());
73   return decoded.value();
74 }
75 
TEST_P(HexFloatTest, DecodeCorrectly) {
  // The parameter is (expected value, hex-float text to decode).
  const auto& param = GetParam();
  EXPECT_THAT(Decode<float>(param.second), Eq(param.first));
}
79 
TEST_P(HexDoubleTest, DecodeCorrectly) {
  // The parameter is (expected value, hex-float text to decode).
  const auto& param = GetParam();
  EXPECT_THAT(Decode<double>(param.second), Eq(param.first));
}
83 
84 INSTANTIATE_TEST_SUITE_P(
85     Float32Tests, HexFloatTest,
86     ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
87         {0.f, "0x0p+0"},
88         {1.f, "0x1p+0"},
89         {2.f, "0x1p+1"},
90         {3.f, "0x1.8p+1"},
91         {0.5f, "0x1p-1"},
92         {0.25f, "0x1p-2"},
93         {0.75f, "0x1.8p-1"},
94         {-0.f, "-0x0p+0"},
95         {-1.f, "-0x1p+0"},
96         {-0.5f, "-0x1p-1"},
97         {-0.25f, "-0x1p-2"},
98         {-0.75f, "-0x1.8p-1"},
99 
100         // Larger numbers
101         {512.f, "0x1p+9"},
102         {-512.f, "-0x1p+9"},
103         {1024.f, "0x1p+10"},
104         {-1024.f, "-0x1p+10"},
105         {1024.f + 8.f, "0x1.02p+10"},
106         {-1024.f - 8.f, "-0x1.02p+10"},
107 
108         // Small numbers
109         {1.0f / 512.f, "0x1p-9"},
110         {1.0f / -512.f, "-0x1p-9"},
111         {1.0f / 1024.f, "0x1p-10"},
112         {1.0f / -1024.f, "-0x1p-10"},
113         {1.0f / 1024.f + 1.0f / 8.f, "0x1.02p-3"},
114         {1.0f / -1024.f - 1.0f / 8.f, "-0x1.02p-3"},
115 
116         // lowest non-denorm
117         {float(ldexp(1.0f, -126)), "0x1p-126"},
118         {float(ldexp(-1.0f, -126)), "-0x1p-126"},
119 
120         // Denormalized values
121         {float(ldexp(1.0f, -127)), "0x1p-127"},
122         {float(ldexp(1.0f, -127) / 2.0f), "0x1p-128"},
123         {float(ldexp(1.0f, -127) / 4.0f), "0x1p-129"},
124         {float(ldexp(1.0f, -127) / 8.0f), "0x1p-130"},
125         {float(ldexp(-1.0f, -127)), "-0x1p-127"},
126         {float(ldexp(-1.0f, -127) / 2.0f), "-0x1p-128"},
127         {float(ldexp(-1.0f, -127) / 4.0f), "-0x1p-129"},
128         {float(ldexp(-1.0f, -127) / 8.0f), "-0x1p-130"},
129 
130         {float(ldexp(1.0, -127) + (ldexp(1.0, -127) / 2.0f)), "0x1.8p-127"},
131         {float(ldexp(1.0, -127) / 2.0 + (ldexp(1.0, -127) / 4.0f)),
132          "0x1.8p-128"},
133 
134     })));
135 
136 INSTANTIATE_TEST_SUITE_P(
137     Float32NanTests, HexFloatTest,
138     ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
139         // Various NAN and INF cases
140         {uint32_t(0xFF800000), "-0x1p+128"},         // -inf
141         {uint32_t(0x7F800000), "0x1p+128"},          // inf
142         {uint32_t(0xFFC00000), "-0x1.8p+128"},       // -nan
143         {uint32_t(0xFF800100), "-0x1.0002p+128"},    // -nan
144         {uint32_t(0xFF800c00), "-0x1.0018p+128"},    // -nan
145         {uint32_t(0xFF80F000), "-0x1.01ep+128"},     // -nan
146         {uint32_t(0xFFFFFFFF), "-0x1.fffffep+128"},  // -nan
147         {uint32_t(0x7FC00000), "0x1.8p+128"},        // +nan
148         {uint32_t(0x7F800100), "0x1.0002p+128"},     // +nan
149         {uint32_t(0x7f800c00), "0x1.0018p+128"},     // +nan
150         {uint32_t(0x7F80F000), "0x1.01ep+128"},      // +nan
151         {uint32_t(0x7FFFFFFF), "0x1.fffffep+128"},   // +nan
152     })));
153 
154 INSTANTIATE_TEST_SUITE_P(
155     Float64Tests, HexDoubleTest,
156     ::testing::ValuesIn(
157         std::vector<std::pair<FloatProxy<double>, std::string>>({
158             {0., "0x0p+0"},
159             {1., "0x1p+0"},
160             {2., "0x1p+1"},
161             {3., "0x1.8p+1"},
162             {0.5, "0x1p-1"},
163             {0.25, "0x1p-2"},
164             {0.75, "0x1.8p-1"},
165             {-0., "-0x0p+0"},
166             {-1., "-0x1p+0"},
167             {-0.5, "-0x1p-1"},
168             {-0.25, "-0x1p-2"},
169             {-0.75, "-0x1.8p-1"},
170 
171             // Larger numbers
172             {512., "0x1p+9"},
173             {-512., "-0x1p+9"},
174             {1024., "0x1p+10"},
175             {-1024., "-0x1p+10"},
176             {1024. + 8., "0x1.02p+10"},
177             {-1024. - 8., "-0x1.02p+10"},
178 
179             // Large outside the range of normal floats
180             {ldexp(1.0, 128), "0x1p+128"},
181             {ldexp(1.0, 129), "0x1p+129"},
182             {ldexp(-1.0, 128), "-0x1p+128"},
183             {ldexp(-1.0, 129), "-0x1p+129"},
184             {ldexp(1.0, 128) + ldexp(1.0, 90), "0x1.0000000004p+128"},
185             {ldexp(1.0, 129) + ldexp(1.0, 120), "0x1.008p+129"},
186             {ldexp(-1.0, 128) + ldexp(1.0, 90), "-0x1.fffffffff8p+127"},
187             {ldexp(-1.0, 129) + ldexp(1.0, 120), "-0x1.ffp+128"},
188 
189             // Small numbers
190             {1.0 / 512., "0x1p-9"},
191             {1.0 / -512., "-0x1p-9"},
192             {1.0 / 1024., "0x1p-10"},
193             {1.0 / -1024., "-0x1p-10"},
194             {1.0 / 1024. + 1.0 / 8., "0x1.02p-3"},
195             {1.0 / -1024. - 1.0 / 8., "-0x1.02p-3"},
196 
197             // Small outside the range of normal floats
198             {ldexp(1.0, -128), "0x1p-128"},
199             {ldexp(1.0, -129), "0x1p-129"},
200             {ldexp(-1.0, -128), "-0x1p-128"},
201             {ldexp(-1.0, -129), "-0x1p-129"},
202             {ldexp(1.0, -128) + ldexp(1.0, -90), "0x1.0000000004p-90"},
203             {ldexp(1.0, -129) + ldexp(1.0, -120), "0x1.008p-120"},
204             {ldexp(-1.0, -128) + ldexp(1.0, -90), "0x1.fffffffff8p-91"},
205             {ldexp(-1.0, -129) + ldexp(1.0, -120), "0x1.ffp-121"},
206 
207             // lowest non-denorm
208             {ldexp(1.0, -1022), "0x1p-1022"},
209             {ldexp(-1.0, -1022), "-0x1p-1022"},
210 
211             // Denormalized values
212             {ldexp(1.0, -1023), "0x1p-1023"},
213             {ldexp(1.0, -1023) / 2.0, "0x1p-1024"},
214             {ldexp(1.0, -1023) / 4.0, "0x1p-1025"},
215             {ldexp(1.0, -1023) / 8.0, "0x1p-1026"},
216             {ldexp(-1.0, -1024), "-0x1p-1024"},
217             {ldexp(-1.0, -1024) / 2.0, "-0x1p-1025"},
218             {ldexp(-1.0, -1024) / 4.0, "-0x1p-1026"},
219             {ldexp(-1.0, -1024) / 8.0, "-0x1p-1027"},
220 
221             {ldexp(1.0, -1023) + (ldexp(1.0, -1023) / 2.0), "0x1.8p-1023"},
222             {ldexp(1.0, -1023) / 2.0 + (ldexp(1.0, -1023) / 4.0),
223              "0x1.8p-1024"},
224 
225         })));
226 
227 INSTANTIATE_TEST_SUITE_P(
228     Float64NanTests, HexDoubleTest,
229     ::testing::ValuesIn(std::vector<
230                         std::pair<FloatProxy<double>, std::string>>({
231         // Various NAN and INF cases
232         {uint64_t(0xFFF0000000000000LL), "-0x1p+1024"},                // -inf
233         {uint64_t(0x7FF0000000000000LL), "0x1p+1024"},                 // +inf
234         {uint64_t(0xFFF8000000000000LL), "-0x1.8p+1024"},              // -nan
235         {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},             // -nan
236         {uint64_t(0xFFF0000000000001LL), "-0x1.0000000000001p+1024"},  // -nan
237         {uint64_t(0xFFF0000300000000LL), "-0x1.00003p+1024"},          // -nan
238         {uint64_t(0xFFFFFFFFFFFFFFFFLL), "-0x1.fffffffffffffp+1024"},  // -nan
239         {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},               // +nan
240         {uint64_t(0x7FF0F00000000000LL), "0x1.0fp+1024"},              // +nan
241         {uint64_t(0x7FF0000000000001LL), "0x1.0000000000001p+1024"},   // -nan
242         {uint64_t(0x7FF0000300000000LL), "0x1.00003p+1024"},           // -nan
243         {uint64_t(0x7FFFFFFFFFFFFFFFLL), "0x1.fffffffffffffp+1024"},   // -nan
244     })));
245 
246 // Tests that encoding a value and decoding it again restores
247 // the same value.
TEST_P(RoundTripFloatTest,CanStoreAccurately)248 TEST_P(RoundTripFloatTest, CanStoreAccurately) {
249   std::stringstream ss;
250   ss << FloatProxy<float>(GetParam());
251   ss.seekg(0);
252   FloatProxy<float> res;
253   ss >> res;
254   EXPECT_THAT(GetParam(), Eq(res.getAsFloat()));
255 }
256 
// Writes the parameter to a stream through a FloatProxy, reads it back from
// the start of the stream, and expects the value to be unchanged.
TEST_P(RoundTripDoubleTest, CanStoreAccurately) {
  const double original = GetParam();
  std::stringstream buffer;
  buffer << FloatProxy<double>(original);
  buffer.seekg(0);
  FloatProxy<double> restored;
  buffer >> restored;
  EXPECT_THAT(original, Eq(restored.getAsFloat()));
}
265 
266 INSTANTIATE_TEST_SUITE_P(
267     Float32StoreTests, RoundTripFloatTest,
268     ::testing::ValuesIn(std::vector<float>(
269         {// Value requiring more than 6 digits of precision to be
270          // represented accurately.
271          3.0000002f})));
272 
273 INSTANTIATE_TEST_SUITE_P(
274     Float64StoreTests, RoundTripDoubleTest,
275     ::testing::ValuesIn(std::vector<double>(
276         {// Value requiring more than 15 digits of precision to be
277          // represented accurately.
278          1.5000000000000002})));
279 
TEST(HexFloatStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  // Integers on either side of the proxy are written in octal with an 'x'
  // fill; the second one must still come out as "xx11".
  const FloatProxy<float> negative_nan(uint32_t(0xFF800100));
  std::stringstream out;
  out << std::setw(4) << std::oct << std::setfill('x') << 8 << " ";
  out << negative_nan << " ";
  out << std::setw(4) << 9;
  EXPECT_THAT(out.str(), Eq(std::string("xx10 -0x1.0002p+128 xx11")));
}
286 
TEST(HexDoubleStreamTest, OperatorLeftShiftPreservesFloatAndFill) {
  // Integers on either side of the proxy are written in octal with an 'x'
  // fill; the second one must still come out as "xx11".
  const FloatProxy<double> positive_nan(uint64_t(0x7FF0F00000000000LL));
  std::stringstream out;
  out << std::setw(4) << std::oct << std::setfill('x') << 8 << " ";
  out << positive_nan << " ";
  out << std::setw(4) << 9;
  EXPECT_THAT(out.str(), Eq(std::string("xx10 0x1.0fp+1024 xx11")));
}
294 
TEST_P(DecodeHexFloatTest, DecodeCorrectly) {
  // The parameter is (hex-float text to decode, expected value).
  const auto& param = GetParam();
  EXPECT_THAT(Decode<float>(param.first), Eq(param.second));
}
298 
TEST_P(DecodeHexDoubleTest, DecodeCorrectly) {
  // The parameter is (hex-float text to decode, expected value).
  const auto& param = GetParam();
  EXPECT_THAT(Decode<double>(param.first), Eq(param.second));
}
302 
303 INSTANTIATE_TEST_SUITE_P(
304     Float32DecodeTests, DecodeHexFloatTest,
305     ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
306         {"0x0p+000", 0.f},
307         {"0x0p0", 0.f},
308         {"0x0p-0", 0.f},
309 
310         // flush to zero cases
311         {"0x1p-500", 0.f},  // Exponent underflows.
312         {"-0x1p-500", -0.f},
313         {"0x0.00000000001p-126", 0.f},  // Fraction causes underflow.
314         {"-0x0.0000000001p-127", -0.f},
315         {"-0x0.01p-142", -0.f},  // Fraction causes additional underflow.
316         {"0x0.01p-142", 0.f},
317 
318         // Some floats that do not encode the same way as they decode.
319         {"0x2p+0", 2.f},
320         {"0xFFp+0", 255.f},
321         {"0x0.8p+0", 0.5f},
322         {"0x0.4p+0", 0.25f},
323     })));
324 
325 INSTANTIATE_TEST_SUITE_P(
326     Float32DecodeInfTests, DecodeHexFloatTest,
327     ::testing::ValuesIn(std::vector<std::pair<std::string, FloatProxy<float>>>({
328         // inf cases
329         {"-0x1p+128", uint32_t(0xFF800000)},   // -inf
330         {"0x32p+127", uint32_t(0x7F800000)},   // inf
331         {"0x32p+500", uint32_t(0x7F800000)},   // inf
332         {"-0x32p+127", uint32_t(0xFF800000)},  // -inf
333     })));
334 
335 INSTANTIATE_TEST_SUITE_P(
336     Float64DecodeTests, DecodeHexDoubleTest,
337     ::testing::ValuesIn(
338         std::vector<std::pair<std::string, FloatProxy<double>>>({
339             {"0x0p+000", 0.},
340             {"0x0p0", 0.},
341             {"0x0p-0", 0.},
342 
343             // flush to zero cases
344             {"0x1p-5000", 0.},  // Exponent underflows.
345             {"-0x1p-5000", -0.},
346             {"0x0.0000000000000001p-1023", 0.},  // Fraction causes underflow.
347             {"-0x0.000000000000001p-1024", -0.},
348             {"-0x0.01p-1090", -0.f},  // Fraction causes additional underflow.
349             {"0x0.01p-1090", 0.},
350 
351             // Some floats that do not encode the same way as they decode.
352             {"0x2p+0", 2.},
353             {"0xFFp+0", 255.},
354             {"0x0.8p+0", 0.5},
355             {"0x0.4p+0", 0.25},
356         })));
357 
358 INSTANTIATE_TEST_SUITE_P(
359     Float64DecodeInfTests, DecodeHexDoubleTest,
360     ::testing::ValuesIn(
361         std::vector<std::pair<std::string, FloatProxy<double>>>({
362             // inf cases
363             {"-0x1p+1024", uint64_t(0xFFF0000000000000)},   // -inf
364             {"0x32p+1023", uint64_t(0x7FF0000000000000)},   // inf
365             {"0x32p+5000", uint64_t(0x7FF0000000000000)},   // inf
366             {"-0x32p+1023", uint64_t(0xFFF0000000000000)},  // -inf
367         })));
368 
TEST(FloatProxy, ValidConversion) {
  // Ordinary values come back unchanged from getAsFloat().
  for (const float value : {1.f, 32.f, -1.f, 0.f, -0.f, 1.2e32f}) {
    EXPECT_THAT(FloatProxy<float>(value).getAsFloat(), Eq(value));
  }

  const uint32_t infinity_bits[] = {0xFF800000u, 0x7F800000u};
  const uint32_t nan_bits[] = {0xFFC00000u, 0xFF800100u, 0xFF800c00u,
                               0xFF80F000u, 0xFFFFFFFFu, 0x7FC00000u,
                               0x7F800100u, 0x7f800c00u, 0x7F80F000u,
                               0x7FFFFFFFu};

  // Proxies built from raw bit patterns classify as infinity or NaN.
  for (const uint32_t bits : infinity_bits) {
    EXPECT_TRUE(std::isinf(FloatProxy<float>(bits).getAsFloat()));
  }
  for (const uint32_t bits : nan_bits) {
    EXPECT_TRUE(std::isnan(FloatProxy<float>(bits).getAsFloat()));
  }

  // data() returns exactly the bit pattern the proxy was built from.
  for (const uint32_t bits : infinity_bits) {
    EXPECT_THAT(FloatProxy<float>(bits).data(), Eq(bits));
  }
  for (const uint32_t bits : nan_bits) {
    EXPECT_THAT(FloatProxy<float>(bits).data(), Eq(bits));
  }
}
403 
TEST(FloatProxy, Nan) {
  // Negative and positive NaN bit patterns; isNan() must accept all of them.
  const uint32_t nan_bits[] = {0xFFC00000u, 0xFF800100u, 0xFF800c00u,
                               0xFF80F000u, 0xFFFFFFFFu, 0x7FC00000u,
                               0x7F800100u, 0x7f800c00u, 0x7F80F000u,
                               0x7FFFFFFFu};
  for (const uint32_t bits : nan_bits) {
    EXPECT_TRUE(FloatProxy<float>(bits).isNan());
  }
}
416 
// Checks unary minus on FloatProxy<float> for ordinary values, zeros and
// infinities.
TEST(FloatProxy, Negation) {
  EXPECT_THAT((-FloatProxy<float>(1.f)).getAsFloat(), Eq(-1.0f));
  EXPECT_THAT((-FloatProxy<float>(0.f)).getAsFloat(), Eq(-0.0f));
  // 0.0f compares equal to -0.0f, so the equality check above cannot tell
  // whether the sign was flipped; inspect the sign explicitly.
  EXPECT_TRUE(std::signbit((-FloatProxy<float>(0.f)).getAsFloat()));

  EXPECT_THAT((-FloatProxy<float>(-1.f)).getAsFloat(), Eq(1.0f));
  EXPECT_THAT((-FloatProxy<float>(-0.f)).getAsFloat(), Eq(0.0f));
  // Same reasoning as above, for the negative-zero input.
  EXPECT_FALSE(std::signbit((-FloatProxy<float>(-0.f)).getAsFloat()));

  EXPECT_THAT((-FloatProxy<float>(32.f)).getAsFloat(), Eq(-32.0f));
  EXPECT_THAT((-FloatProxy<float>(-32.f)).getAsFloat(), Eq(32.0f));

  EXPECT_THAT((-FloatProxy<float>(1.2e32f)).getAsFloat(), Eq(-1.2e32f));
  EXPECT_THAT((-FloatProxy<float>(-1.2e32f)).getAsFloat(), Eq(1.2e32f));

  EXPECT_THAT(
      (-FloatProxy<float>(std::numeric_limits<float>::infinity())).getAsFloat(),
      Eq(-std::numeric_limits<float>::infinity()));
  EXPECT_THAT((-FloatProxy<float>(-std::numeric_limits<float>::infinity()))
                  .getAsFloat(),
              Eq(std::numeric_limits<float>::infinity()));
}
437 
438 // Test conversion of FloatProxy values to strings.
439 //
440 // In previous cases, we always wrapped the FloatProxy value in a HexFloat
441 // before conversion to a string.  In the following cases, the FloatProxy
442 // decides for itself whether to print as a regular number or as a hex float.
443 
// Fixture for 32-bit cases given as (value, expected text).
using FloatProxyFloatTest =
    ::testing::TestWithParam<std::pair<FloatProxy<float>, std::string>>;
// Fixture for 64-bit cases given as (value, expected text).
using FloatProxyDoubleTest =
    ::testing::TestWithParam<std::pair<FloatProxy<double>, std::string>>;
448 
// Returns the text produced by streaming |value| directly into a string
// stream, using whatever operator<< is defined for T.
template <typename T>
std::string EncodeViaFloatProxy(const T& value) {
  std::stringstream encoded;
  encoded << value;
  return encoded.str();
}
456 
// Rewrites a decimal floating point string so that its exponent marker is a
// lowercase 'e' and its exponent has no leading zeros.  For example the
// Microsoft runtime library likes to write "2.5E+010"; that becomes
// "2.5e+10".  A string that does not parse as
// <digits/sign/dot><e|E><+|-><integer> is returned unchanged.  We don't care
// what happens to strings that are not floating point strings.
std::string NormalizeExponentInFloatString(std::string in) {
  // One extra element holds the terminating null, even when sscanf matches
  // nothing.
  std::vector<char> mantissa(in.size() + 1);
  char marker = '\0';
  char sign = '\0';
  int exponent_value = 0;  // in base 10
  const int matched =
      std::sscanf(in.c_str(), "%[-+.0123456789]%c%c%d", mantissa.data(),
                  &marker, &sign, &exponent_value);

  const bool has_exponent = matched == 4 &&
                            (marker == 'e' || marker == 'E') &&
                            (sign == '-' || sign == '+');
  if (!has_exponent) {
    return in;
  }

  // It looks like a floating point value with exponent: rebuild it.
  std::stringstream rebuilt;
  rebuilt << mantissa.data() << 'e' << sign << exponent_value;
  return rebuilt.str();
}
483 
TEST(NormalizeFloat, Sample) {
  // Each entry is (input, expected normalized output).
  const std::pair<std::string, std::string> samples[] = {
      {"", ""},
      {"1e-12", "1e-12"},
      {"1E+14", "1e+14"},
      {"1e-0012", "1e-12"},
      {"1.263E+014", "1.263e+14"},
  };
  for (const auto& sample : samples) {
    EXPECT_THAT(NormalizeExponentInFloatString(sample.first),
                Eq(sample.second));
  }
}
491 
492 // The following two tests can't be DRY because they take different parameter
493 // types.
TEST_P(FloatProxyFloatTest, EncodeCorrectly) {
  // The parameter is (value to print, expected text after the exponent has
  // been normalized).
  const auto& param = GetParam();
  const std::string printed = EncodeViaFloatProxy(param.first);
  EXPECT_THAT(NormalizeExponentInFloatString(printed), Eq(param.second));
}
499 
TEST_P(FloatProxyDoubleTest, EncodeCorrectly) {
  // The parameter is (value to print, expected text after the exponent has
  // been normalized).
  const auto& param = GetParam();
  const std::string printed = EncodeViaFloatProxy(param.first);
  EXPECT_THAT(NormalizeExponentInFloatString(printed), Eq(param.second));
}
505 
506 INSTANTIATE_TEST_SUITE_P(
507     Float32Tests, FloatProxyFloatTest,
508     ::testing::ValuesIn(std::vector<std::pair<FloatProxy<float>, std::string>>({
509         // Zero
510         {0.f, "0"},
511         // Normal numbers
512         {1.f, "1"},
513         {-0.25f, "-0.25"},
514         {1000.0f, "1000"},
515 
516         // Still normal numbers, but with large magnitude exponents.
517         {float(ldexp(1.f, 126)), "8.50705917e+37"},
518         {float(ldexp(-1.f, -126)), "-1.17549435e-38"},
519 
520         // denormalized values are printed as hex floats.
521         {float(ldexp(1.0f, -127)), "0x1p-127"},
522         {float(ldexp(1.5f, -128)), "0x1.8p-128"},
523         {float(ldexp(1.25, -129)), "0x1.4p-129"},
524         {float(ldexp(1.125, -130)), "0x1.2p-130"},
525         {float(ldexp(-1.0f, -127)), "-0x1p-127"},
526         {float(ldexp(-1.0f, -128)), "-0x1p-128"},
527         {float(ldexp(-1.0f, -129)), "-0x1p-129"},
528         {float(ldexp(-1.5f, -130)), "-0x1.8p-130"},
529 
530         // NaNs
531         {FloatProxy<float>(uint32_t(0xFFC00000)), "-0x1.8p+128"},
532         {FloatProxy<float>(uint32_t(0xFF800100)), "-0x1.0002p+128"},
533 
534         {std::numeric_limits<float>::infinity(), "0x1p+128"},
535         {-std::numeric_limits<float>::infinity(), "-0x1p+128"},
536     })));
537 
538 INSTANTIATE_TEST_SUITE_P(
539     Float64Tests, FloatProxyDoubleTest,
540     ::testing::ValuesIn(
541         std::vector<std::pair<FloatProxy<double>, std::string>>({
542             {0., "0"},
543             {1., "1"},
544             {-0.25, "-0.25"},
545             {1000.0, "1000"},
546 
547             // Large outside the range of normal floats
548             {ldexp(1.0, 128), "3.4028236692093846e+38"},
549             {ldexp(1.5, 129), "1.0208471007628154e+39"},
550             {ldexp(-1.0, 128), "-3.4028236692093846e+38"},
551             {ldexp(-1.5, 129), "-1.0208471007628154e+39"},
552 
553             // Small outside the range of normal floats
554             {ldexp(1.5, -129), "2.2040519077917891e-39"},
555             {ldexp(-1.5, -129), "-2.2040519077917891e-39"},
556 
557             // lowest non-denorm
558             {ldexp(1.0, -1022), "2.2250738585072014e-308"},
559             {ldexp(-1.0, -1022), "-2.2250738585072014e-308"},
560 
561             // Denormalized values
562             {ldexp(1.125, -1023), "0x1.2p-1023"},
563             {ldexp(-1.375, -1024), "-0x1.6p-1024"},
564 
565             // NaNs
566             {uint64_t(0x7FF8000000000000LL), "0x1.8p+1024"},
567             {uint64_t(0xFFF0F00000000000LL), "-0x1.0fp+1024"},
568 
569             // Infinity
570             {std::numeric_limits<double>::infinity(), "0x1p+1024"},
571             {-std::numeric_limits<double>::infinity(), "-0x1p+1024"},
572 
573         })));
574 
575 // double is used so that unbiased_exponent can be used with the output
576 // of ldexp directly.
unbiased_exponent(double f)577 int32_t unbiased_exponent(double f) {
578   return HexFloat<FloatProxy<float>>(static_cast<float>(f))
579       .getUnbiasedNormalizedExponent();
580 }
581 
unbiased_half_exponent(uint16_t f)582 int16_t unbiased_half_exponent(uint16_t f) {
583   return HexFloat<FloatProxy<Float16>>(f).getUnbiasedNormalizedExponent();
584 }
585 
TEST(HexFloatOperationTest, UnbiasedExponent) {
  // Float cases: a power of two reports its own exponent.
  for (const int exponent : {0, -32, 42, 125}) {
    EXPECT_EQ(exponent, unbiased_exponent(ldexp(1.0f, exponent)));
  }

  EXPECT_EQ(128,
            HexFloat<FloatProxy<float>>(std::numeric_limits<float>::infinity())
                .getUnbiasedNormalizedExponent());

  // -127 is the first denorm; -126 - 23 is the smallest representable number.
  for (const int exponent : {-100, -127, -128, -129, -140, -126 - 23}) {
    EXPECT_EQ(exponent, unbiased_exponent(ldexp(1.0f, exponent)));
  }
  // Should get rounded to 0 first.
  EXPECT_EQ(0, unbiased_exponent(ldexp(1.0f, -127 - 23)));

  // Float16 cases
  // The exponent is represented in the bits 0x7C00
  // The offset is -15
  // Each entry is (expected exponent, Float16 bit pattern); the last entry is
  // the smallest representable number.
  const std::pair<int, uint16_t> half_cases[] = {
      {0, 0x3C00},   {3, 0x4800},  {-1, 0x3800}, {-14, 0x0400},
      {16, 0x7C00},  {10, 0x6400}, {-24, 0x0001},
  };
  for (const auto& half_case : half_cases) {
    EXPECT_EQ(half_case.first, unbiased_half_exponent(half_case.second));
  }
}
620 
// Creates a float that is the sum of 1/(2 ^ fractions[i]) for i in fractions.
// An empty list yields 0.
float float_fractions(const std::vector<uint32_t>& fractions) {
  float sum = 0.0f;
  for (const uint32_t power : fractions) {
    // Convert to a signed value first so that the negation below is a
    // negative exponent.
    const int32_t shift = static_cast<int32_t>(power);
    sum += std::ldexp(1.0f, -shift);
  }
  return sum;
}
629 
630 // Returns the normalized significand of a HexFloat<FloatProxy<float>>
631 // that was created by calling float_fractions with the input fractions,
632 // raised to the power of exp.
normalized_significand(const std::vector<uint32_t> & fractions,uint32_t exp)633 uint32_t normalized_significand(const std::vector<uint32_t>& fractions,
634                                 uint32_t exp) {
635   return HexFloat<FloatProxy<float>>(
636              static_cast<float>(ldexp(float_fractions(fractions), exp)))
637       .getNormalizedSignificand();
638 }
639 
// Builds a 32-bit float significand mask.  Each entry of |bits| is an offset
// counted down from the top significand bit (1 << 22): 0 selects that top
// bit, 1 selects the bit below it, and so on.  An empty list yields 0.
uint32_t bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t top_significand_bit = 1u << 22u;
  uint32_t mask = 0;
  for (const uint32_t offset : bits) {
    mask = mask | (top_significand_bit >> offset);
  }
  return mask;
}
651 
// The same as bits_set but for a Float16 value: offsets are counted down
// from the top significand bit (1 << 9), and the mask is returned as a
// 16-bit value.
uint16_t half_bits_set(const std::vector<uint32_t>& bits) {
  const uint32_t top_significand_bit = 1u << 9u;
  uint32_t mask = 0;
  for (const uint32_t offset : bits) {
    mask = mask | (top_significand_bit >> offset);
  }
  return static_cast<uint16_t>(mask);
}
662 
TEST(HexFloatOperationTest, NormalizedSignificand) {
  // Negative exponents are handed to normalized_significand through its
  // uint32_t parameter.
  const auto as_exp = [](int32_t exponent) {
    return static_cast<uint32_t>(exponent);
  };
  const uint32_t no_bits = bits_set({});

  // For normalized numbers (the following) it should be a simple matter
  // of getting rid of the top implicit bit
  EXPECT_EQ(no_bits, normalized_significand({0}, 0));
  EXPECT_EQ(bits_set({0}), normalized_significand({0, 1}, 0));
  EXPECT_EQ(bits_set({0, 1}), normalized_significand({0, 1, 2}, 0));
  for (const uint32_t exponent : {0u, 32u, 126u}) {
    EXPECT_EQ(bits_set({1}), normalized_significand({0, 2}, exponent));
  }

  // For denormalized numbers we expect the normalized significand to
  // shift as if it were normalized. This means, in practice that the
  // top_most set bit will be cut off. Looks very similar to above (on purpose)
  EXPECT_EQ(no_bits, normalized_significand({0}, as_exp(-127)));
  EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, as_exp(-128)));
  EXPECT_EQ(bits_set({3}), normalized_significand({0, 4}, as_exp(-127)));
  EXPECT_EQ(no_bits, normalized_significand({22}, as_exp(-127)));
  EXPECT_EQ(bits_set({0}), normalized_significand({21, 22}, as_exp(-127)));
}
687 
688 // Returns the 32-bit floating point value created by
689 // calling setFromSignUnbiasedExponentAndNormalizedSignificand
690 // on a HexFloat<FloatProxy<float>>
set_from_sign(bool negative,int32_t unbiased_exponent,uint32_t significand,bool round_denorm_up)691 float set_from_sign(bool negative, int32_t unbiased_exponent,
692                     uint32_t significand, bool round_denorm_up) {
693   HexFloat<FloatProxy<float>> f(0.f);
694   f.setFromSignUnbiasedExponentAndNormalizedSignificand(
695       negative, unbiased_exponent, significand, round_denorm_up);
696   return f.value().getAsFloat();
697 }
698 
TEST(HexFloatOperationTests,
     SetFromSignUnbiasedExponentAndNormalizedSignificand) {
  // Shared inputs: a value with several fraction bits set, and the
  // significand bits that correspond to it once the leading bit is dropped.
  const float mantissa = float_fractions({0, 1, 2, 5});
  const uint32_t mantissa_bits = bits_set({0, 1, 4});
  const float smallest_denorm = static_cast<float>(ldexp(1.f, -149));

  EXPECT_EQ(1.f, set_from_sign(false, 0, 0, false));

  // Tests insertion of various denormalized numbers with and without round up.
  EXPECT_EQ(smallest_denorm, set_from_sign(false, -149, 0, false));
  EXPECT_EQ(smallest_denorm, set_from_sign(false, -149, 0, true));
  EXPECT_EQ(0.f, set_from_sign(false, -150, 1, false));
  EXPECT_EQ(smallest_denorm, set_from_sign(false, -150, 1, true));

  EXPECT_EQ(ldexp(1.0f, -127), set_from_sign(false, -127, 0, false));
  EXPECT_EQ(ldexp(1.0f, -128), set_from_sign(false, -128, 0, false));
  EXPECT_EQ(mantissa, set_from_sign(false, 0, mantissa_bits, false));
  EXPECT_EQ(ldexp(mantissa, -32),
            set_from_sign(false, -32, mantissa_bits, false));
  EXPECT_EQ(ldexp(mantissa, -128),
            set_from_sign(false, -128, mantissa_bits, false));

  // The negative cases from above.
  EXPECT_EQ(-1.f, set_from_sign(true, 0, 0, false));
  EXPECT_EQ(-ldexp(1.0, -127), set_from_sign(true, -127, 0, false));
  EXPECT_EQ(-ldexp(1.0, -128), set_from_sign(true, -128, 0, false));
  EXPECT_EQ(-mantissa, set_from_sign(true, 0, mantissa_bits, false));
  EXPECT_EQ(-ldexp(mantissa, -32),
            set_from_sign(true, -32, mantissa_bits, false));
  EXPECT_EQ(-ldexp(mantissa, -128),
            set_from_sign(true, -128, mantissa_bits, false));
}
732 
TEST(HexFloatOperationTests, NonRounding) {
  // Source and target are both 32-bit hex-floats, so rounding should be
  // trivial; only the denorm case is a bit more involved.
  using HF = HexFloat<FloatProxy<float>>;
  bool carry = false;

  const round_direction all_directions[] = {
      round_direction::kToZero, round_direction::kToNearestEven,
      round_direction::kToPositiveInfinity,
      round_direction::kToNegativeInfinity};

  // Everything fits, so every rounding mode must give the same answer.
  for (round_direction direction : all_directions) {
    // Rounds the significand of |value| to float width in the current
    // direction, writing the carry flag into |carry|.
    const auto rounded_significand = [&](float value) {
      return HF(value).getRoundedNormalizedSignificand<HF>(direction, &carry);
    };

    EXPECT_EQ(bits_set({}), rounded_significand(0.f));
    EXPECT_FALSE(carry);

    EXPECT_EQ(bits_set({0}), rounded_significand(float_fractions({0, 1})));
    EXPECT_FALSE(carry);

    EXPECT_EQ(bits_set({1, 3}),
              rounded_significand(float_fractions({0, 2, 4})));
    EXPECT_FALSE(carry);

    // Negative value scaled by 2^-128.
    EXPECT_EQ(bits_set({0, 1, 4}),
              rounded_significand(static_cast<float>(
                  -ldexp(float_fractions({0, 1, 2, 5}), -128))));
    EXPECT_FALSE(carry);

    EXPECT_EQ(bits_set({0, 1, 4, 22}),
              rounded_significand(
                  static_cast<float>(float_fractions({0, 1, 2, 5, 23}))));
    EXPECT_FALSE(carry);
  }
}
772 
// Shorthand for round_direction, used by the parameter tables in this file.
using RD = round_direction;
// One parameter set for HexFloatRoundTest.
struct RoundSignificandCase {
  // 32-bit float whose significand is rounded.
  float source_float;
  // first: the significand expected from
  // getRoundedNormalizedSignificand<HF16>; second: the expected carry flag.
  std::pair<int16_t, bool> expected_results;
  // Rounding mode passed to getRoundedNormalizedSignificand.
  round_direction round;
};
779 
780 using HexFloatRoundTest = ::testing::TestWithParam<RoundSignificandCase>;
781 
TEST_P(HexFloatRoundTest,RoundDownToFP16)782 TEST_P(HexFloatRoundTest, RoundDownToFP16) {
783   using HF = HexFloat<FloatProxy<float>>;
784   using HF16 = HexFloat<FloatProxy<Float16>>;
785 
786   HF input_value(GetParam().source_float);
787   bool carry_bit = false;
788   EXPECT_EQ(GetParam().expected_results.first,
789             input_value.getRoundedNormalizedSignificand<HF16>(GetParam().round,
790                                                               &carry_bit));
791   EXPECT_EQ(carry_bit, GetParam().expected_results.second);
792 }
793 
// clang-format off
// Each case is {source float, (expected half-width significand, expected
// carry flag), rounding mode}. The RoundDownToFP16 test consumes them.
INSTANTIATE_TEST_SUITE_P(F32ToF16, HexFloatRoundTest,
  ::testing::ValuesIn(std::vector<RoundSignificandCase>(
  {
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToZero},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNearestEven},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToPositiveInfinity},
    {float_fractions({0}), std::make_pair(half_bits_set({}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1}), std::make_pair(half_bits_set({0}), false), RD::kToZero},

    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11}), std::make_pair(half_bits_set({0}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToZero},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 10, 11}), std::make_pair(half_bits_set({0, 8}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    // Negated input: the expectations for the two directed modes swap
    // relative to the positive group above.
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0}), false), RD::kToPositiveInfinity},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNegativeInfinity},
    {-float_fractions({0, 1, 11, 12}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 11, 22}), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},

    // Carries
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToZero},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToPositiveInfinity},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), false), RD::kToNegativeInfinity},
    {float_fractions({0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}), std::make_pair(half_bits_set({}), true), RD::kToNearestEven},

    // Cases where the original number was a denorm. Note: this should have no
    // effect because the number is pre-normalized.
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -128)), std::make_pair(half_bits_set({0}), false), RD::kToZero},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -129)), std::make_pair(half_bits_set({0, 9}), false), RD::kToPositiveInfinity},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -131)), std::make_pair(half_bits_set({0}), false), RD::kToNegativeInfinity},
    {static_cast<float>(ldexp(float_fractions({0, 1, 11, 13}), -130)), std::make_pair(half_bits_set({0, 9}), false), RD::kToNearestEven},
  })));
// clang-format on
843 
// One parameter set for HexFloatRoundUpSignificandTest.
struct UpCastSignificandCase {
  // Bit pattern used to construct the 16-bit HexFloat under test.
  uint16_t source_half;
  // Significand expected from getRoundedNormalizedSignificand when the
  // target type is the 32-bit HexFloat.
  uint32_t expected_result;
};
848 
849 using HexFloatRoundUpSignificandTest =
850     ::testing::TestWithParam<UpCastSignificandCase>;
TEST_P(HexFloatRoundUpSignificandTest,Widening)851 TEST_P(HexFloatRoundUpSignificandTest, Widening) {
852   using HF = HexFloat<FloatProxy<float>>;
853   using HF16 = HexFloat<FloatProxy<Float16>>;
854   bool carry_bit = false;
855 
856   round_direction rounding[] = {round_direction::kToZero,
857                                 round_direction::kToNearestEven,
858                                 round_direction::kToPositiveInfinity,
859                                 round_direction::kToNegativeInfinity};
860 
861   // Everything fits, so everything should just be bit-shifts.
862   for (round_direction round : rounding) {
863     carry_bit = false;
864     HF16 input_value(GetParam().source_half);
865     EXPECT_EQ(
866         GetParam().expected_result,
867         input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit))
868         << std::hex << "0x"
869         << input_value.getRoundedNormalizedSignificand<HF>(round, &carry_bit)
870         << "  0x" << GetParam().expected_result;
871     EXPECT_FALSE(carry_bit);
872   }
873 }
874 
// Each case is {half bit pattern, expected 32-bit significand}.
INSTANTIATE_TEST_SUITE_P(
    F16toF32, HexFloatRoundUpSignificandTest,
    // 0xFC00 of the source 16-bit hex value cover the sign and the exponent.
    // They are ignored for this test.
    // In every row the expected value equals the remaining low 10 bits of the
    // source shifted left by 13 (e.g. 0x0F01 & 0x03FF = 0x301 -> 0x602000).
    ::testing::ValuesIn(std::vector<UpCastSignificandCase>({
        {0x3F00, 0x600000},
        {0x0F00, 0x600000},
        {0x0F01, 0x602000},
        {0x0FFF, 0x7FE000},
    })));
885 
// One parameter set for HexFloatFP32To16Tests.
struct DownCastTest {
  // 32-bit float that is cast down to a half.
  float source_float;
  // Bit pattern the resulting half must have.
  uint16_t expected_half;
  // Rounding modes to try; each one must produce expected_half.
  std::vector<round_direction> directions;
};
891 
get_round_text(round_direction direction)892 std::string get_round_text(round_direction direction) {
893 #define CASE(round_direction) \
894   case round_direction:       \
895     return #round_direction
896 
897   switch (direction) {
898     CASE(round_direction::kToZero);
899     CASE(round_direction::kToPositiveInfinity);
900     CASE(round_direction::kToNegativeInfinity);
901     CASE(round_direction::kToNearestEven);
902   }
903 #undef CASE
904   return "";
905 }
906 
907 using HexFloatFP32To16Tests = ::testing::TestWithParam<DownCastTest>;
908 
TEST_P(HexFloatFP32To16Tests,NarrowingCasts)909 TEST_P(HexFloatFP32To16Tests, NarrowingCasts) {
910   using HF = HexFloat<FloatProxy<float>>;
911   using HF16 = HexFloat<FloatProxy<Float16>>;
912   HF f(GetParam().source_float);
913   for (auto round : GetParam().directions) {
914     HF16 half(0);
915     f.castTo(half, round);
916     EXPECT_EQ(GetParam().expected_half, half.value().getAsFloat().get_value())
917         << get_round_text(round) << "  " << std::hex
918         << BitwiseCast<uint32_t>(GetParam().source_float)
919         << " cast to: " << half.value().getAsFloat().get_value();
920   }
921 }
922 
// Half-precision bit patterns that the cast tables use for +infinity and
// -infinity.
constexpr uint16_t positive_infinity = 0x7C00;
constexpr uint16_t negative_infinity = 0xFC00;
925 
926 INSTANTIATE_TEST_SUITE_P(
927     F32ToF16, HexFloatFP32To16Tests,
928     ::testing::ValuesIn(std::vector<DownCastTest>({
929         // Exactly representable as half.
930         {0.f,
931          0x0,
932          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
933           RD::kToNearestEven}},
934         {-0.f,
935          0x8000,
936          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
937           RD::kToNearestEven}},
938         {1.0f,
939          0x3C00,
940          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
941           RD::kToNearestEven}},
942         {-1.0f,
943          0xBC00,
944          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
945           RD::kToNearestEven}},
946 
947         {float_fractions({0, 1, 10}),
948          0x3E01,
949          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
950           RD::kToNearestEven}},
951         {-float_fractions({0, 1, 10}),
952          0xBE01,
953          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
954           RD::kToNearestEven}},
955         {static_cast<float>(ldexp(float_fractions({0, 1, 10}), 3)),
956          0x4A01,
957          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
958           RD::kToNearestEven}},
959         {static_cast<float>(-ldexp(float_fractions({0, 1, 10}), 3)),
960          0xCA01,
961          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
962           RD::kToNearestEven}},
963 
964         // Underflow
965         {static_cast<float>(ldexp(1.0f, -25)),
966          0x0,
967          {RD::kToZero, RD::kToNegativeInfinity, RD::kToNearestEven}},
968         {static_cast<float>(ldexp(1.0f, -25)), 0x1, {RD::kToPositiveInfinity}},
969         {static_cast<float>(-ldexp(1.0f, -25)),
970          0x8000,
971          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNearestEven}},
972         {static_cast<float>(-ldexp(1.0f, -25)),
973          0x8001,
974          {RD::kToNegativeInfinity}},
975         {static_cast<float>(ldexp(1.0f, -24)),
976          0x1,
977          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
978           RD::kToNearestEven}},
979 
980         // Overflow
981         {static_cast<float>(ldexp(1.0f, 16)),
982          positive_infinity,
983          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
984           RD::kToNearestEven}},
985         {static_cast<float>(ldexp(1.0f, 18)),
986          positive_infinity,
987          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
988           RD::kToNearestEven}},
989         {static_cast<float>(ldexp(1.3f, 16)),
990          positive_infinity,
991          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
992           RD::kToNearestEven}},
993         {static_cast<float>(-ldexp(1.0f, 16)),
994          negative_infinity,
995          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
996           RD::kToNearestEven}},
997         {static_cast<float>(-ldexp(1.0f, 18)),
998          negative_infinity,
999          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
1000           RD::kToNearestEven}},
1001         {static_cast<float>(-ldexp(1.3f, 16)),
1002          negative_infinity,
1003          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
1004           RD::kToNearestEven}},
1005 
1006         // Transfer of Infinities
1007         {std::numeric_limits<float>::infinity(),
1008          positive_infinity,
1009          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
1010           RD::kToNearestEven}},
1011         {-std::numeric_limits<float>::infinity(),
1012          negative_infinity,
1013          {RD::kToZero, RD::kToPositiveInfinity, RD::kToNegativeInfinity,
1014           RD::kToNearestEven}},
1015 
1016         // Nans are below because we cannot test for equality.
1017     })));
1018 
// One parameter set for HexFloatFP16To32Tests.
struct UpCastCase {
  // Bit pattern used to construct the 16-bit HexFloat that is cast up.
  uint16_t source_half;
  // 32-bit float the cast must produce under every rounding mode.
  float expected_float;
};
1023 
1024 using HexFloatFP16To32Tests = ::testing::TestWithParam<UpCastCase>;
TEST_P(HexFloatFP16To32Tests,WideningCasts)1025 TEST_P(HexFloatFP16To32Tests, WideningCasts) {
1026   using HF = HexFloat<FloatProxy<float>>;
1027   using HF16 = HexFloat<FloatProxy<Float16>>;
1028   HF16 f(GetParam().source_half);
1029 
1030   round_direction rounding[] = {round_direction::kToZero,
1031                                 round_direction::kToNearestEven,
1032                                 round_direction::kToPositiveInfinity,
1033                                 round_direction::kToNegativeInfinity};
1034 
1035   // Everything fits, so everything should just be bit-shifts.
1036   for (round_direction round : rounding) {
1037     HF flt(0.f);
1038     f.castTo(flt, round);
1039     EXPECT_EQ(GetParam().expected_float, flt.value().getAsFloat())
1040         << get_round_text(round) << "  " << std::hex
1041         << BitwiseCast<uint16_t>(GetParam().source_half)
1042         << " cast to: " << flt.value().getAsFloat();
1043   }
1044 }
1045 
// Each case is {half bit pattern, expected 32-bit float}.
INSTANTIATE_TEST_SUITE_P(
    F16ToF32, HexFloatFP16To32Tests,
    ::testing::ValuesIn(std::vector<UpCastCase>({
        {0x0000, 0.f},
        {0x8000, -0.f},
        {0x3C00, 1.0f},
        {0xBC00, -1.0f},
        {0x3F00, float_fractions({0, 1, 2})},
        {0xBF00, -float_fractions({0, 1, 2})},
        {0x3F01, float_fractions({0, 1, 2, 10})},
        {0xBF01, -float_fractions({0, 1, 2, 10})},

        // denorm
        // (exponent bits are zero; the expected magnitude is the low bits
        // times 2^-24, e.g. 0x011 -> 2^-20 + 2^-24)
        {0x0001, static_cast<float>(ldexp(1.0, -24))},
        {0x0002, static_cast<float>(ldexp(1.0, -23))},
        {0x8001, static_cast<float>(-ldexp(1.0, -24))},
        {0x8011, static_cast<float>(-ldexp(1.0, -20) + -ldexp(1.0, -24))},

        // inf
        {0x7C00, std::numeric_limits<float>::infinity()},
        {0xFC00, -std::numeric_limits<float>::infinity()},
    })));
1068 
// NaNs cannot be compared for equality, so they are checked here with isNan()
// instead of in the parameterized cast tables.
TEST(HexFloatOperationTests, NanTests) {
  using HF = HexFloat<FloatProxy<float>>;
  using HF16 = HexFloat<FloatProxy<Float16>>;

  const round_direction all_directions[] = {
      round_direction::kToZero, round_direction::kToNearestEven,
      round_direction::kToPositiveInfinity,
      round_direction::kToNegativeInfinity};

  // Half bit patterns that must widen to a NaN.
  const uint16_t half_nan_patterns[] = {0x7C01, 0x7C11, 0xFC01, 0x7C10,
                                        0xFF00};

  // A NaN must stay a NaN in both cast directions, whatever the rounding mode.
  for (round_direction direction : all_directions) {
    HF16 narrowed(0);
    HF widened(0.f);

    HF(std::numeric_limits<float>::quiet_NaN()).castTo(narrowed, direction);
    EXPECT_TRUE(narrowed.value().isNan());
    HF(std::numeric_limits<float>::signaling_NaN())
        .castTo(narrowed, direction);
    EXPECT_TRUE(narrowed.value().isNan());

    for (uint16_t pattern : half_nan_patterns) {
      HF16(pattern).castTo(widened, direction);
      EXPECT_TRUE(widened.value().isNan());
    }
  }
}
1098 
// A test case for parsing good and bad HexFloat<FloatProxy<T>> literals.
template <typename T>
struct FloatParseCase {
  // Text that the test feeds to ParseNormalFloat through a stringstream.
  std::string literal;
  // Passed as the second argument of ParseNormalFloat.
  bool negate_value;
  // True when the stream must not be in a failed state after parsing.
  bool expect_success;
  // Value the parse must produce; the tests only compare it when
  // expect_success is true.
  HexFloat<FloatProxy<T>> expected_value;
};
1107 
1108 using ParseNormalFloatTest = ::testing::TestWithParam<FloatParseCase<float>>;
1109 
TEST_P(ParseNormalFloatTest,Samples)1110 TEST_P(ParseNormalFloatTest, Samples) {
1111   std::stringstream input(GetParam().literal);
1112   HexFloat<FloatProxy<float>> parsed_value(0.0f);
1113   ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1114   EXPECT_NE(GetParam().expect_success, input.fail())
1115       << " literal: " << GetParam().literal
1116       << " negate: " << GetParam().negate_value;
1117   if (GetParam().expect_success) {
1118     EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1119         << " literal: " << GetParam().literal
1120         << " negate: " << GetParam().negate_value;
1121   }
1122 }
1123 
1124 // Returns a FloatParseCase with expected failure.
1125 template <typename T>
BadFloatParseCase(std::string literal,bool negate_value,T expected_value)1126 FloatParseCase<T> BadFloatParseCase(std::string literal, bool negate_value,
1127                                     T expected_value) {
1128   HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1129   return FloatParseCase<T>{literal, negate_value, false, proxy_expected_value};
1130 }
1131 
1132 // Returns a FloatParseCase that should successfully parse to a given value.
1133 template <typename T>
GoodFloatParseCase(std::string literal,bool negate_value,T expected_value)1134 FloatParseCase<T> GoodFloatParseCase(std::string literal, bool negate_value,
1135                                      T expected_value) {
1136   HexFloat<FloatProxy<T>> proxy_expected_value(expected_value);
1137   return FloatParseCase<T>{literal, negate_value, true, proxy_expected_value};
1138 }
1139 
// Cases are built as Good/BadFloatParseCase(literal, negate_value,
// expected_value). For Bad cases the test only checks that parsing fails;
// the expected value is not compared.
INSTANTIATE_TEST_SUITE_P(
    FloatParse, ParseNormalFloatTest,
    ::testing::ValuesIn(std::vector<FloatParseCase<float>>{
        // Failing cases due to trivially incorrect syntax.
        BadFloatParseCase("abc", false, 0.0f),
        BadFloatParseCase("abc", true, 0.0f),

        // Valid cases.
        GoodFloatParseCase("0", false, 0.0f),
        GoodFloatParseCase("0.0", false, 0.0f),
        GoodFloatParseCase("-0.0", false, -0.0f),
        GoodFloatParseCase("2.0", false, 2.0f),
        GoodFloatParseCase("-2.0", false, -2.0f),
        GoodFloatParseCase("+2.0", false, 2.0f),
        // Cases with negate_value being true.
        GoodFloatParseCase("0.0", true, -0.0f),
        GoodFloatParseCase("2.0", true, -2.0f),

        // When negate_value is true, we should not accept a
        // leading minus or plus.
        BadFloatParseCase("-0.0", true, 0.0f),
        BadFloatParseCase("-2.0", true, 0.0f),
        BadFloatParseCase("+0.0", true, 0.0f),
        BadFloatParseCase("+2.0", true, 0.0f),

        // Overflow is an error for 32-bit float parsing.
        BadFloatParseCase("1e40", false, FLT_MAX),
        BadFloatParseCase("1e40", true, -FLT_MAX),
        BadFloatParseCase("-1e40", false, -FLT_MAX),
        // We can't have -1e40 and negate_value == true since
        // that represents an original case of "--1e40" which
        // is invalid.
    }));
1173 
1174 using ParseNormalFloat16Test =
1175     ::testing::TestWithParam<FloatParseCase<Float16>>;
1176 
TEST_P(ParseNormalFloat16Test,Samples)1177 TEST_P(ParseNormalFloat16Test, Samples) {
1178   std::stringstream input(GetParam().literal);
1179   HexFloat<FloatProxy<Float16>> parsed_value(0);
1180   ParseNormalFloat(input, GetParam().negate_value, parsed_value);
1181   EXPECT_NE(GetParam().expect_success, input.fail())
1182       << " literal: " << GetParam().literal
1183       << " negate: " << GetParam().negate_value;
1184   if (GetParam().expect_success) {
1185     EXPECT_THAT(parsed_value.value(), Eq(GetParam().expected_value.value()))
1186         << " literal: " << GetParam().literal
1187         << " negate: " << GetParam().negate_value;
1188   }
1189 }
1190 
// Same literals as the 32-bit FloatParse suite (minus its overflow cases),
// with expected values written as 16-bit patterns (0x4000 for 2.0, 0xc000
// for -2.0, 0x8000 for -0.0). For Bad cases the expected value is not
// compared.
INSTANTIATE_TEST_SUITE_P(
    Float16Parse, ParseNormalFloat16Test,
    ::testing::ValuesIn(std::vector<FloatParseCase<Float16>>{
        // Failing cases due to trivially incorrect syntax.
        BadFloatParseCase<Float16>("abc", false, uint16_t{0}),
        BadFloatParseCase<Float16>("abc", true, uint16_t{0}),

        // Valid cases.
        GoodFloatParseCase<Float16>("0", false, uint16_t{0}),
        GoodFloatParseCase<Float16>("0.0", false, uint16_t{0}),
        GoodFloatParseCase<Float16>("-0.0", false, uint16_t{0x8000}),
        GoodFloatParseCase<Float16>("2.0", false, uint16_t{0x4000}),
        GoodFloatParseCase<Float16>("-2.0", false, uint16_t{0xc000}),
        GoodFloatParseCase<Float16>("+2.0", false, uint16_t{0x4000}),
        // Cases with negate_value being true.
        GoodFloatParseCase<Float16>("0.0", true, uint16_t{0x8000}),
        GoodFloatParseCase<Float16>("2.0", true, uint16_t{0xc000}),

        // When negate_value is true, we should not accept a leading minus or
        // plus.
        BadFloatParseCase<Float16>("-0.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("-2.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("+0.0", true, uint16_t{0}),
        BadFloatParseCase<Float16>("+2.0", true, uint16_t{0}),
    }));
1216 
// A test case for parsing a literal with operator>>, including literals that
// are too large for the target type.
template <typename T>
struct OverflowParseCase {
  // Text that the test streams into the HexFloat.
  std::string input;
  // True when the stream must not be in a failed state after parsing.
  bool expect_success;
  // Value the parse must produce; the tests only compare it when
  // expect_success is true.
  T expected_value;
};
1224 
1225 using FloatProxyParseOverflowFloatTest =
1226     ::testing::TestWithParam<OverflowParseCase<float>>;
1227 
TEST_P(FloatProxyParseOverflowFloatTest,Sample)1228 TEST_P(FloatProxyParseOverflowFloatTest, Sample) {
1229   std::istringstream input(GetParam().input);
1230   HexFloat<FloatProxy<float>> value(0.0f);
1231   input >> value;
1232   EXPECT_NE(GetParam().expect_success, input.fail());
1233   if (GetParam().expect_success) {
1234     EXPECT_THAT(value.value().getAsFloat(), GetParam().expected_value);
1235   }
1236 }
1237 
// Each case is {literal, expect_success, expected_value}. The test does not
// compare the expected value when expect_success is false.
INSTANTIATE_TEST_SUITE_P(
    FloatOverflow, FloatProxyParseOverflowFloatTest,
    ::testing::ValuesIn(std::vector<OverflowParseCase<float>>({
        {"0", true, 0.0f},
        {"0.0", true, 0.0f},
        {"1.0", true, 1.0f},
        {"1e38", true, 1e38f},
        {"-1e38", true, -1e38f},
        // Too large for a 32-bit float: the parse must fail.
        {"1e40", false, FLT_MAX},
        {"-1e40", false, -FLT_MAX},
        {"1e400", false, FLT_MAX},
        {"-1e400", false, -FLT_MAX},
    })));
1251 
1252 using FloatProxyParseOverflowDoubleTest =
1253     ::testing::TestWithParam<OverflowParseCase<double>>;
1254 
TEST_P(FloatProxyParseOverflowDoubleTest,Sample)1255 TEST_P(FloatProxyParseOverflowDoubleTest, Sample) {
1256   std::istringstream input(GetParam().input);
1257   HexFloat<FloatProxy<double>> value(0.0);
1258   input >> value;
1259   EXPECT_NE(GetParam().expect_success, input.fail());
1260   if (GetParam().expect_success) {
1261     EXPECT_THAT(value.value().getAsFloat(), Eq(GetParam().expected_value));
1262   }
1263 }
1264 
// Each case is {literal, expect_success, expected_value}. The test does not
// compare the expected value when expect_success is false.
INSTANTIATE_TEST_SUITE_P(
    DoubleOverflow, FloatProxyParseOverflowDoubleTest,
    ::testing::ValuesIn(std::vector<OverflowParseCase<double>>({
        {"0", true, 0.0},
        {"0.0", true, 0.0},
        {"1.0", true, 1.0},
        {"1e38", true, 1e38},
        {"-1e38", true, -1e38},
        // 1e40 is expected to parse as a double, unlike in the 32-bit suite.
        {"1e40", true, 1e40},
        {"-1e40", true, -1e40},
        // Too large for a 64-bit float: the parse must fail.
        {"1e400", false, DBL_MAX},
        {"-1e400", false, -DBL_MAX},
    })));
1278 
1279 using FloatProxyParseOverflowFloat16Test =
1280     ::testing::TestWithParam<OverflowParseCase<uint16_t>>;
1281 
TEST_P(FloatProxyParseOverflowFloat16Test,Sample)1282 TEST_P(FloatProxyParseOverflowFloat16Test, Sample) {
1283   std::istringstream input(GetParam().input);
1284   HexFloat<FloatProxy<Float16>> value(0);
1285   input >> value;
1286   EXPECT_NE(GetParam().expect_success, input.fail())
1287       << " literal: " << GetParam().input;
1288   if (GetParam().expect_success) {
1289     EXPECT_THAT(value.value().data(), Eq(GetParam().expected_value))
1290         << " literal: " << GetParam().input;
1291   }
1292 }
1293 
// Each case is {literal, expect_success, expected 16-bit pattern}. The test
// does not compare the expected pattern when expect_success is false.
INSTANTIATE_TEST_SUITE_P(
    Float16Overflow, FloatProxyParseOverflowFloat16Test,
    ::testing::ValuesIn(std::vector<OverflowParseCase<uint16_t>>({
        {"0", true, uint16_t{0}},
        {"0.0", true, uint16_t{0}},
        {"1.0", true, uint16_t{0x3c00}},
        // Overflow for 16-bit float is an error, and returns max or
        // lowest value.
        {"1e38", false, uint16_t{0x7bff}},
        {"1e40", false, uint16_t{0x7bff}},
        {"1e400", false, uint16_t{0x7bff}},
        {"-1e38", false, uint16_t{0xfbff}},
        {"-1e40", false, uint16_t{0xfbff}},
        {"-1e400", false, uint16_t{0xfbff}},
    })));
1309 
// FloatProxy<T>::max() must match std::numeric_limits<T>::max() for float and
// double, and must have the bit pattern 0x7bff for Float16.
TEST(FloatProxy, Max) {
  const auto half_max = FloatProxy<Float16>::max().getAsFloat().get_value();
  const auto float_max = FloatProxy<float>::max().getAsFloat();
  const auto double_max = FloatProxy<double>::max().getAsFloat();

  EXPECT_THAT(half_max, Eq(uint16_t{0x7bff}));
  EXPECT_THAT(float_max, Eq(std::numeric_limits<float>::max()));
  EXPECT_THAT(double_max, Eq(std::numeric_limits<double>::max()));
}
1318 
// FloatProxy<T>::lowest() must match std::numeric_limits<T>::lowest() for
// float and double, and must have the bit pattern 0xfbff for Float16.
TEST(FloatProxy, Lowest) {
  const auto half_lowest =
      FloatProxy<Float16>::lowest().getAsFloat().get_value();
  const auto float_lowest = FloatProxy<float>::lowest().getAsFloat();
  const auto double_lowest = FloatProxy<double>::lowest().getAsFloat();

  EXPECT_THAT(half_lowest, Eq(uint16_t{0xfbff}));
  EXPECT_THAT(float_lowest, Eq(std::numeric_limits<float>::lowest()));
  EXPECT_THAT(double_lowest, Eq(std::numeric_limits<double>::lowest()));
}
1327 
1328 // TODO(awoloszyn): Add fp16 tests and HexFloatTraits.
1329 }  // namespace
1330 }  // namespace utils
1331 }  // namespace spvtools
1332