1 /*
2 RawSpeed - RAW file decoder.
3
4 Copyright (C) 2017 Vasily Khoruzhick
5 Copyright (C) 2020 Roman Lebedev
6
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2 of the License, or (at your option) any later version.
11
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public
18 License along with this library; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21
22 #pragma once
23
24 #include <cstdint> // for uint32_t, uint16_t
25
26 namespace rawspeed {
27
namespace ieee_754_2008 {

// Layout parameters of the IEEE-754-2008 binary interchange formats.
//
// Refer to "3.6 Interchange format parameters",
// "Table 3.5—Binary interchange format parameters"
//
// All formats are:
// MSB [Sign bit] [Exponent bits] [Fraction bits] LSB
//
// Interpretation of the encoded fields:
// exp = 0, fract = 0: +-zero (the sign bit selects which one)
// exp = 0, fract != 0: subnormal numbers
//   equation: (-1) ^ sign * 2 ^ (1 - Bias) * 0.fraction
// exp = 1..(2^ExponentWidth - 2): normalized value
//   equation: (-1) ^ sign * 2 ^ (exponent - Bias) * 1.fraction
// exp = 2^ExponentWidth - 1, fract = 0: +-infinity
// exp = 2^ExponentWidth - 1, fract != 0: NaN

// Compile-time description of a single binary interchange format.
//
// StorageWidth_  - total number of bits in the encoding.
// FractionWidth_ - number of explicitly stored fraction bits.
// ExponentWidth_ - number of biased-exponent bits.
//
// The sign always occupies exactly one bit, so the three widths must satisfy
// FractionWidth_ + ExponentWidth_ + 1 == StorageWidth_.
template <int StorageWidth_, int FractionWidth_, int ExponentWidth_>
struct BinaryN {
  // The parameters are signed, but are stored as unsigned below. Reject
  // non-positive values up front: a negative width would otherwise wrap
  // around, and the sum check further down could still succeed in modular
  // arithmetic.
  static_assert(StorageWidth_ > 0, "storage width must be positive");
  static_assert(FractionWidth_ > 0, "fraction width must be positive");
  static_assert(ExponentWidth_ > 0 && ExponentWidth_ < 32,
                "exponent width must be positive and fit the 32-bit math");

  static constexpr uint32_t StorageWidth = StorageWidth_;

  // FIXME: if we had compile-time log2/round, we'd only need StorageWidth.

  static constexpr uint32_t FractionWidth = FractionWidth_;
  static constexpr uint32_t ExponentWidth = ExponentWidth_;
  // SignWidth is always 1.
  static_assert(FractionWidth + ExponentWidth + 1 == StorageWidth,
                "sign + exponent + fraction must fill the storage exactly");

  // Number of significand bits, counting the implicit leading bit.
  static constexpr uint32_t Precision = FractionWidth + 1;

  // Largest unbiased exponent: 2^(ExponentWidth - 1) - 1.
  static constexpr uint32_t ExponentMax =
      (uint32_t{1} << (ExponentWidth - 1)) - 1;

  // The exponent bias has the same value as ExponentMax; it is kept signed
  // because it takes part in signed exponent arithmetic.
  static constexpr int32_t Bias = ExponentMax;

  // FractionPos is always 0.
  static constexpr uint32_t ExponentPos = FractionWidth;
  static constexpr uint32_t SignBitPos = StorageWidth - 1;
};

// IEEE-754-2008: binary16:
// bits 9-0 - fraction (10 bit)
// bits 14-10 - exponent (5 bit)
// bit 15 - sign
struct Binary16 : public BinaryN</*StorageWidth=*/16, /*FractionWidth=*/10,
                                 /*ExponentWidth=*/5> {
  static_assert(Precision == 11);
  static_assert(ExponentMax == 15);
  static_assert(Bias == 15);
  static_assert(ExponentPos == 10);
  static_assert(SignBitPos == 15);
};

// IEEE-754-2008: binary24:
// bits 15-0 - fraction (16 bit)
// bits 22-16 - exponent (7 bit)
// bit 23 - sign
struct Binary24 : public BinaryN</*StorageWidth=*/24, /*FractionWidth=*/16,
                                 /*ExponentWidth=*/7> {
  static_assert(Precision == 17);
  static_assert(ExponentMax == 63);
  static_assert(Bias == 63);
  static_assert(ExponentPos == 16);
  static_assert(SignBitPos == 23);
};

// IEEE-754-2008: binary32:
// bits 22-0 - fraction (23 bit)
// bits 30-23 - exponent (8 bit)
// bit 31 - sign
struct Binary32 : public BinaryN</*StorageWidth=*/32, /*FractionWidth=*/23,
                                 /*ExponentWidth=*/8> {
  static_assert(Precision == 24);
  static_assert(ExponentMax == 127);
  static_assert(Bias == 127);
  static_assert(ExponentPos == 23);
  static_assert(SignBitPos == 31);
};

} // namespace ieee_754_2008
103
// Re-encode a binary floating-point bit pattern of format NarrowType
// into the bit pattern of the wider format WideType.
//
// NarrowType / WideType must provide the layout constants FractionWidth,
// ExponentWidth, ExponentPos, SignBitPos and Bias (as BinaryN does).
//
// narrow - the encoded narrow value, held in the low bits of the argument.
//          Bits above NarrowType::SignBitPos are ignored.
//
// Returns the encoded wide value (a bit pattern, not a numeric conversion):
//  * +-zero stays +-zero,
//  * normalized values are re-biased and their fraction is left-aligned,
//  * subnormal values are renormalized (the wide format can hold them as
//    normalized values),
//  * infinity stays infinity, NaN stays NaN (its fraction bits are kept,
//    left-aligned in the wide fraction),
//  * the sign bit is always carried over.
template <typename NarrowType, typename WideType>
inline uint32_t extendBinaryFloatingPoint(uint32_t narrow) {
  // Copy the layout parameters into locals of known type, so that all the
  // arithmetic below has well-defined signedness.
  constexpr uint32_t NarrowFractionWidth = NarrowType::FractionWidth;
  constexpr uint32_t NarrowExponentWidth = NarrowType::ExponentWidth;
  constexpr uint32_t NarrowExponentPos = NarrowType::ExponentPos;
  constexpr uint32_t NarrowSignBitPos = NarrowType::SignBitPos;
  constexpr int32_t NarrowBias = NarrowType::Bias;

  constexpr uint32_t WideFractionWidth = WideType::FractionWidth;
  constexpr uint32_t WideExponentWidth = WideType::ExponentWidth;
  constexpr uint32_t WideExponentPos = WideType::ExponentPos;
  constexpr uint32_t WideSignBitPos = WideType::SignBitPos;
  constexpr int32_t WideBias = WideType::Bias;

  constexpr uint32_t NarrowExponentMask =
      (uint32_t{1} << NarrowExponentWidth) - 1;
  constexpr uint32_t NarrowFractionMask =
      (uint32_t{1} << NarrowFractionWidth) - 1;
  constexpr uint32_t WideExponentMask = (uint32_t{1} << WideExponentWidth) - 1;
  // The bit just above the wide fraction: the position of the implicit
  // leading 1 of a normalized significand.
  constexpr uint32_t WideImplicitBit = uint32_t{1} << WideFractionWidth;
  constexpr uint32_t WideFractionMask = WideImplicitBit - 1;

  // The preconditions that used to be implicit:
  static_assert(NarrowSignBitPos < 32 && WideSignBitPos < 32,
                "both encodings must fit into the 32-bit carrier type");
  static_assert(WideFractionWidth >= NarrowFractionWidth,
                "the wide fraction must be able to hold the narrow fraction");
  // Renormalizing a subnormal takes at most NarrowFractionWidth shifts, each
  // one decrementing the exponent that starts at (1 - NarrowBias + WideBias).
  // The result must still be a valid normalized exponent, i.e. at least 1.
  static_assert(WideBias - NarrowBias >=
                    static_cast<int32_t>(NarrowFractionWidth),
                "every narrow subnormal must be a wide normalized value");
  constexpr uint32_t BiasDelta = static_cast<uint32_t>(WideBias - NarrowBias);
  // The largest finite narrow exponent must not land on (or beyond) the wide
  // all-ones exponent, which is reserved for infinity/NaN.
  static_assert(NarrowExponentMask + BiasDelta <= WideExponentMask,
                "every finite narrow value must stay finite when widened");

  const uint32_t sign = (narrow >> NarrowSignBitPos) & 1U;
  const uint32_t narrow_exponent =
      (narrow >> NarrowExponentPos) & NarrowExponentMask;
  const uint32_t narrow_fraction = narrow & NarrowFractionMask;

  // Left-align the fraction. This is already final for everything but
  // subnormals; in particular the NaN fraction is kept/widened this way,
  // and a zero fraction stays zero.
  uint32_t wide_fraction = narrow_fraction
                           << (WideFractionWidth - NarrowFractionWidth);
  uint32_t wide_exponent = 0;

  if (narrow_exponent == NarrowExponentMask) {
    // Infinity or NaN
    wide_exponent = WideExponentMask;
  } else if (narrow_exponent != 0) {
    // Normalized: only the bias changes.
    wide_exponent = narrow_exponent + BiasDelta;
  } else if (narrow_fraction != 0) {
    // Subnormal numbers
    // We can represent it as a normalized value in wider type,
    // we have to shift fraction until we get 1.new_fraction
    // and decrement exponent for each shift.
    // The fraction is non-zero, so the loop terminates, and the static_assert
    // above guarantees the exponent does not drop below 1.
    wide_exponent = 1 + BiasDelta;
    while ((wide_fraction & WideImplicitBit) == 0) {
      wide_exponent -= 1;
      wide_fraction <<= 1;
    }
    // The leading 1 is implicit in the encoding, drop it.
    wide_fraction &= WideFractionMask;
  }
  // else: +-Zero, the exponent and the fraction are both already zero.

  return (sign << WideSignBitPos) | (wide_exponent << WideExponentPos) |
         wide_fraction;
}
143
144 // Expand IEEE-754-2008 binary16 into float32
fp16ToFloat(uint16_t fp16)145 inline uint32_t fp16ToFloat(uint16_t fp16) {
146 return extendBinaryFloatingPoint<ieee_754_2008::Binary16,
147 ieee_754_2008::Binary32>(fp16);
148 }
149
150 // Expand IEEE-754-2008 binary24 into float32
fp24ToFloat(uint32_t fp24)151 inline uint32_t fp24ToFloat(uint32_t fp24) {
152 return extendBinaryFloatingPoint<ieee_754_2008::Binary24,
153 ieee_754_2008::Binary32>(fp24);
154 }
155
156 } // namespace rawspeed
157