1 /*
2     RawSpeed - RAW file decoder.
3 
4     Copyright (C) 2017 Vasily Khoruzhick
5     Copyright (C) 2020 Roman Lebedev
6 
7     This library is free software; you can redistribute it and/or
8     modify it under the terms of the GNU Lesser General Public
9     License as published by the Free Software Foundation; either
10     version 2 of the License, or (at your option) any later version.
11 
12     This library is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15     Lesser General Public License for more details.
16 
17     You should have received a copy of the GNU Lesser General Public
18     License along with this library; if not, write to the Free Software
19     Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 */
21 
22 #pragma once
23 
24 #include <cstdint> // for uint32_t, uint16_t
25 
26 namespace rawspeed {
27 
28 namespace ieee_754_2008 {
29 
30 // Refer to "3.6 Interchange format parameters",
31 //          "Table 3.5—Binary interchange format parameters"
32 
33 // All formats are:
34 // MSB [Sign bit] [Exponent bits] [Fraction bits] LSB
35 
// Compile-time description of a binary floating-point encoding made of
// one sign bit, ExponentWidth_ exponent bits and FractionWidth_ fraction bits,
// packed (MSB to LSB) into StorageWidth_ bits.
template <int StorageWidth_, int FractionWidth_, int ExponentWidth_>
struct BinaryN {
  // --- Field widths, in bits. The sign field is always exactly 1 bit wide. ---
  static constexpr uint32_t StorageWidth = static_cast<uint32_t>(StorageWidth_);

  // FIXME: if we had compile-time log2/round, we'd only need StorageWidth.

  static constexpr uint32_t FractionWidth =
      static_cast<uint32_t>(FractionWidth_);
  static constexpr uint32_t ExponentWidth =
      static_cast<uint32_t>(ExponentWidth_);

  // The three fields must tile the storage exactly, with no padding bits.
  static_assert(1 + ExponentWidth + FractionWidth == StorageWidth);

  // Significand precision: the stored fraction bits plus one more bit.
  static constexpr uint32_t Precision = 1 + FractionWidth;

  // Largest unbiased exponent, 2^(ExponentWidth - 1) - 1.
  static constexpr uint32_t ExponentMax = (1 << (ExponentWidth - 1)) - 1;

  // The exponent bias has the same magnitude as ExponentMax (signed here so
  // it can take part in signed exponent arithmetic).
  static constexpr int32_t Bias = ExponentMax;

  // --- Field positions (index of each field's least significant bit). ---
  // The fraction field always starts at bit 0.
  static constexpr uint32_t ExponentPos = FractionWidth;
  static constexpr uint32_t SignBitPos = StorageWidth - 1;
};
57 
58 // IEEE-754-2008: binary16:
59 // bits 9-0 - fraction (10 bit)
60 // bits 14-10 - exponent (5 bit)
61 // bit 15 - sign
62 struct Binary16 : public BinaryN</*StorageWidth=*/16, /*FractionWidth=*/10,
63                                  /*ExponentWidth=*/5> {
64   static_assert(Precision == 11);
65   static_assert(ExponentMax == 15);
66   static_assert(ExponentPos == 10);
67   static_assert(SignBitPos == 15);
68 };
69 
70 // IEEE-754-2008: binary24:
71 // bits 15-0 - fraction (16 bit)
72 // bits 22-16 - exponent (7 bit)
73 // bit 23 - sign
74 struct Binary24 : public BinaryN</*StorageWidth=*/24, /*FractionWidth=*/16,
75                                  /*ExponentWidth=*/7> {
76   static_assert(Precision == 17);
77   static_assert(ExponentMax == 63);
78   static_assert(ExponentPos == 16);
79   static_assert(SignBitPos == 23);
80 };
81 
82 // IEEE-754-2008: binary32:
83 // bits 22-0 - fraction (23 bit)
84 // bits 30-23 - exponent (8 bit)
85 // bit 31 - sign
86 struct Binary32 : public BinaryN</*StorageWidth=*/32, /*FractionWidth=*/23,
87                                  /*ExponentWidth=*/8> {
88   static_assert(Precision == 24);
89   static_assert(ExponentMax == 127);
90   static_assert(ExponentPos == 23);
91   static_assert(SignBitPos == 31);
92 };
93 
// How an encoding is classified by its (biased) exponent and fraction fields:
// exp = 0, fract == 0: zero (the sign bit selects +0 or -0)
// exp = 0, fract != 0: subnormal numbers
//                      equation: (-1) ^ sign * 2 ^ (1 - Bias) * 0.fraction
// exp = 1..(2^ExponentWidth - 2): normalized value
//                      equation: (-1) ^ sign * 2 ^ (exponent - Bias) * 1.fraction
// exp = 2^ExponentWidth - 1, fract == 0: infinity (the sign bit selects +-)
// exp = 2^ExponentWidth - 1, fract != 0: NaN
101 
102 } // namespace ieee_754_2008
103 
// Re-encodes a floating-point bit pattern stored in the NarrowType layout as
// the bit pattern of the same value in the WideType layout.
//
// NarrowType and WideType must expose the layout constants FractionWidth,
// ExponentWidth, ExponentPos, SignBitPos and Bias.
//
// narrow:  the narrow encoding, held in the low bits of the argument.
//          Only the sign, exponent and fraction fields are read.
// returns: the WideType encoding.
//          * +-zero and +-infinity map to +-zero and +-infinity;
//          * NaN stays NaN, its fraction bits are kept, left-aligned in the
//            wider fraction field;
//          * normalized values keep their value exactly;
//          * subnormal values are re-encoded as *normalized* wide values.
//
// The conversion is only exact when the wide format can hold every narrow
// value; those requirements are enforced at compile time below.
template <typename NarrowType, typename WideType>
inline uint32_t extendBinaryFloatingPoint(uint32_t narrow) {
  // Pull the layout parameters into plainly-typed constants so that all the
  // arithmetic below has well-defined (unsigned / signed) types.
  constexpr uint32_t NarrowFractionWidth = NarrowType::FractionWidth;
  constexpr uint32_t NarrowExponentWidth = NarrowType::ExponentWidth;
  constexpr uint32_t NarrowExponentPos = NarrowType::ExponentPos;
  constexpr uint32_t NarrowSignBitPos = NarrowType::SignBitPos;
  constexpr int32_t NarrowBias = NarrowType::Bias;

  constexpr uint32_t WideFractionWidth = WideType::FractionWidth;
  constexpr uint32_t WideExponentWidth = WideType::ExponentWidth;
  constexpr uint32_t WideExponentPos = WideType::ExponentPos;
  constexpr uint32_t WideSignBitPos = WideType::SignBitPos;
  constexpr int32_t WideBias = WideType::Bias;

  static_assert(NarrowSignBitPos < 32 && WideSignBitPos < 32,
                "both encodings must fit into 32 bits");
  static_assert(WideFractionWidth >= NarrowFractionWidth,
                "the wide fraction field must hold every narrow fraction bit");

  // All-ones patterns of the individual fields.
  constexpr uint32_t NarrowExponentMask =
      (uint32_t{1} << NarrowExponentWidth) - 1;
  constexpr uint32_t NarrowFractionMask =
      (uint32_t{1} << NarrowFractionWidth) - 1;
  constexpr uint32_t WideExponentMask = (uint32_t{1} << WideExponentWidth) - 1;
  constexpr uint32_t WideFractionMask = (uint32_t{1} << WideFractionWidth) - 1;
  // The bit just above the wide fraction field: where the implicit leading
  // one of a normalized significand would sit.
  constexpr uint32_t WideImplicitBit = uint32_t{1} << WideFractionWidth;

  // The largest normalized narrow exponent must stay a normalized (i.e. not
  // the all-ones) exponent after re-biasing.
  static_assert(static_cast<int32_t>(NarrowExponentMask) - 1 - NarrowBias +
                        WideBias <=
                    static_cast<int32_t>(WideExponentMask) - 1,
                "the wide exponent range must cover the narrow one");
  // Normalizing the smallest narrow subnormal takes NarrowFractionWidth
  // shifts, each of which decrements the exponent starting from
  // (1 - NarrowBias + WideBias). The result must remain >= 1, so that it is
  // still a normalized wide value.
  static_assert(WideBias - NarrowBias >=
                    static_cast<int32_t>(NarrowFractionWidth),
                "every narrow subnormal must be a normalized wide value");

  // Split the narrow encoding into its three fields.
  const uint32_t sign = (narrow >> NarrowSignBitPos) & 1U;
  const uint32_t narrow_exponent =
      (narrow >> NarrowExponentPos) & NarrowExponentMask;
  const uint32_t narrow_fraction = narrow & NarrowFractionMask;

  // Start with the answer for a normalized value: re-bias the exponent and
  // left-align the fraction in the wider field. Special cases are patched up
  // afterwards.
  uint32_t wide_exponent = static_cast<uint32_t>(
      static_cast<int32_t>(narrow_exponent) - NarrowBias + WideBias);
  uint32_t wide_fraction = narrow_fraction
                           << (WideFractionWidth - NarrowFractionWidth);

  if (narrow_exponent == NarrowExponentMask) {
    // Infinity or NaN: all-ones exponent in the wide format too.
    // The (already widened) fraction is kept as-is.
    wide_exponent = WideExponentMask;
  } else if (narrow_exponent == 0) {
    if (narrow_fraction == 0) {
      // +-Zero
      wide_exponent = 0;
      wide_fraction = 0;
    } else {
      // Subnormal: value is 2 ^ (1 - NarrowBias) * 0.fraction.
      // Shift the fraction left until its leading one reaches the implicit
      // bit position (giving 1.new_fraction), decrementing the exponent once
      // per shift. Terminates because the fraction is non-zero; the exponent
      // stays >= 1 because of the static_assert above.
      wide_exponent = static_cast<uint32_t>(1 - NarrowBias + WideBias);
      while ((wide_fraction & WideImplicitBit) == 0) {
        wide_exponent -= 1;
        wide_fraction <<= 1;
      }
      // Drop the now-implicit leading one.
      wide_fraction &= WideFractionMask;
    }
  }

  return (sign << WideSignBitPos) | (wide_exponent << WideExponentPos) |
         wide_fraction;
}
143 
144 // Expand IEEE-754-2008 binary16 into float32
fp16ToFloat(uint16_t fp16)145 inline uint32_t fp16ToFloat(uint16_t fp16) {
146   return extendBinaryFloatingPoint<ieee_754_2008::Binary16,
147                                    ieee_754_2008::Binary32>(fp16);
148 }
149 
150 // Expand IEEE-754-2008 binary24 into float32
fp24ToFloat(uint32_t fp24)151 inline uint32_t fp24ToFloat(uint32_t fp24) {
152   return extendBinaryFloatingPoint<ieee_754_2008::Binary24,
153                                    ieee_754_2008::Binary32>(fp24);
154 }
155 
156 } // namespace rawspeed
157