1 /** @file
2  * @brief functions to serialise and unserialise a double
3  */
4 /* Copyright (C) 2006,2007,2008,2009,2015 Olly Betts
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #include <config.h>
26 
27 #include <xapian/error.h>
28 
29 #include "omassert.h"
30 
31 #include "serialise-double.h"
32 
33 #include <cfloat>
34 #include <cmath>
35 
36 #include <algorithm>
37 #include <string>
38 
39 using namespace std;
40 
41 // The serialisation we use for doubles is inspired by a comp.lang.c post
42 // by Jens Moeller:
43 //
44 // https://groups.google.com/group/comp.lang.c/browse_thread/thread/6558d4653f6dea8b/75a529ec03148c98
45 //
46 // The clever part is that the mantissa is encoded as a base-256 number which
47 // means there's no rounding error provided both ends have FLT_RADIX as some
48 // power of two.
49 //
50 // FLT_RADIX == 2 seems to be ubiquitous on modern UNIX platforms, while
51 // some older platforms used FLT_RADIX == 16 (IBM machines for example).
52 // FLT_RADIX == 10 seems to be very rare (the only instance Google finds
53 // is for a cross-compiler to some TI calculators).
54 
55 #if FLT_RADIX == 2
56 # define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 7 + 7) / 8)
57 # define MAX_EXP ((DBL_MAX_EXP + 1) / 8)
58 # define MAX_MANTISSA (1 << (DBL_MAX_EXP & 7))
59 #elif FLT_RADIX == 16
60 # define MAX_MANTISSA_BYTES ((DBL_MANT_DIG + 1 + 1) / 2)
61 # define MAX_EXP ((DBL_MAX_EXP + 1) / 2)
62 # define MAX_MANTISSA (1 << ((DBL_MAX_EXP & 1) * 4))
63 #else
64 # error FLT_RADIX is a value not currently handled (not 2 or 16)
65 // # define MAX_MANTISSA_BYTES (sizeof(double) + 1)
66 #endif
67 
base256ify_double(double & v)68 static int base256ify_double(double &v) {
69     int exp;
70     v = frexp(v, &exp);
71     // v is now in the range [0.5, 1.0)
72     --exp;
73 #if FLT_RADIX == 2
74     v = scalbn(v, (exp & 7) + 1);
75 #else
76     v = ldexp(v, (exp & 7) + 1);
77 #endif
78     // v is now in the range [1.0, 256.0)
79     exp >>= 3;
80     return exp;
81 }
82 
serialise_double(double v)83 std::string serialise_double(double v)
84 {
85     /* First byte:
86      *  bit 7 Negative flag
87      *  bit 4..6 Mantissa length - 1
88      *  bit 0..3 --- 0-13 -> Exponent + 7
89      *            \- 14 -> Exponent given by next byte
90      *             - 15 -> Exponent given by next 2 bytes
91      *
92      * Then optional medium (1 byte) or large exponent (2 bytes, lsb first)
93      *
94      * Then mantissa (0 iff value is 0)
95      */
96 
97     bool negative = (v < 0.0);
98 
99     if (negative) v = -v;
100 
101     int exp = base256ify_double(v);
102 
103     string result;
104 
105     if (exp <= 6 && exp >= -7) {
106 	unsigned char b = static_cast<unsigned char>(exp + 7);
107 	if (negative) b |= static_cast<unsigned char>(0x80);
108 	result += char(b);
109     } else {
110 	if (exp >= -128 && exp < 127) {
111 	    result += negative ? char(0x8e) : char(0x0e);
112 	    result += char(exp + 128);
113 	} else {
114 	    if (exp < -32768 || exp > 32767) {
115 		throw Xapian::InternalError("Insane exponent in floating point number");
116 	    }
117 	    result += negative ? char(0x8f) : char(0x0f);
118 	    result += char(unsigned(exp + 32768) & 0xff);
119 	    result += char(unsigned(exp + 32768) >> 8);
120 	}
121     }
122 
123     int maxbytes = min(MAX_MANTISSA_BYTES, 8);
124 
125     size_t n = result.size();
126     do {
127 	unsigned char byte = static_cast<unsigned char>(v);
128 	result += char(byte);
129 	v -= double(byte);
130 	v *= 256.0;
131     } while (v != 0.0 && --maxbytes);
132 
133     n = result.size() - n;
134     if (n > 1) {
135 	Assert(n <= 8);
136 	result[0] = static_cast<unsigned char>(result[0] | ((n - 1) << 4));
137     }
138 
139     return result;
140 }
141 
unserialise_double(const char ** p,const char * end)142 double unserialise_double(const char ** p, const char *end)
143 {
144     if (end - *p < 2) {
145 	throw Xapian::SerialisationError("Bad encoded double: insufficient data");
146     }
147     unsigned char first = *(*p)++;
148     if (first == 0 && *(*p) == 0) {
149 	++*p;
150 	return 0.0;
151     }
152 
153     bool negative = (first & 0x80) != 0;
154     size_t mantissa_len = ((first >> 4) & 0x07) + 1;
155 
156     int exp = first & 0x0f;
157     if (exp >= 14) {
158 	int bigexp = static_cast<unsigned char>(*(*p)++);
159 	if (exp == 15) {
160 	    if (*p == end) {
161 		throw Xapian::SerialisationError("Bad encoded double: short large exponent");
162 	    }
163 	    exp = bigexp | (static_cast<unsigned char>(*(*p)++) << 8);
164 	    exp -= 32768;
165 	} else {
166 	    exp = bigexp - 128;
167 	}
168     } else {
169 	exp -= 7;
170     }
171 
172     if (size_t(end - *p) < mantissa_len) {
173 	throw Xapian::SerialisationError("Bad encoded double: short mantissa");
174     }
175 
176     double v = 0.0;
177 
178     static double dbl_max_mantissa = DBL_MAX;
179     static int dbl_max_exp = base256ify_double(dbl_max_mantissa);
180     *p += mantissa_len;
181     if (exp > dbl_max_exp ||
182 	(exp == dbl_max_exp &&
183 	 double(static_cast<unsigned char>((*p)[-1])) > dbl_max_mantissa)) {
184 	// The mantissa check should be precise provided that FLT_RADIX
185 	// is a power of 2.
186 	v = HUGE_VAL;
187     } else {
188 	const char *q = *p;
189 	while (mantissa_len--) {
190 	    v *= 0.00390625; // 1/256
191 	    v += double(static_cast<unsigned char>(*--q));
192 	}
193 
194 #if FLT_RADIX == 2
195 	if (exp) v = scalbn(v, exp * 8);
196 #elif FLT_RADIX == 16
197 	if (exp) v = scalbn(v, exp * 2);
198 #else
199 	if (exp) v = ldexp(v, exp * 8);
200 #endif
201 
202 #if 0
203 	if (v == 0.0) {
204 	    // FIXME: handle underflow
205 	}
206 #endif
207     }
208 
209     if (negative) v = -v;
210 
211     return v;
212 }
213