1 /*-
2  * Copyright (c) 2014-2018 MongoDB, Inc.
3  * Copyright (c) 2008-2014 WiredTiger, Inc.
4  *	All rights reserved.
5  *
6  * See the file LICENSE for redistribution information.
7  */
8 
9 /*
10  * Variable-length integer encoding.
11  * We need up to 64 bits, signed and unsigned.  Further, we want the packed
12  * representation to have the same lexicographic ordering as the integer
13  * values.  This avoids the need for special-purpose comparison code.
14  *
15  * Try hard to keep small values small (up to ~2 bytes): that gives the biggest
16  * benefit for common cases storing small values.  After that, just encode the
17  * length in the first byte: we could squeeze in a couple of extra bits, but
18  * the marginal benefit is small, and we want this code to be relatively
19  * easy to implement in client code or scripting APIs.
20  *
21  * First byte  | Next |                        |
22  * byte        | bytes| Min Value              | Max Value
23  * ------------+------+------------------------+--------------------------------
24  * [00 00xxxx] | free | N/A                    | N/A
25  * [00 01llll] | llll | -2^64                  | -2^13 - 2^6
26  * [00 1xxxxx] | 1    | -2^13 - 2^6            | -2^6 - 1
27  * [01 xxxxxx] | 0    | -2^6                   | -1
28  * [10 xxxxxx] | 0    | 0                      | 2^6 - 1
29  * [11 0xxxxx] | 1    | 2^6                    | 2^13 + 2^6 - 1
30  * [11 10llll] | llll | 2^13 + 2^6             | 2^64 - 1
31  * [11 11xxxx] | free | N/A                    | N/A
32  */
33 
34 #define	NEG_MULTI_MARKER (uint8_t)0x10
35 #define	NEG_2BYTE_MARKER (uint8_t)0x20
36 #define	NEG_1BYTE_MARKER (uint8_t)0x40
37 #define	POS_1BYTE_MARKER (uint8_t)0x80
38 #define	POS_2BYTE_MARKER (uint8_t)0xc0
39 #define	POS_MULTI_MARKER (uint8_t)0xe0
40 
41 #define	NEG_1BYTE_MIN (-(1 << 6))
42 #define	NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN)
43 #define	POS_1BYTE_MAX ((1 << 6) - 1)
44 #define	POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX)
45 
46 /* Extract bits <start> to <end> from a value (counting from LSB == 0). */
47 #define	GET_BITS(x, start, end)                                         \
48 	(((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end))
49 
50 /*
51  * Size checks: return ENOMEM if not enough room when writing, EINVAL if the
52  * length is wrong when reading (presumably the value is corrupted).
53  */
54 #define	WT_SIZE_CHECK_PACK(l, maxl)					\
55 	WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM)
56 #define	WT_SIZE_CHECK_UNPACK(l, maxl)					\
57 	WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL)
58 
59 /* Count the leading zero bytes. */
60 #if defined(__GNUC__)
61 #define	WT_LEADING_ZEROS(x, i)						\
62 	((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
63 #elif defined(_MSC_VER)
64 #define	WT_LEADING_ZEROS(x, i)	do {					\
65 	if ((x) == 0) (i) = (int)sizeof(x);				\
66 	else  { 							\
67 		unsigned long __index;					\
68 		_BitScanReverse64(&__index, x);				\
69 		__index = 63 ^ __index;					\
70 		(i) = (int)(__index >> 3); }				\
71 	} while (0)
72 #else
73 #define	WT_LEADING_ZEROS(x, i) do {					\
74 	uint64_t __x = (x);						\
75 	uint64_t __m = (uint64_t)0xff << 56;				\
76 	for ((i) = 0; !(__x & __m) && (i) != 8; (i)++)			\
77 		__m >>= 8;						\
78 } while (0)
79 #endif
80 
81 /*
82  * __wt_vpack_posint --
83  *	Packs a positive variable-length integer in the specified location.
84  */
85 static inline int
__wt_vpack_posint(uint8_t ** pp,size_t maxlen,uint64_t x)86 __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
87 {
88 	uint8_t *p;
89 	int len, lz, shift;
90 
91 	WT_LEADING_ZEROS(x, lz);
92 	len = (int)sizeof(x) - lz;
93 	WT_SIZE_CHECK_PACK(len + 1, maxlen);
94 	p = *pp;
95 
96 	/* There are four bits we can use in the first byte. */
97 	*p++ |= (len & 0xf);
98 
99 	for (shift = (len - 1) << 3; len != 0; --len, shift -= 8)
100 		*p++ = (uint8_t)(x >> shift);
101 
102 	*pp = p;
103 	return (0);
104 }
105 
106 /*
107  * __wt_vpack_negint --
108  *	Packs a negative variable-length integer in the specified location.
109  */
110 static inline int
__wt_vpack_negint(uint8_t ** pp,size_t maxlen,uint64_t x)111 __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
112 {
113 	uint8_t *p;
114 	int len, lz, shift;
115 
116 	WT_LEADING_ZEROS(~x, lz);
117 	len = (int)sizeof(x) - lz;
118 	WT_SIZE_CHECK_PACK(len + 1, maxlen);
119 	p = *pp;
120 
121 	/*
122 	 * There are four size bits we can use in the first byte.
123 	 * For negative numbers, we store the number of leading 0xff bytes
124 	 * to maintain ordering (if this is not obvious, it may help to
125 	 * remember that -1 is the largest negative number).
126 	 */
127 	*p++ |= (lz & 0xf);
128 
129 	for (shift = (len - 1) << 3; len != 0; shift -= 8, --len)
130 		*p++ = (uint8_t)(x >> shift);
131 
132 	*pp = p;
133 	return (0);
134 }
135 
136 /*
137  * __wt_vunpack_posint --
138  *	Reads a variable-length positive integer from the specified location.
139  */
140 static inline int
__wt_vunpack_posint(const uint8_t ** pp,size_t maxlen,uint64_t * retp)141 __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
142 {
143 	uint64_t x;
144 	uint8_t len;
145 	const uint8_t *p;
146 
147 	/* There are four length bits in the first byte. */
148 	p = *pp;
149 	len = (*p++ & 0xf);
150 	WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
151 
152 	for (x = 0; len != 0; --len)
153 		x = (x << 8) | *p++;
154 
155 	*retp = x;
156 	*pp = p;
157 	return (0);
158 }
159 
160 /*
161  * __wt_vunpack_negint --
162  *	Reads a variable-length negative integer from the specified location.
163  */
164 static inline int
__wt_vunpack_negint(const uint8_t ** pp,size_t maxlen,uint64_t * retp)165 __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
166 {
167 	uint64_t x;
168 	uint8_t len;
169 	const uint8_t *p;
170 
171 	/* There are four length bits in the first byte. */
172 	p = *pp;
173 	len = (int)sizeof(x) - (*p++ & 0xf);
174 	WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
175 
176 	for (x = UINT64_MAX; len != 0; --len)
177 		x = (x << 8) | *p++;
178 
179 	*retp = x;
180 	*pp = p;
181 	return (0);
182 }
183 
184 /*
185  * __wt_vpack_uint --
186  *	Variable-sized packing for unsigned integers
187  */
188 static inline int
__wt_vpack_uint(uint8_t ** pp,size_t maxlen,uint64_t x)189 __wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x)
190 {
191 	uint8_t *p;
192 
193 	WT_SIZE_CHECK_PACK(1, maxlen);
194 	p = *pp;
195 	if (x <= POS_1BYTE_MAX)
196 		*p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0);
197 	else if (x <= POS_2BYTE_MAX) {
198 		WT_SIZE_CHECK_PACK(2, maxlen);
199 		x -= POS_1BYTE_MAX + 1;
200 		*p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8);
201 		*p++ = GET_BITS(x, 8, 0);
202 	} else if (x == POS_2BYTE_MAX + 1) {
203 		/*
204 		 * This is a special case where we could store the value with
205 		 * just a single byte, but we append a zero byte so that the
206 		 * encoding doesn't get shorter for this one value.
207 		 */
208 		*p++ = POS_MULTI_MARKER | 0x1;
209 		*p++ = 0;
210 	} else {
211 		x -= POS_2BYTE_MAX + 1;
212 		*p = POS_MULTI_MARKER;
213 		return (__wt_vpack_posint(pp, maxlen, x));
214 	}
215 
216 	*pp = p;
217 	return (0);
218 }
219 
220 /*
221  * __wt_vpack_int --
222  *	Variable-sized packing for signed integers
223  */
224 static inline int
__wt_vpack_int(uint8_t ** pp,size_t maxlen,int64_t x)225 __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x)
226 {
227 	uint8_t *p;
228 
229 	WT_SIZE_CHECK_PACK(1, maxlen);
230 	p = *pp;
231 	if (x < NEG_2BYTE_MIN) {
232 		*p = NEG_MULTI_MARKER;
233 		return (__wt_vpack_negint(pp, maxlen, (uint64_t)x));
234 	}
235 	if (x < NEG_1BYTE_MIN) {
236 		WT_SIZE_CHECK_PACK(2, maxlen);
237 		x -= NEG_2BYTE_MIN;
238 		*p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8);
239 		*p++ = GET_BITS(x, 8, 0);
240 	} else if (x < 0) {
241 		x -= NEG_1BYTE_MIN;
242 		*p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0);
243 	} else
244 		/* For non-negative values, use the unsigned code above. */
245 		return (__wt_vpack_uint(pp, maxlen, (uint64_t)x));
246 
247 	*pp = p;
248 	return (0);
249 }
250 
251 /*
252  * __wt_vunpack_uint --
253  *	Variable-sized unpacking for unsigned integers
254  */
255 static inline int
__wt_vunpack_uint(const uint8_t ** pp,size_t maxlen,uint64_t * xp)256 __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp)
257 {
258 	const uint8_t *p;
259 
260 	WT_SIZE_CHECK_UNPACK(1, maxlen);
261 	p = *pp;
262 	switch (*p & 0xf0) {
263 	case POS_1BYTE_MARKER:
264 	case POS_1BYTE_MARKER | 0x10:
265 	case POS_1BYTE_MARKER | 0x20:
266 	case POS_1BYTE_MARKER | 0x30:
267 		*xp = GET_BITS(*p, 6, 0);
268 		p += 1;
269 		break;
270 	case POS_2BYTE_MARKER:
271 	case POS_2BYTE_MARKER | 0x10:
272 		WT_SIZE_CHECK_UNPACK(2, maxlen);
273 		*xp = GET_BITS(*p++, 5, 0) << 8;
274 		*xp |= *p++;
275 		*xp += POS_1BYTE_MAX + 1;
276 		break;
277 	case POS_MULTI_MARKER:
278 		WT_RET(__wt_vunpack_posint(pp, maxlen, xp));
279 		*xp += POS_2BYTE_MAX + 1;
280 		return (0);
281 	default:
282 		return (EINVAL);
283 	}
284 
285 	*pp = p;
286 	return (0);
287 }
288 
289 /*
290  * __wt_vunpack_int --
291  *	Variable-sized packing for signed integers
292  */
293 static inline int
__wt_vunpack_int(const uint8_t ** pp,size_t maxlen,int64_t * xp)294 __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp)
295 {
296 	const uint8_t *p;
297 
298 	WT_SIZE_CHECK_UNPACK(1, maxlen);
299 	p = *pp;
300 	switch (*p & 0xf0) {
301 	case NEG_MULTI_MARKER:
302 		WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp));
303 		return (0);
304 	case NEG_2BYTE_MARKER:
305 	case NEG_2BYTE_MARKER | 0x10:
306 		WT_SIZE_CHECK_UNPACK(2, maxlen);
307 		*xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8);
308 		*xp |= *p++;
309 		*xp += NEG_2BYTE_MIN;
310 		break;
311 	case NEG_1BYTE_MARKER:
312 	case NEG_1BYTE_MARKER | 0x10:
313 	case NEG_1BYTE_MARKER | 0x20:
314 	case NEG_1BYTE_MARKER | 0x30:
315 		*xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0);
316 		p += 1;
317 		break;
318 	default:
319 		/* Identical to the unsigned case. */
320 		return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp));
321 	}
322 
323 	*pp = p;
324 	return (0);
325 }
326 
327 /*
328  * __wt_vsize_posint --
329  *	Return the packed size of a positive variable-length integer.
330  */
331 static inline size_t
__wt_vsize_posint(uint64_t x)332 __wt_vsize_posint(uint64_t x)
333 {
334 	int lz;
335 
336 	WT_LEADING_ZEROS(x, lz);
337 	return ((size_t)(WT_INTPACK64_MAXSIZE - lz));
338 }
339 
340 /*
341  * __wt_vsize_negint --
342  *	Return the packed size of a negative variable-length integer.
343  */
344 static inline size_t
__wt_vsize_negint(uint64_t x)345 __wt_vsize_negint(uint64_t x)
346 {
347 	int lz;
348 
349 	WT_LEADING_ZEROS(~x, lz);
350 	return (size_t)(WT_INTPACK64_MAXSIZE - lz);
351 }
352 
353 /*
354  * __wt_vsize_uint --
355  *	Return the packed size of an unsigned integer.
356  */
357 static inline size_t
__wt_vsize_uint(uint64_t x)358 __wt_vsize_uint(uint64_t x)
359 {
360 	if (x <= POS_1BYTE_MAX)
361 		return (1);
362 	if (x <= POS_2BYTE_MAX + 1)
363 		return (2);
364 	x -= POS_2BYTE_MAX + 1;
365 	return (__wt_vsize_posint(x));
366 }
367 
368 /*
369  * __wt_vsize_int --
370  *	Return the packed size of a signed integer.
371  */
372 static inline size_t
__wt_vsize_int(int64_t x)373 __wt_vsize_int(int64_t x)
374 {
375 	if (x < NEG_2BYTE_MIN)
376 		return (__wt_vsize_negint((uint64_t)x));
377 	if (x < NEG_1BYTE_MIN)
378 		return (2);
379 	if (x < 0)
380 		return (1);
381 	/* For non-negative values, use the unsigned code above. */
382 	return (__wt_vsize_uint((uint64_t)x));
383 }
384