1 /*-
2 * Copyright (c) 2014-2018 MongoDB, Inc.
3 * Copyright (c) 2008-2014 WiredTiger, Inc.
4 * All rights reserved.
5 *
6 * See the file LICENSE for redistribution information.
7 */
8
9 /*
10 * Variable-length integer encoding.
11 * We need up to 64 bits, signed and unsigned. Further, we want the packed
12 * representation to have the same lexicographic ordering as the integer
13 * values. This avoids the need for special-purpose comparison code.
14 *
15 * Try hard to keep small values small (up to ~2 bytes): that gives the biggest
16 * benefit for common cases storing small values. After that, just encode the
17 * length in the first byte: we could squeeze in a couple of extra bits, but
18 * the marginal benefit is small, and we want this code to be relatively
19 * easy to implement in client code or scripting APIs.
20 *
21 * First byte | Next | |
22 * byte | bytes| Min Value | Max Value
23 * ------------+------+------------------------+--------------------------------
24 * [00 00xxxx] | free | N/A | N/A
25 * [00 01llll] | llll | -2^64 | -2^13 - 2^6
26 * [00 1xxxxx] | 1 | -2^13 - 2^6 | -2^6 - 1
27 * [01 xxxxxx] | 0 | -2^6 | -1
28 * [10 xxxxxx] | 0 | 0 | 2^6 - 1
29 * [11 0xxxxx] | 1 | 2^6 | 2^13 + 2^6 - 1
30 * [11 10llll] | llll | 2^13 + 2^6 | 2^64 - 1
31 * [11 11xxxx] | free | N/A | N/A
32 */
33
34 #define NEG_MULTI_MARKER (uint8_t)0x10
35 #define NEG_2BYTE_MARKER (uint8_t)0x20
36 #define NEG_1BYTE_MARKER (uint8_t)0x40
37 #define POS_1BYTE_MARKER (uint8_t)0x80
38 #define POS_2BYTE_MARKER (uint8_t)0xc0
39 #define POS_MULTI_MARKER (uint8_t)0xe0
40
41 #define NEG_1BYTE_MIN (-(1 << 6))
42 #define NEG_2BYTE_MIN (-(1 << 13) + NEG_1BYTE_MIN)
43 #define POS_1BYTE_MAX ((1 << 6) - 1)
44 #define POS_2BYTE_MAX ((1 << 13) + POS_1BYTE_MAX)
45
46 /* Extract bits <start> to <end> from a value (counting from LSB == 0). */
47 #define GET_BITS(x, start, end) \
48 (((uint64_t)(x) & ((1U << (start)) - 1U)) >> (end))
49
50 /*
51 * Size checks: return ENOMEM if not enough room when writing, EINVAL if the
52 * length is wrong when reading (presumably the value is corrupted).
53 */
54 #define WT_SIZE_CHECK_PACK(l, maxl) \
55 WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), ENOMEM)
56 #define WT_SIZE_CHECK_UNPACK(l, maxl) \
57 WT_RET_TEST((maxl) != 0 && (size_t)(l) > (maxl), EINVAL)
58
59 /* Count the leading zero bytes. */
60 #if defined(__GNUC__)
61 #define WT_LEADING_ZEROS(x, i) \
62 ((i) = ((x) == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
63 #elif defined(_MSC_VER)
64 #define WT_LEADING_ZEROS(x, i) do { \
65 if ((x) == 0) (i) = (int)sizeof(x); \
66 else { \
67 unsigned long __index; \
68 _BitScanReverse64(&__index, x); \
69 __index = 63 ^ __index; \
70 (i) = (int)(__index >> 3); } \
71 } while (0)
72 #else
73 #define WT_LEADING_ZEROS(x, i) do { \
74 uint64_t __x = (x); \
75 uint64_t __m = (uint64_t)0xff << 56; \
76 for ((i) = 0; !(__x & __m) && (i) != 8; (i)++) \
77 __m >>= 8; \
78 } while (0)
79 #endif
80
81 /*
82 * __wt_vpack_posint --
83 * Packs a positive variable-length integer in the specified location.
84 */
85 static inline int
__wt_vpack_posint(uint8_t ** pp,size_t maxlen,uint64_t x)86 __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
87 {
88 uint8_t *p;
89 int len, lz, shift;
90
91 WT_LEADING_ZEROS(x, lz);
92 len = (int)sizeof(x) - lz;
93 WT_SIZE_CHECK_PACK(len + 1, maxlen);
94 p = *pp;
95
96 /* There are four bits we can use in the first byte. */
97 *p++ |= (len & 0xf);
98
99 for (shift = (len - 1) << 3; len != 0; --len, shift -= 8)
100 *p++ = (uint8_t)(x >> shift);
101
102 *pp = p;
103 return (0);
104 }
105
106 /*
107 * __wt_vpack_negint --
108 * Packs a negative variable-length integer in the specified location.
109 */
110 static inline int
__wt_vpack_negint(uint8_t ** pp,size_t maxlen,uint64_t x)111 __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
112 {
113 uint8_t *p;
114 int len, lz, shift;
115
116 WT_LEADING_ZEROS(~x, lz);
117 len = (int)sizeof(x) - lz;
118 WT_SIZE_CHECK_PACK(len + 1, maxlen);
119 p = *pp;
120
121 /*
122 * There are four size bits we can use in the first byte.
123 * For negative numbers, we store the number of leading 0xff bytes
124 * to maintain ordering (if this is not obvious, it may help to
125 * remember that -1 is the largest negative number).
126 */
127 *p++ |= (lz & 0xf);
128
129 for (shift = (len - 1) << 3; len != 0; shift -= 8, --len)
130 *p++ = (uint8_t)(x >> shift);
131
132 *pp = p;
133 return (0);
134 }
135
136 /*
137 * __wt_vunpack_posint --
138 * Reads a variable-length positive integer from the specified location.
139 */
140 static inline int
__wt_vunpack_posint(const uint8_t ** pp,size_t maxlen,uint64_t * retp)141 __wt_vunpack_posint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
142 {
143 uint64_t x;
144 uint8_t len;
145 const uint8_t *p;
146
147 /* There are four length bits in the first byte. */
148 p = *pp;
149 len = (*p++ & 0xf);
150 WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
151
152 for (x = 0; len != 0; --len)
153 x = (x << 8) | *p++;
154
155 *retp = x;
156 *pp = p;
157 return (0);
158 }
159
160 /*
161 * __wt_vunpack_negint --
162 * Reads a variable-length negative integer from the specified location.
163 */
164 static inline int
__wt_vunpack_negint(const uint8_t ** pp,size_t maxlen,uint64_t * retp)165 __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
166 {
167 uint64_t x;
168 uint8_t len;
169 const uint8_t *p;
170
171 /* There are four length bits in the first byte. */
172 p = *pp;
173 len = (int)sizeof(x) - (*p++ & 0xf);
174 WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
175
176 for (x = UINT64_MAX; len != 0; --len)
177 x = (x << 8) | *p++;
178
179 *retp = x;
180 *pp = p;
181 return (0);
182 }
183
184 /*
185 * __wt_vpack_uint --
186 * Variable-sized packing for unsigned integers
187 */
188 static inline int
__wt_vpack_uint(uint8_t ** pp,size_t maxlen,uint64_t x)189 __wt_vpack_uint(uint8_t **pp, size_t maxlen, uint64_t x)
190 {
191 uint8_t *p;
192
193 WT_SIZE_CHECK_PACK(1, maxlen);
194 p = *pp;
195 if (x <= POS_1BYTE_MAX)
196 *p++ = POS_1BYTE_MARKER | GET_BITS(x, 6, 0);
197 else if (x <= POS_2BYTE_MAX) {
198 WT_SIZE_CHECK_PACK(2, maxlen);
199 x -= POS_1BYTE_MAX + 1;
200 *p++ = POS_2BYTE_MARKER | GET_BITS(x, 13, 8);
201 *p++ = GET_BITS(x, 8, 0);
202 } else if (x == POS_2BYTE_MAX + 1) {
203 /*
204 * This is a special case where we could store the value with
205 * just a single byte, but we append a zero byte so that the
206 * encoding doesn't get shorter for this one value.
207 */
208 *p++ = POS_MULTI_MARKER | 0x1;
209 *p++ = 0;
210 } else {
211 x -= POS_2BYTE_MAX + 1;
212 *p = POS_MULTI_MARKER;
213 return (__wt_vpack_posint(pp, maxlen, x));
214 }
215
216 *pp = p;
217 return (0);
218 }
219
220 /*
221 * __wt_vpack_int --
222 * Variable-sized packing for signed integers
223 */
224 static inline int
__wt_vpack_int(uint8_t ** pp,size_t maxlen,int64_t x)225 __wt_vpack_int(uint8_t **pp, size_t maxlen, int64_t x)
226 {
227 uint8_t *p;
228
229 WT_SIZE_CHECK_PACK(1, maxlen);
230 p = *pp;
231 if (x < NEG_2BYTE_MIN) {
232 *p = NEG_MULTI_MARKER;
233 return (__wt_vpack_negint(pp, maxlen, (uint64_t)x));
234 }
235 if (x < NEG_1BYTE_MIN) {
236 WT_SIZE_CHECK_PACK(2, maxlen);
237 x -= NEG_2BYTE_MIN;
238 *p++ = NEG_2BYTE_MARKER | GET_BITS(x, 13, 8);
239 *p++ = GET_BITS(x, 8, 0);
240 } else if (x < 0) {
241 x -= NEG_1BYTE_MIN;
242 *p++ = NEG_1BYTE_MARKER | GET_BITS(x, 6, 0);
243 } else
244 /* For non-negative values, use the unsigned code above. */
245 return (__wt_vpack_uint(pp, maxlen, (uint64_t)x));
246
247 *pp = p;
248 return (0);
249 }
250
251 /*
252 * __wt_vunpack_uint --
253 * Variable-sized unpacking for unsigned integers
254 */
255 static inline int
__wt_vunpack_uint(const uint8_t ** pp,size_t maxlen,uint64_t * xp)256 __wt_vunpack_uint(const uint8_t **pp, size_t maxlen, uint64_t *xp)
257 {
258 const uint8_t *p;
259
260 WT_SIZE_CHECK_UNPACK(1, maxlen);
261 p = *pp;
262 switch (*p & 0xf0) {
263 case POS_1BYTE_MARKER:
264 case POS_1BYTE_MARKER | 0x10:
265 case POS_1BYTE_MARKER | 0x20:
266 case POS_1BYTE_MARKER | 0x30:
267 *xp = GET_BITS(*p, 6, 0);
268 p += 1;
269 break;
270 case POS_2BYTE_MARKER:
271 case POS_2BYTE_MARKER | 0x10:
272 WT_SIZE_CHECK_UNPACK(2, maxlen);
273 *xp = GET_BITS(*p++, 5, 0) << 8;
274 *xp |= *p++;
275 *xp += POS_1BYTE_MAX + 1;
276 break;
277 case POS_MULTI_MARKER:
278 WT_RET(__wt_vunpack_posint(pp, maxlen, xp));
279 *xp += POS_2BYTE_MAX + 1;
280 return (0);
281 default:
282 return (EINVAL);
283 }
284
285 *pp = p;
286 return (0);
287 }
288
289 /*
290 * __wt_vunpack_int --
291 * Variable-sized packing for signed integers
292 */
293 static inline int
__wt_vunpack_int(const uint8_t ** pp,size_t maxlen,int64_t * xp)294 __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp)
295 {
296 const uint8_t *p;
297
298 WT_SIZE_CHECK_UNPACK(1, maxlen);
299 p = *pp;
300 switch (*p & 0xf0) {
301 case NEG_MULTI_MARKER:
302 WT_RET(__wt_vunpack_negint(pp, maxlen, (uint64_t *)xp));
303 return (0);
304 case NEG_2BYTE_MARKER:
305 case NEG_2BYTE_MARKER | 0x10:
306 WT_SIZE_CHECK_UNPACK(2, maxlen);
307 *xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8);
308 *xp |= *p++;
309 *xp += NEG_2BYTE_MIN;
310 break;
311 case NEG_1BYTE_MARKER:
312 case NEG_1BYTE_MARKER | 0x10:
313 case NEG_1BYTE_MARKER | 0x20:
314 case NEG_1BYTE_MARKER | 0x30:
315 *xp = NEG_1BYTE_MIN + (int64_t)GET_BITS(*p, 6, 0);
316 p += 1;
317 break;
318 default:
319 /* Identical to the unsigned case. */
320 return (__wt_vunpack_uint(pp, maxlen, (uint64_t *)xp));
321 }
322
323 *pp = p;
324 return (0);
325 }
326
327 /*
328 * __wt_vsize_posint --
329 * Return the packed size of a positive variable-length integer.
330 */
331 static inline size_t
__wt_vsize_posint(uint64_t x)332 __wt_vsize_posint(uint64_t x)
333 {
334 int lz;
335
336 WT_LEADING_ZEROS(x, lz);
337 return ((size_t)(WT_INTPACK64_MAXSIZE - lz));
338 }
339
340 /*
341 * __wt_vsize_negint --
342 * Return the packed size of a negative variable-length integer.
343 */
344 static inline size_t
__wt_vsize_negint(uint64_t x)345 __wt_vsize_negint(uint64_t x)
346 {
347 int lz;
348
349 WT_LEADING_ZEROS(~x, lz);
350 return (size_t)(WT_INTPACK64_MAXSIZE - lz);
351 }
352
353 /*
354 * __wt_vsize_uint --
355 * Return the packed size of an unsigned integer.
356 */
357 static inline size_t
__wt_vsize_uint(uint64_t x)358 __wt_vsize_uint(uint64_t x)
359 {
360 if (x <= POS_1BYTE_MAX)
361 return (1);
362 if (x <= POS_2BYTE_MAX + 1)
363 return (2);
364 x -= POS_2BYTE_MAX + 1;
365 return (__wt_vsize_posint(x));
366 }
367
368 /*
369 * __wt_vsize_int --
370 * Return the packed size of a signed integer.
371 */
372 static inline size_t
__wt_vsize_int(int64_t x)373 __wt_vsize_int(int64_t x)
374 {
375 if (x < NEG_2BYTE_MIN)
376 return (__wt_vsize_negint((uint64_t)x));
377 if (x < NEG_1BYTE_MIN)
378 return (2);
379 if (x < 0)
380 return (1);
381 /* For non-negative values, use the unsigned code above. */
382 return (__wt_vsize_uint((uint64_t)x));
383 }
384