1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <type_traits>
20 
21 #include <folly/Conv.h>
22 #include <folly/Expected.h>
23 #include <folly/Likely.h>
24 #include <folly/Portability.h>
25 #include <folly/Range.h>
26 
27 namespace folly {
28 
29 /**
30  * Variable-length integer encoding, using a little-endian, base-128
31  * representation.
32  *
33  * The MSb is set on all bytes except the last.
34  *
35  * Details:
36  * https://developers.google.com/protocol-buffers/docs/encoding#varints
37  *
38  * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
39  * is faster and likely smaller.
40  */
41 
42 /**
43  * Maximum length (in bytes) of the varint encoding of a 32-bit value.
44  */
45 constexpr size_t kMaxVarintLength32 = 5;
46 
47 /**
48  * Maximum length (in bytes) of the varint encoding of a 64-bit value.
49  */
50 constexpr size_t kMaxVarintLength64 = 10;
51 
52 /**
53  * Encode a value in the given buffer, returning the number of bytes used
54  * for encoding.
55  * buf must have enough space to represent the value (at least
56  * kMaxVarintLength64 bytes to encode arbitrary 64-bit values)
57  */
58 size_t encodeVarint(uint64_t val, uint8_t* buf);
59 
60 /**
61  * Determine the number of bytes needed to represent "val".
62  * 32-bit values need at most 5 bytes.
63  * 64-bit values need at most 10 bytes.
64  */
65 int encodeVarintSize(uint64_t val);
66 
67 /**
68  * Decode a value from a given buffer, advances data past the returned value.
69  * Throws on error.
70  */
71 template <class T>
72 uint64_t decodeVarint(Range<T*>& data);
73 
74 enum class DecodeVarintError {
75   TooManyBytes = 0,
76   TooFewBytes = 1,
77 };
78 
79 /**
80  * A variant of decodeVarint() that does not throw on error. Useful in contexts
81  * where only part of a serialized varint may be attempted to be decoded, e.g.,
82  * when a serialized varint arrives on the boundary of a network packet.
83  */
84 template <class T>
85 Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data);
86 
87 /**
88  * ZigZag encoding that maps signed integers with a small absolute value
89  * to unsigned integers with a small (positive) values. Without this,
90  * encoding negative values using Varint would use up 9 or 10 bytes.
91  *
92  * if x >= 0, encodeZigZag(x) == 2*x
93  * if x <  0, encodeZigZag(x) == -2*x + 1
94  */
95 
encodeZigZag(int64_t val)96 inline uint64_t encodeZigZag(int64_t val) {
97   // Bit-twiddling magic stolen from the Google protocol buffer document;
98   // val >> 63 is an arithmetic shift because val is signed
99   auto uval = static_cast<uint64_t>(val);
100   return static_cast<uint64_t>((uval << 1) ^ (val >> 63));
101 }
102 
decodeZigZag(uint64_t val)103 inline int64_t decodeZigZag(uint64_t val) {
104   return static_cast<int64_t>((val >> 1) ^ -(val & 1));
105 }
106 
107 // Implementation below
108 
encodeVarint(uint64_t val,uint8_t * buf)109 inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
110   uint8_t* p = buf;
111   while (val >= 128) {
112     *p++ = 0x80 | (val & 0x7f);
113     val >>= 7;
114   }
115   *p++ = uint8_t(val);
116   return size_t(p - buf);
117 }
118 
encodeVarintSize(uint64_t val)119 inline int encodeVarintSize(uint64_t val) {
120   if (folly::kIsArchAmd64) {
121     // __builtin_clzll is undefined for 0
122     int highBit = 64 - __builtin_clzll(val | 1);
123     return (highBit + 6) / 7;
124   } else {
125     int s = 1;
126     while (val >= 128) {
127       ++s;
128       val >>= 7;
129     }
130     return s;
131   }
132 }
133 
134 template <class T>
decodeVarint(Range<T * > & data)135 inline uint64_t decodeVarint(Range<T*>& data) {
136   auto expected = tryDecodeVarint(data);
137   if (!expected) {
138     throw std::invalid_argument(
139         expected.error() == DecodeVarintError::TooManyBytes
140             ? "Invalid varint value: too many bytes."
141             : "Invalid varint value: too few bytes.");
142   }
143   return *expected;
144 }
145 
146 template <class T>
tryDecodeVarint(Range<T * > & data)147 inline Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data) {
148   static_assert(
149       std::is_same<typename std::remove_cv<T>::type, char>::value ||
150           std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
151       "Only character ranges are supported");
152 
153   const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
154   const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
155   const int8_t* p = begin;
156   uint64_t val = 0;
157 
158   // end is always greater than or equal to begin, so this subtraction is safe
159   if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path
160     int64_t b;
161     do {
162       b = *p++;
163       val = (b & 0x7f);
164       if (b >= 0) {
165         break;
166       }
167       b = *p++;
168       val |= (b & 0x7f) << 7;
169       if (b >= 0) {
170         break;
171       }
172       b = *p++;
173       val |= (b & 0x7f) << 14;
174       if (b >= 0) {
175         break;
176       }
177       b = *p++;
178       val |= (b & 0x7f) << 21;
179       if (b >= 0) {
180         break;
181       }
182       b = *p++;
183       val |= (b & 0x7f) << 28;
184       if (b >= 0) {
185         break;
186       }
187       b = *p++;
188       val |= (b & 0x7f) << 35;
189       if (b >= 0) {
190         break;
191       }
192       b = *p++;
193       val |= (b & 0x7f) << 42;
194       if (b >= 0) {
195         break;
196       }
197       b = *p++;
198       val |= (b & 0x7f) << 49;
199       if (b >= 0) {
200         break;
201       }
202       b = *p++;
203       val |= (b & 0x7f) << 56;
204       if (b >= 0) {
205         break;
206       }
207       b = *p++;
208       val |= (b & 0x01) << 63;
209       if (b >= 0) {
210         break;
211       }
212       return makeUnexpected(DecodeVarintError::TooManyBytes);
213     } while (false);
214   } else {
215     int shift = 0;
216     while (p != end && *p < 0) {
217       val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
218       shift += 7;
219     }
220     if (p == end) {
221       return makeUnexpected(DecodeVarintError::TooFewBytes);
222     }
223     val |= static_cast<uint64_t>(*p++) << shift;
224   }
225 
226   data.uncheckedAdvance(p - begin);
227   return val;
228 }
229 
230 } // namespace folly
231