1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #pragma once
18
19 #include <cstdint>
20 #include <stdexcept>
21
22 #include <folly/GLog.h>
23 #include <folly/Utility.h>
24 #include <folly/lang/Assume.h>
25 #include <thrift/lib/cpp/protocol/TProtocol.h>
26
27 namespace apache {
28 namespace thrift {
29
30 namespace detail {
31 namespace nimble {
32
33 using protocol::TType;
34
35 // FIELD METADATA AND TYPE ENCODING
36 // There are two encodings we use to encode (field-id, nimble-type) pairs.
37 //
38 // Note:
39 // Below, we'll talk about "adjusted field ids". The adjustment is just
40 // subtracting 1, so that we don't waste an encoding on a field ID of 0.
41 //
42 // COMPACT ENCODING:
43 // If the adjusted field id is < 32, then we use a single byte to encode it. The
44 // low 3 bits store the type (this avoids ever emitting a 0 byte, which we
45 // can then use as the "end of struct" marker), and the high 5 bits encode the
46 // adjusted field ID.
47 //
48 // LARGE-FIELD-ID ENCODING:
49 // Otherwise, we'll use multiple bytes to encode it. The low 3 bits of the
50 // first byte will all be 1 (i.e. 7, the encoded value of INVALID), and the
51 // high 3 bits will encode the field type.
52 //
53 // If the adjusted field ID fits in 1 byte, we set the fourth lowest bit to 0
54 // and set the next byte to the adjusted field ID. Otherwise, we set the fourth
55 // lowest bit to 1 and set the next 2 bytes to the adjusted field ID, in
56 // little-endian order.
57 //
58 // CONTAINERS
59 // Container types get 1 extra byte, to indicate element types. For lists, the
60 // element type lives in the low 3 bits. For maps, the key type lives in the
61 // low 3 bits and the value type lives in the high 3 bits.
62
// NimbleType is the only type information that appears on the wire. Every
// enumerator carries an explicit value to stress that these numbers are
// wire-visible: reordering the enumerators is only safe if each one keeps its
// value.
enum class NimbleType {
  // End-of-struct marker (the "STOP" field).
  STOP = 0,

  // Primitive types: they cannot contain any other types. In particular,
  // primitive types can use the 1-byte-field-metadata encoding scheme.
  ONE_CHUNK = 1,
  TWO_CHUNK = 2,
  STRING = 3,

  // Complex types: they *can* contain other types, and therefore occupy a
  // variable-length number of field stream bytes.
  STRUCT = 4,
  LIST = 5,
  MAP = 6,

  // A NimbleType is sometimes obtained by masking off 3 bits, which can yield
  // 7 even though no real type has that value. In field ID encoding it
  // indicates a multi-byte encoding; in (e.g.) map or list encoding it is
  // simply invalid data.
  INVALID = 7,
};
85
// Given the first metadata byte of a field, reports whether that field uses
// the compact 1-byte encoding. The multi-byte encoding is flagged by having
// all three low bits set, so any other low-bit pattern is compact.
inline bool isCompactMetadata(std::uint8_t byte) {
  constexpr std::uint8_t kLowTypeBits = 7;
  const bool usesLargeEncoding = (byte & kLowTypeBits) == kLowTypeBits;
  return !usesLargeEncoding;
}
91
nimbleTypeFromCompactMetadata(std::uint8_t byte)92 inline NimbleType nimbleTypeFromCompactMetadata(std::uint8_t byte) {
93 DCHECK(isCompactMetadata(byte));
94 return (NimbleType)(byte & 7);
95 }
96
fieldIdFromCompactMetadata(std::uint8_t byte)97 inline std::int16_t fieldIdFromCompactMetadata(std::uint8_t byte) {
98 DCHECK(isCompactMetadata(byte));
99 return (byte >> 3) + 1;
100 }
101
nimbleTypeFromLargeMetadata(std::uint8_t byte1)102 inline NimbleType nimbleTypeFromLargeMetadata(std::uint8_t byte1) {
103 DCHECK(!isCompactMetadata(byte1));
104 return (NimbleType)(byte1 >> 5);
105 }
106
isLargeMetadataTwoByte(std::uint8_t byte1)107 inline bool isLargeMetadataTwoByte(std::uint8_t byte1) {
108 DCHECK(!isCompactMetadata(byte1));
109 return (byte1 & (1 << 3)) == 0;
110 }
111
fieldIdFromTwoByteMetadata(std::uint8_t byte1,std::uint8_t byte2)112 inline std::uint16_t fieldIdFromTwoByteMetadata(
113 std::uint8_t byte1, std::uint8_t byte2) {
114 DCHECK(isLargeMetadataTwoByte(byte1));
115 return byte2 + 1;
116 }
117
fieldIdFromThreeByteMetadata(std::uint8_t byte1,std::uint16_t short_)118 inline std::uint16_t fieldIdFromThreeByteMetadata(
119 std::uint8_t byte1, std::uint16_t short_) {
120 DCHECK(!isLargeMetadataTwoByte(byte1));
121 return short_ + 1;
122 }
123
ttypeToNimbleType(TType ttype)124 inline NimbleType ttypeToNimbleType(TType ttype) {
125 // Don't worry about the performance of this switch; this function is only
126 // ever called when its type is a compile-time constant.
127 switch (ttype) {
128 case TType::T_STOP:
129 return NimbleType::STOP;
130 case TType::T_BOOL:
131 return NimbleType::ONE_CHUNK;
132 case TType::T_BYTE: // == TType::T_I08
133 return NimbleType::ONE_CHUNK;
134 case TType::T_DOUBLE:
135 return NimbleType::TWO_CHUNK;
136 case TType::T_I16:
137 return NimbleType::ONE_CHUNK;
138 case TType::T_I32:
139 return NimbleType::ONE_CHUNK;
140 case TType::T_U64:
141 return NimbleType::TWO_CHUNK;
142 case TType::T_I64:
143 return NimbleType::TWO_CHUNK;
144 case TType::T_STRING:
145 return NimbleType::STRING;
146 case TType::T_STRUCT:
147 return NimbleType::STRUCT;
148 case TType::T_MAP:
149 return NimbleType::MAP;
150 case TType::T_SET:
151 return NimbleType::LIST;
152 case TType::T_LIST:
153 return NimbleType::LIST;
154 case TType::T_UTF8:
155 return NimbleType::STRING;
156 case TType::T_UTF16:
157 return NimbleType::STRING;
158 case TType::T_FLOAT:
159 return NimbleType::ONE_CHUNK;
160 default:
161 // A TType never comes in off the wire (it couldn't; we encode Nimble
162 // types on the wire).
163 folly::assume_unreachable();
164 }
165 }
166
// Small fixed-capacity buffer for encoded field or container metadata. `len`
// is the number of encoded bytes stored at the front of `bytes`; the buffer
// holds at most 3.
struct FieldBytes {
  // Starts out empty (len == 0) with every byte zeroed, via the member
  // initializers below.
  FieldBytes() {}

  std::size_t len = 0;
  std::uint8_t bytes[3] = {0, 0, 0};
};
172
stopBytes()173 inline FieldBytes stopBytes() {
174 FieldBytes result;
175 result.len = 1;
176 result.bytes[0] = 0;
177 return result;
178 }
179
mapBeginByte(NimbleType key,NimbleType value)180 inline FieldBytes mapBeginByte(NimbleType key, NimbleType value) {
181 FieldBytes result;
182 result.len = 1;
183 result.bytes[0] = static_cast<std::uint8_t>((int)key | ((int)value << 5));
184 return result;
185 }
186
187 // We take a reference (instead of returning the type directly) to match the map
188 // and protocol interface equivalents.
listTypeFromByte(std::uint8_t byte,NimbleType & elem)189 inline void listTypeFromByte(std::uint8_t byte, NimbleType& elem) {
190 elem = (NimbleType)(byte & 7);
191 }
192
mapTypesFromByte(std::uint8_t byte,NimbleType & key,NimbleType & val)193 inline void mapTypesFromByte(
194 std::uint8_t byte, NimbleType& key, NimbleType& val) {
195 key = (NimbleType)(byte & 7);
196 val = (NimbleType)(byte >> 5);
197 }
198
listBeginByte(NimbleType elem)199 inline FieldBytes listBeginByte(NimbleType elem) {
200 FieldBytes result;
201 result.len = 1;
202 result.bytes[0] = static_cast<std::uint8_t>((int)elem);
203 return result;
204 }
205
206 // This is always called with static values, in readNoXfer. It compiles out;
207 // there's no code-size risk in inlining it.
208 FOLLY_ALWAYS_INLINE
fieldBeginBytes(NimbleType type,std::uint16_t fieldId)209 FieldBytes fieldBeginBytes(NimbleType type, std::uint16_t fieldId) {
210 // This is only called with trusted values, never a type off the wire; that
211 // type should always be valid.
212 DCHECK(type != NimbleType::INVALID);
213
214 FieldBytes result;
215
216 if (type == NimbleType::STOP) {
217 result.len = 1;
218 result.bytes[0] = 0;
219 return result;
220 }
221
222 std::uint16_t adjustedFieldId = fieldId - 1;
223 if (adjustedFieldId < 32) {
224 result.len = 1;
225 result.bytes[0] =
226 static_cast<std::uint8_t>((adjustedFieldId << 3) | (int)type);
227 } else {
228 std::uint8_t lengthBit;
229 if (adjustedFieldId < 256) {
230 lengthBit = 0;
231 result.len = 2;
232 } else {
233 lengthBit = (1 << 3);
234 result.len = 3;
235 }
236 std::uint8_t lowTypeBits =
237 static_cast<std::uint8_t>((int)NimbleType::INVALID);
238 std::uint8_t highTypeBits = static_cast<std::uint8_t>((int)type << 5);
239 result.bytes[0] = lowTypeBits | highTypeBits | lengthBit;
240 result.bytes[1] = adjustedFieldId & 0xFF;
241 result.bytes[2] = adjustedFieldId >> 8;
242 };
243 return result;
244 }
245
246 } // namespace nimble
247 } // namespace detail
248 } // namespace thrift
249 } // namespace apache
250