1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <cstdint>
20 #include <stdexcept>
21 
22 #include <folly/GLog.h>
23 #include <folly/Utility.h>
24 #include <folly/lang/Assume.h>
25 #include <thrift/lib/cpp/protocol/TProtocol.h>
26 
27 namespace apache {
28 namespace thrift {
29 
30 namespace detail {
31 namespace nimble {
32 
33 using protocol::TType;
34 
35 // FIELD METADATA AND TYPE ENCODING
36 // There are two encodings we use to encode (field-id, nimble-type) pairs.
37 //
38 // Note:
39 // Below, we'll talk about "adjusted field ids". The adjustment is just
40 // subtracting 1, so that we don't waste an encoding on a field ID of 0.
41 //
42 // COMPACT ENCODING:
43 // If the adjusted field id is < 32, then we use a single byte to encode it. The
44 // low 3 bits store the type (this avoids ever emitting a 0 byte, which we
45 // can then use as the "end of struct" marker), and the high 5 bits encode the
46 // adjusted field ID.
47 //
48 // LARGE-FIELD-ID ENCODING:
// Otherwise, we'll use multiple bytes to encode it. The low 3 bits of the
// first byte will all be set to 1 (i.e. the value 7, which would correspond to
// an encoded type of INVALID), and the high 3 bits will encode the field type.
52 //
53 // If the adjusted field ID fits in 1 byte, we set the fourth lowest bit to 0
54 // and set the next byte to the adjusted field ID. Otherwise, we set the fourth
55 // lowest bit to 1 and set the next 2 bytes to the adjusted field ID, in
56 // little-endian order.
57 //
58 // CONTAINERS
59 // Container types get 1 extra byte, to indicate element types. For lists, the
60 // element type lives in the low 3 bits. For maps, the key type lives in the
61 // low 3 bits and the value type lives in the high 3 bits.
62 
// NimbleType is the only type information that appears on the wire. Every
// enumerator is given an explicit value because those values are part of the
// wire format: the declarations may be rearranged, but a value must never
// change.
enum class NimbleType {
  STOP = 0, // End-of-struct marker ("STOP" field).

  // Primitive types: values of these types never contain other types.
  ONE_CHUNK = 1,
  TWO_CHUNK = 2,
  STRING = 3,

  // Complex types: values of these types may contain other types, and so
  // occupy a variable number of field stream bytes.
  STRUCT = 4,
  LIST = 5,
  MAP = 6,

  // Not a real type. A NimbleType is sometimes recovered by masking 3 bits out
  // of a byte, which can produce 7. In the low bits of a field-metadata byte
  // this value signals the multi-byte encoding; as a list or map element type
  // it is simply invalid data.
  INVALID = 7,
};
85 
// Given the first byte of a field's metadata, reports whether that field uses
// the compact single-byte encoding. The multi-byte encoding is flagged by all
// three low bits being set, so anything else is compact.
inline bool isCompactMetadata(std::uint8_t byte) {
  constexpr std::uint8_t kLowTypeBits = 0x7;
  const bool usesLargeEncoding = (byte & kLowTypeBits) == kLowTypeBits;
  return !usesLargeEncoding;
}
91 
nimbleTypeFromCompactMetadata(std::uint8_t byte)92 inline NimbleType nimbleTypeFromCompactMetadata(std::uint8_t byte) {
93   DCHECK(isCompactMetadata(byte));
94   return (NimbleType)(byte & 7);
95 }
96 
fieldIdFromCompactMetadata(std::uint8_t byte)97 inline std::int16_t fieldIdFromCompactMetadata(std::uint8_t byte) {
98   DCHECK(isCompactMetadata(byte));
99   return (byte >> 3) + 1;
100 }
101 
nimbleTypeFromLargeMetadata(std::uint8_t byte1)102 inline NimbleType nimbleTypeFromLargeMetadata(std::uint8_t byte1) {
103   DCHECK(!isCompactMetadata(byte1));
104   return (NimbleType)(byte1 >> 5);
105 }
106 
isLargeMetadataTwoByte(std::uint8_t byte1)107 inline bool isLargeMetadataTwoByte(std::uint8_t byte1) {
108   DCHECK(!isCompactMetadata(byte1));
109   return (byte1 & (1 << 3)) == 0;
110 }
111 
fieldIdFromTwoByteMetadata(std::uint8_t byte1,std::uint8_t byte2)112 inline std::uint16_t fieldIdFromTwoByteMetadata(
113     std::uint8_t byte1, std::uint8_t byte2) {
114   DCHECK(isLargeMetadataTwoByte(byte1));
115   return byte2 + 1;
116 }
117 
fieldIdFromThreeByteMetadata(std::uint8_t byte1,std::uint16_t short_)118 inline std::uint16_t fieldIdFromThreeByteMetadata(
119     std::uint8_t byte1, std::uint16_t short_) {
120   DCHECK(!isLargeMetadataTwoByte(byte1));
121   return short_ + 1;
122 }
123 
ttypeToNimbleType(TType ttype)124 inline NimbleType ttypeToNimbleType(TType ttype) {
125   // Don't worry about the performance of this switch; this function is only
126   // ever called when its type is a compile-time constant.
127   switch (ttype) {
128     case TType::T_STOP:
129       return NimbleType::STOP;
130     case TType::T_BOOL:
131       return NimbleType::ONE_CHUNK;
132     case TType::T_BYTE: // == TType::T_I08
133       return NimbleType::ONE_CHUNK;
134     case TType::T_DOUBLE:
135       return NimbleType::TWO_CHUNK;
136     case TType::T_I16:
137       return NimbleType::ONE_CHUNK;
138     case TType::T_I32:
139       return NimbleType::ONE_CHUNK;
140     case TType::T_U64:
141       return NimbleType::TWO_CHUNK;
142     case TType::T_I64:
143       return NimbleType::TWO_CHUNK;
144     case TType::T_STRING:
145       return NimbleType::STRING;
146     case TType::T_STRUCT:
147       return NimbleType::STRUCT;
148     case TType::T_MAP:
149       return NimbleType::MAP;
150     case TType::T_SET:
151       return NimbleType::LIST;
152     case TType::T_LIST:
153       return NimbleType::LIST;
154     case TType::T_UTF8:
155       return NimbleType::STRING;
156     case TType::T_UTF16:
157       return NimbleType::STRING;
158     case TType::T_FLOAT:
159       return NimbleType::ONE_CHUNK;
160     default:
161       // A TType never comes in off the wire (it couldn't; we encode Nimble
162       // types on the wire).
163       folly::assume_unreachable();
164   }
165 }
166 
// A small fixed-capacity buffer of encoded metadata bytes. `len` records how
// many entries of `bytes` have been filled in; a default-constructed value has
// a length of zero and all three bytes cleared.
struct FieldBytes {
  FieldBytes() : len{0}, bytes{} {}

  std::size_t len;
  std::uint8_t bytes[3];
};
172 
stopBytes()173 inline FieldBytes stopBytes() {
174   FieldBytes result;
175   result.len = 1;
176   result.bytes[0] = 0;
177   return result;
178 }
179 
mapBeginByte(NimbleType key,NimbleType value)180 inline FieldBytes mapBeginByte(NimbleType key, NimbleType value) {
181   FieldBytes result;
182   result.len = 1;
183   result.bytes[0] = static_cast<std::uint8_t>((int)key | ((int)value << 5));
184   return result;
185 }
186 
187 // We take a reference (instead of returning the type directly) to match the map
188 // and protocol interface equivalents.
listTypeFromByte(std::uint8_t byte,NimbleType & elem)189 inline void listTypeFromByte(std::uint8_t byte, NimbleType& elem) {
190   elem = (NimbleType)(byte & 7);
191 }
192 
mapTypesFromByte(std::uint8_t byte,NimbleType & key,NimbleType & val)193 inline void mapTypesFromByte(
194     std::uint8_t byte, NimbleType& key, NimbleType& val) {
195   key = (NimbleType)(byte & 7);
196   val = (NimbleType)(byte >> 5);
197 }
198 
listBeginByte(NimbleType elem)199 inline FieldBytes listBeginByte(NimbleType elem) {
200   FieldBytes result;
201   result.len = 1;
202   result.bytes[0] = static_cast<std::uint8_t>((int)elem);
203   return result;
204 }
205 
206 // This is always called with static values, in readNoXfer. It compiles out;
207 // there's no code-size risk in inlining it.
208 FOLLY_ALWAYS_INLINE
fieldBeginBytes(NimbleType type,std::uint16_t fieldId)209 FieldBytes fieldBeginBytes(NimbleType type, std::uint16_t fieldId) {
210   // This is only called with trusted values, never a type off the wire; that
211   // type should always be valid.
212   DCHECK(type != NimbleType::INVALID);
213 
214   FieldBytes result;
215 
216   if (type == NimbleType::STOP) {
217     result.len = 1;
218     result.bytes[0] = 0;
219     return result;
220   }
221 
222   std::uint16_t adjustedFieldId = fieldId - 1;
223   if (adjustedFieldId < 32) {
224     result.len = 1;
225     result.bytes[0] =
226         static_cast<std::uint8_t>((adjustedFieldId << 3) | (int)type);
227   } else {
228     std::uint8_t lengthBit;
229     if (adjustedFieldId < 256) {
230       lengthBit = 0;
231       result.len = 2;
232     } else {
233       lengthBit = (1 << 3);
234       result.len = 3;
235     }
236     std::uint8_t lowTypeBits =
237         static_cast<std::uint8_t>((int)NimbleType::INVALID);
238     std::uint8_t highTypeBits = static_cast<std::uint8_t>((int)type << 5);
239     result.bytes[0] = lowTypeBits | highTypeBits | lengthBit;
240     result.bytes[1] = adjustedFieldId & 0xFF;
241     result.bytes[2] = adjustedFieldId >> 8;
242   };
243   return result;
244 }
245 
246 } // namespace nimble
247 } // namespace detail
248 } // namespace thrift
249 } // namespace apache
250