1 /* 2 * Copyright (c) Facebook, Inc. and its affiliates. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #pragma once 18 19 #include <cstdint> 20 #include <cstring> 21 #include <utility> 22 23 #include <folly/Likely.h> 24 #include <folly/io/Cursor.h> 25 26 #include <thrift/lib/cpp/protocol/TProtocolException.h> 27 #include <thrift/lib/cpp2/protocol/nimble/ControlBitHelpers.h> 28 #include <thrift/lib/cpp2/protocol/nimble/DecodeNimbleBlock.h> 29 30 namespace apache { 31 namespace thrift { 32 namespace detail { 33 34 struct BufferingNimbleDecoderState { 35 public: 36 BufferingNimbleDecoderState() = default; 37 BufferingNimbleDecoderState(const BufferingNimbleDecoderState&) = delete; BufferingNimbleDecoderStateBufferingNimbleDecoderState38 BufferingNimbleDecoderState(BufferingNimbleDecoderState&& other) noexcept { 39 nextChunkToReturn_ = std::exchange(other.nextChunkToReturn_, -1); 40 } 41 BufferingNimbleDecoderState& operator=(const BufferingNimbleDecoderState&) = 42 delete; 43 BufferingNimbleDecoderState& operator=( 44 BufferingNimbleDecoderState&& other) noexcept { 45 nextChunkToReturn_ = std::exchange(other.nextChunkToReturn_, -1); 46 return *this; 47 } 48 49 private: 50 template <ChunkRepr repr> 51 friend class BufferingNimbleDecoder; 52 BufferingNimbleDecoderStateBufferingNimbleDecoderState53 explicit BufferingNimbleDecoderState(ssize_t nextChunkToReturn) noexcept 54 : nextChunkToReturn_(nextChunkToReturn) {} 55 56 ssize_t nextChunkToReturn_ = -1; 57 }; 58 59 // The buffering decoder serves two purposes: 60 // 1. Provides chunk-at-a-time semantics. Naively, lower level decoders only 61 // work in blocks of 4 chunks (because each control stream byte maps to 4 62 // chunks). But often times, consumer classes will want just one or two 63 // chunks, and don't want to have to deal with bookkeeping the extra ones. 64 // 2. It decodes more than 1 block at a time, as an optimization. (We can save 65 // some boundary calculation, as well as do some cache locality tuning). 66 template <ChunkRepr repr> 67 class BufferingNimbleDecoder { 68 public: 69 BufferingNimbleDecoder() = default; 70 ~BufferingNimbleDecoder() = default; 71 BufferingNimbleDecoder(const BufferingNimbleDecoder&) = delete; 72 BufferingNimbleDecoder& operator=(const BufferingNimbleDecoder&) = delete; 73 setControlInput(const folly::io::Cursor & cursor)74 void setControlInput(const folly::io::Cursor& cursor) { 75 controlCursor_ = cursor; 76 } 77 setDataInput(const folly::io::Cursor & cursor)78 void setDataInput(const folly::io::Cursor& cursor) { dataCursor_ = cursor; } 79 borrowState()80 BufferingNimbleDecoderState borrowState() { 81 if (folly::kIsDebug) { 82 DCHECK(!stateBorrowed_); 83 stateBorrowed_ = true; 84 } 85 return BufferingNimbleDecoderState{nextChunkToReturn_}; 86 } 87 returnState(BufferingNimbleDecoderState state)88 void returnState(BufferingNimbleDecoderState state) { 89 DCHECK(state.nextChunkToReturn_ != -1); 90 91 nextChunkToReturn_ = state.nextChunkToReturn_; 92 93 if (folly::kIsDebug) { 94 DCHECK(stateBorrowed_); 95 stateBorrowed_ = false; 96 state.nextChunkToReturn_ = -1; 97 } 98 } 99 nextChunk()100 std::uint32_t nextChunk() { 101 DCHECK(!stateBorrowed_); 102 auto state = borrowState(); 103 std::uint32_t ret = nextChunk(state); 104 returnState(std::move(state)); 105 return ret; 106 } 107 nextChunk(BufferingNimbleDecoderState & state)108 std::uint32_t nextChunk(BufferingNimbleDecoderState& state) { 109 DCHECK(stateBorrowed_); 110 DCHECK(state.nextChunkToReturn_ != -1); 111 112 if (UNLIKELY(state.nextChunkToReturn_ == maxChunkFilled_)) { 113 fillBuffer(); 114 state = BufferingNimbleDecoderState{0}; 115 } 116 std::uint32_t result = chunks_[state.nextChunkToReturn_]; 117 ++state.nextChunkToReturn_; 118 return result; 119 } 120 121 private: fillBuffer()122 void fillBuffer() { 123 maxChunkFilled_ = 0; 124 // We were asked to produce more chunks, but can't advance in the control 125 // stream; the input is invalid. Note that a similar check on the data 126 // stream wouldn't be correct; a control byte may correspond to 0 data 127 // bytes. Instead, we check that we didn't overflow manually, in the slow 128 // path below. 129 if (!controlCursor_.canAdvance(1)) { 130 protocol::TProtocolException::throwTruncatedData(); 131 } 132 while (maxChunkFilled_ < kChunksToBuffer && controlCursor_.canAdvance(1)) { 133 ssize_t minControlBytes = controlCursor_.length(); 134 ssize_t minBlocks = dataCursor_.length() / kMaxBytesPerBlock; 135 ssize_t headRoom = (kChunksToBuffer - maxChunkFilled_) / kChunksPerBlock; 136 ssize_t numUnconditionalIters = 137 std::min({minControlBytes, minBlocks, headRoom}); 138 ssize_t dataBytesConsumed = 0; 139 if (UNLIKELY(numUnconditionalIters == 0)) { 140 // Slow path where we cross IOBufs. 141 std::uint8_t controlByte = controlCursor_.read<std::uint8_t>(); 142 std::array<unsigned char, kMaxBytesPerBlock> buf; 143 size_t bytesPulled = 144 dataCursor_.pullAtMost(buf.data(), kMaxBytesPerBlock); 145 if (LIKELY(bytesPulled == kMaxBytesPerBlock)) { 146 // We're at the end of the current IOBuf, but not necessarily at the 147 // end of the whole chain. 148 size_t bytesDecoded = decodeNimbleBlock<repr>( 149 controlByte, folly::ByteRange(buf), &chunks_[maxChunkFilled_]); 150 dataCursor_.retreat(bytesPulled - bytesDecoded); 151 maxChunkFilled_ += kChunksPerBlock; 152 } else { 153 std::memset(&buf[bytesPulled], 0, kMaxBytesPerBlock - bytesPulled); 154 size_t bytesDecoded = decodeNimbleBlock<repr>( 155 controlByte, folly::ByteRange(buf), &chunks_[maxChunkFilled_]); 156 if (bytesDecoded > bytesPulled) { 157 protocol::TProtocolException::throwTruncatedData(); 158 } 159 dataCursor_.retreat(bytesPulled - bytesDecoded); 160 maxChunkFilled_ += kChunksPerBlock; 161 } 162 } else { 163 // Otherwise, we iterate through until we hit the end or run out of 164 // buffer. 165 166 // We do a little dance where we move values into temporaries, run the 167 // loop, and then move them back. This lets the compiler keep the values 168 // in registers, avoiding having to materialize the memory operations; 169 // this would not be a correct optimization otherwise (what if the 170 // buffers aliased *this?). 171 ssize_t maxChunkFilledTemp = maxChunkFilled_; 172 const std::uint8_t* controlBufTemp = controlCursor_.data(); 173 const std::uint8_t* dataBufTemp = dataCursor_.data(); 174 for (ssize_t i = 0; i < numUnconditionalIters; ++i) { 175 std::uint8_t controlByte = controlBufTemp[i]; 176 dataBytesConsumed += decodeNimbleBlock<repr>( 177 controlByte, 178 // Note that more obvious thing, of subranging a peekBytes() 179 // result, unfortunately has a cost here; it introduces at least 180 // an extra branch per block. 181 folly::ByteRange( 182 dataBufTemp + dataBytesConsumed, 183 dataBufTemp + dataBytesConsumed + kMaxBytesPerBlock), 184 &chunks_[maxChunkFilledTemp]); 185 maxChunkFilledTemp += kChunksPerBlock; 186 } 187 maxChunkFilled_ = maxChunkFilledTemp; 188 controlCursor_.skip(numUnconditionalIters); 189 dataCursor_.skip(dataBytesConsumed); 190 } 191 } 192 } 193 194 // Low level decode works 4 chunks at a time. 195 const static int kChunksToBuffer = 256; 196 static_assert( 197 kChunksToBuffer % kChunksPerBlock == 0, 198 "Must not have partially decoded blocks"); 199 200 std::uint32_t chunks_[kChunksToBuffer]; 201 bool stateBorrowed_ = false; 202 ssize_t nextChunkToReturn_ = 0; 203 ssize_t maxChunkFilled_ = 0; 204 205 folly::io::Cursor controlCursor_{nullptr}; 206 folly::io::Cursor dataCursor_{nullptr}; 207 }; 208 209 } // namespace detail 210 } // namespace thrift 211 } // namespace apache 212