1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file 11 /// provides two different array implementations. 12 /// 13 /// VarStreamArray - Arrays of variable length records. The user specifies 14 /// an Extractor type that can extract a record from a given offset and 15 /// return the number of bytes consumed by the record. 16 /// 17 /// FixedStreamArray - Arrays of fixed length records. This is similar in 18 /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the 19 /// elements of the array need not be laid out in contiguous memory. 20 /// 21 22 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H 23 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H 24 25 #include "llvm/ADT/ArrayRef.h" 26 #include "llvm/ADT/iterator.h" 27 #include "llvm/Support/Alignment.h" 28 #include "llvm/Support/BinaryStreamRef.h" 29 #include "llvm/Support/Error.h" 30 #include <cassert> 31 #include <cstdint> 32 33 namespace llvm { 34 35 /// VarStreamArrayExtractor is intended to be specialized to provide customized 36 /// extraction logic. On input it receives a BinaryStreamRef pointing to the 37 /// beginning of the next record, but where the length of the record is not yet 38 /// known. Upon completion, it should return an appropriate Error instance if 39 /// a record could not be extracted, or if one could be extracted it should 40 /// return success and set Len to the number of bytes this record occupied in 41 /// the underlying stream, and it should fill out the fields of the value type 42 /// Item appropriately to represent the current record. 43 /// 44 /// You can specialize this template for your own custom value types to avoid 45 /// having to specify a second template argument to VarStreamArray (documented 46 /// below). 47 template <typename T> struct VarStreamArrayExtractor { 48 // Method intentionally deleted. You must provide an explicit specialization 49 // with the following method implemented. 50 Error operator()(BinaryStreamRef Stream, uint32_t &Len, 51 T &Item) const = delete; 52 }; 53 54 /// VarStreamArray represents an array of variable length records backed by a 55 /// stream. This could be a contiguous sequence of bytes in memory, it could 56 /// be a file on disk, or it could be a PDB stream where bytes are stored as 57 /// discontiguous blocks in a file. Usually it is desirable to treat arrays 58 /// as contiguous blocks of memory, but doing so with large PDB files, for 59 /// example, could mean allocating huge amounts of memory just to allow 60 /// re-ordering of stream data to be contiguous before iterating over it. By 61 /// abstracting this out, we need not duplicate this memory, and we can 62 /// iterate over arrays in arbitrarily formatted streams. Elements are parsed 63 /// lazily on iteration, so there is no upfront cost associated with building 64 /// or copying a VarStreamArray, no matter how large it may be. 65 /// 66 /// You create a VarStreamArray by specifying a ValueType and an Extractor type. 67 /// If you do not specify an Extractor type, you are expected to specialize 68 /// VarStreamArrayExtractor<T> for your ValueType. 69 /// 70 /// By default an Extractor is default constructed in the class, but in some 71 /// cases you might find it useful for an Extractor to maintain state across 72 /// extractions. In this case you can provide your own Extractor through a 73 /// secondary constructor. The following examples show various ways of 74 /// creating a VarStreamArray. 75 /// 76 /// // Will use VarStreamArrayExtractor<MyType> as the extractor. 77 /// VarStreamArray<MyType> MyTypeArray; 78 /// 79 /// // Will use a default-constructed MyExtractor as the extractor. 80 /// VarStreamArray<MyType, MyExtractor> MyTypeArray2; 81 /// 82 /// // Will use the specific instance of MyExtractor provided. 83 /// // MyExtractor need not be default-constructible in this case. 84 /// MyExtractor E(SomeContext); 85 /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E); 86 /// 87 88 template <typename ValueType, typename Extractor> class VarStreamArrayIterator; 89 90 template <typename ValueType, 91 typename Extractor = VarStreamArrayExtractor<ValueType>> 92 class VarStreamArray { 93 friend class VarStreamArrayIterator<ValueType, Extractor>; 94 95 public: 96 typedef VarStreamArrayIterator<ValueType, Extractor> Iterator; 97 98 VarStreamArray() = default; 99 100 explicit VarStreamArray(const Extractor &E) : E(E) {} 101 102 explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0) 103 : Stream(Stream), Skew(Skew) {} 104 105 VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0) 106 : Stream(Stream), E(E), Skew(Skew) {} 107 108 Iterator begin(bool *HadError = nullptr) const { 109 return Iterator(*this, E, Skew, nullptr); 110 } 111 112 bool valid() const { return Stream.valid(); } 113 114 uint32_t skew() const { return Skew; } 115 Iterator end() const { return Iterator(E); } 116 117 bool empty() const { return Stream.getLength() == 0; } 118 119 VarStreamArray<ValueType, Extractor> substream(uint32_t Begin, 120 uint32_t End) const { 121 assert(Begin >= Skew); 122 // We should never cut off the beginning of the stream since it might be 123 // skewed, meaning the initial bytes are important. 124 BinaryStreamRef NewStream = Stream.slice(0, End); 125 return {NewStream, E, Begin}; 126 } 127 128 /// given an offset into the array's underlying stream, return an 129 /// iterator to the record at that offset. This is considered unsafe 130 /// since the behavior is undefined if \p Offset does not refer to the 131 /// beginning of a valid record. 132 Iterator at(uint32_t Offset) const { 133 return Iterator(*this, E, Offset, nullptr); 134 } 135 136 const Extractor &getExtractor() const { return E; } 137 Extractor &getExtractor() { return E; } 138 139 BinaryStreamRef getUnderlyingStream() const { return Stream; } 140 void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) { 141 Stream = NewStream; 142 Skew = NewSkew; 143 } 144 145 void drop_front() { Skew += begin()->length(); } 146 147 private: 148 BinaryStreamRef Stream; 149 Extractor E; 150 uint32_t Skew = 0; 151 }; 152 153 template <typename ValueType, typename Extractor> 154 class VarStreamArrayIterator 155 : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>, 156 std::forward_iterator_tag, ValueType> { 157 typedef VarStreamArrayIterator<ValueType, Extractor> IterType; 158 typedef VarStreamArray<ValueType, Extractor> ArrayType; 159 160 public: 161 VarStreamArrayIterator(const ArrayType &Array, const Extractor &E, 162 uint32_t Offset, bool *HadError) 163 : IterRef(Array.Stream.drop_front(Offset)), Extract(E), 164 Array(&Array), AbsOffset(Offset), HadError(HadError) { 165 if (IterRef.getLength() == 0) 166 moveToEnd(); 167 else { 168 auto EC = Extract(IterRef, ThisLen, ThisValue); 169 if (EC) { 170 consumeError(std::move(EC)); 171 markError(); 172 } 173 } 174 } 175 176 VarStreamArrayIterator() = default; 177 explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {} 178 ~VarStreamArrayIterator() = default; 179 180 bool operator==(const IterType &R) const { 181 if (Array && R.Array) { 182 // Both have a valid array, make sure they're same. 183 assert(Array == R.Array); 184 return IterRef == R.IterRef; 185 } 186 187 // Both iterators are at the end. 188 if (!Array && !R.Array) 189 return true; 190 191 // One is not at the end and one is. 192 return false; 193 } 194 195 const ValueType &operator*() const { 196 assert(Array && !HasError); 197 return ThisValue; 198 } 199 200 ValueType &operator*() { 201 assert(Array && !HasError); 202 return ThisValue; 203 } 204 205 IterType &operator+=(unsigned N) { 206 for (unsigned I = 0; I < N; ++I) { 207 // We are done with the current record, discard it so that we are 208 // positioned at the next record. 209 AbsOffset += ThisLen; 210 IterRef = IterRef.drop_front(ThisLen); 211 if (IterRef.getLength() == 0) { 212 // There is nothing after the current record, we must make this an end 213 // iterator. 214 moveToEnd(); 215 } else { 216 // There is some data after the current record. 217 auto EC = Extract(IterRef, ThisLen, ThisValue); 218 if (EC) { 219 consumeError(std::move(EC)); 220 markError(); 221 } else if (ThisLen == 0) { 222 // An empty record? Make this an end iterator. 223 moveToEnd(); 224 } 225 } 226 } 227 return *this; 228 } 229 230 uint32_t offset() const { return AbsOffset; } 231 uint32_t getRecordLength() const { return ThisLen; } 232 233 private: 234 void moveToEnd() { 235 Array = nullptr; 236 ThisLen = 0; 237 } 238 void markError() { 239 moveToEnd(); 240 HasError = true; 241 if (HadError != nullptr) 242 *HadError = true; 243 } 244 245 ValueType ThisValue; 246 BinaryStreamRef IterRef; 247 Extractor Extract; 248 const ArrayType *Array{nullptr}; 249 uint32_t ThisLen{0}; 250 uint32_t AbsOffset{0}; 251 bool HasError{false}; 252 bool *HadError{nullptr}; 253 }; 254 255 template <typename T> class FixedStreamArrayIterator; 256 257 /// FixedStreamArray is similar to VarStreamArray, except with each record 258 /// having a fixed-length. As with VarStreamArray, there is no upfront 259 /// cost associated with building or copying a FixedStreamArray, as the 260 /// memory for each element is not read from the backing stream until that 261 /// element is iterated. 262 template <typename T> class FixedStreamArray { 263 friend class FixedStreamArrayIterator<T>; 264 265 public: 266 typedef FixedStreamArrayIterator<T> Iterator; 267 268 FixedStreamArray() = default; 269 explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) { 270 assert(Stream.getLength() % sizeof(T) == 0); 271 } 272 273 bool operator==(const FixedStreamArray<T> &Other) const { 274 return Stream == Other.Stream; 275 } 276 277 bool operator!=(const FixedStreamArray<T> &Other) const { 278 return !(*this == Other); 279 } 280 281 FixedStreamArray(const FixedStreamArray &) = default; 282 FixedStreamArray &operator=(const FixedStreamArray &) = default; 283 284 const T &operator[](uint32_t Index) const { 285 assert(Index < size()); 286 uint32_t Off = Index * sizeof(T); 287 ArrayRef<uint8_t> Data; 288 if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) { 289 assert(false && "Unexpected failure reading from stream"); 290 // This should never happen since we asserted that the stream length was 291 // an exact multiple of the element size. 292 consumeError(std::move(EC)); 293 } 294 assert(isAddrAligned(Align::Of<T>(), Data.data())); 295 return *reinterpret_cast<const T *>(Data.data()); 296 } 297 298 uint32_t size() const { return Stream.getLength() / sizeof(T); } 299 300 bool empty() const { return size() == 0; } 301 302 FixedStreamArrayIterator<T> begin() const { 303 return FixedStreamArrayIterator<T>(*this, 0); 304 } 305 306 FixedStreamArrayIterator<T> end() const { 307 return FixedStreamArrayIterator<T>(*this, size()); 308 } 309 310 const T &front() const { return *begin(); } 311 const T &back() const { 312 FixedStreamArrayIterator<T> I = end(); 313 return *(--I); 314 } 315 316 BinaryStreamRef getUnderlyingStream() const { return Stream; } 317 318 private: 319 BinaryStreamRef Stream; 320 }; 321 322 template <typename T> 323 class FixedStreamArrayIterator 324 : public iterator_facade_base<FixedStreamArrayIterator<T>, 325 std::random_access_iterator_tag, const T> { 326 327 public: 328 FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index) 329 : Array(Array), Index(Index) {} 330 331 FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other) 332 : Array(Other.Array), Index(Other.Index) {} 333 FixedStreamArrayIterator<T> & 334 operator=(const FixedStreamArrayIterator<T> &Other) { 335 Array = Other.Array; 336 Index = Other.Index; 337 return *this; 338 } 339 340 const T &operator*() const { return Array[Index]; } 341 const T &operator*() { return Array[Index]; } 342 343 bool operator==(const FixedStreamArrayIterator<T> &R) const { 344 assert(Array == R.Array); 345 return (Index == R.Index) && (Array == R.Array); 346 } 347 348 FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) { 349 Index += N; 350 return *this; 351 } 352 353 FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) { 354 assert(std::ptrdiff_t(Index) >= N); 355 Index -= N; 356 return *this; 357 } 358 359 std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const { 360 assert(Array == R.Array); 361 assert(Index >= R.Index); 362 return Index - R.Index; 363 } 364 365 bool operator<(const FixedStreamArrayIterator<T> &RHS) const { 366 assert(Array == RHS.Array); 367 return Index < RHS.Index; 368 } 369 370 private: 371 FixedStreamArray<T> Array; 372 uint32_t Index; 373 }; 374 375 } // namespace llvm 376 377 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H 378