1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// Lightweight arrays that are backed by an arbitrary BinaryStream.  This file
11 /// provides two different array implementations.
12 ///
13 ///     VarStreamArray - Arrays of variable length records.  The user specifies
14 ///       an Extractor type that can extract a record from a given offset and
15 ///       return the number of bytes consumed by the record.
16 ///
17 ///     FixedStreamArray - Arrays of fixed length records.  This is similar in
18 ///       spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
19 ///       elements of the array need not be laid out in contiguous memory.
20 ///
21 
22 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
23 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H
24 
25 #include "llvm/ADT/ArrayRef.h"
26 #include "llvm/ADT/iterator.h"
27 #include "llvm/Support/Alignment.h"
28 #include "llvm/Support/BinaryStreamRef.h"
29 #include "llvm/Support/Error.h"
30 #include <cassert>
31 #include <cstdint>
32 
33 namespace llvm {
34 
35 /// VarStreamArrayExtractor is intended to be specialized to provide customized
36 /// extraction logic.  On input it receives a BinaryStreamRef pointing to the
37 /// beginning of the next record, but where the length of the record is not yet
38 /// known.  Upon completion, it should return an appropriate Error instance if
39 /// a record could not be extracted, or if one could be extracted it should
40 /// return success and set Len to the number of bytes this record occupied in
41 /// the underlying stream, and it should fill out the fields of the value type
42 /// Item appropriately to represent the current record.
43 ///
44 /// You can specialize this template for your own custom value types to avoid
45 /// having to specify a second template argument to VarStreamArray (documented
46 /// below).
47 template <typename T> struct VarStreamArrayExtractor {
48   // Method intentionally deleted.  You must provide an explicit specialization
49   // with the following method implemented.
50   Error operator()(BinaryStreamRef Stream, uint32_t &Len,
51                    T &Item) const = delete;
52 };
53 
54 /// VarStreamArray represents an array of variable length records backed by a
55 /// stream.  This could be a contiguous sequence of bytes in memory, it could
56 /// be a file on disk, or it could be a PDB stream where bytes are stored as
57 /// discontiguous blocks in a file.  Usually it is desirable to treat arrays
58 /// as contiguous blocks of memory, but doing so with large PDB files, for
59 /// example, could mean allocating huge amounts of memory just to allow
60 /// re-ordering of stream data to be contiguous before iterating over it.  By
61 /// abstracting this out, we need not duplicate this memory, and we can
62 /// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
63 /// lazily on iteration, so there is no upfront cost associated with building
64 /// or copying a VarStreamArray, no matter how large it may be.
65 ///
66 /// You create a VarStreamArray by specifying a ValueType and an Extractor type.
67 /// If you do not specify an Extractor type, you are expected to specialize
68 /// VarStreamArrayExtractor<T> for your ValueType.
69 ///
70 /// By default an Extractor is default constructed in the class, but in some
71 /// cases you might find it useful for an Extractor to maintain state across
72 /// extractions.  In this case you can provide your own Extractor through a
73 /// secondary constructor.  The following examples show various ways of
74 /// creating a VarStreamArray.
75 ///
76 ///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
77 ///       VarStreamArray<MyType> MyTypeArray;
78 ///
79 ///       // Will use a default-constructed MyExtractor as the extractor.
80 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
81 ///
82 ///       // Will use the specific instance of MyExtractor provided.
83 ///       // MyExtractor need not be default-constructible in this case.
84 ///       MyExtractor E(SomeContext);
85 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
86 ///
87 
88 template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
89 
90 template <typename ValueType,
91           typename Extractor = VarStreamArrayExtractor<ValueType>>
92 class VarStreamArray {
93   friend class VarStreamArrayIterator<ValueType, Extractor>;
94 
95 public:
96   typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
97 
98   VarStreamArray() = default;
99 
100   explicit VarStreamArray(const Extractor &E) : E(E) {}
101 
102   explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
103       : Stream(Stream), Skew(Skew) {}
104 
105   VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
106       : Stream(Stream), E(E), Skew(Skew) {}
107 
108   Iterator begin(bool *HadError = nullptr) const {
109     return Iterator(*this, E, Skew, nullptr);
110   }
111 
112   bool valid() const { return Stream.valid(); }
113 
114   uint32_t skew() const { return Skew; }
115   Iterator end() const { return Iterator(E); }
116 
117   bool empty() const { return Stream.getLength() == 0; }
118 
119   VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
120                                                  uint32_t End) const {
121     assert(Begin >= Skew);
122     // We should never cut off the beginning of the stream since it might be
123     // skewed, meaning the initial bytes are important.
124     BinaryStreamRef NewStream = Stream.slice(0, End);
125     return {NewStream, E, Begin};
126   }
127 
128   /// given an offset into the array's underlying stream, return an
129   /// iterator to the record at that offset.  This is considered unsafe
130   /// since the behavior is undefined if \p Offset does not refer to the
131   /// beginning of a valid record.
132   Iterator at(uint32_t Offset) const {
133     return Iterator(*this, E, Offset, nullptr);
134   }
135 
136   const Extractor &getExtractor() const { return E; }
137   Extractor &getExtractor() { return E; }
138 
139   BinaryStreamRef getUnderlyingStream() const { return Stream; }
140   void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
141     Stream = NewStream;
142     Skew = NewSkew;
143   }
144 
145   void drop_front() { Skew += begin()->length(); }
146 
147 private:
148   BinaryStreamRef Stream;
149   Extractor E;
150   uint32_t Skew = 0;
151 };
152 
153 template <typename ValueType, typename Extractor>
154 class VarStreamArrayIterator
155     : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
156                                   std::forward_iterator_tag, ValueType> {
157   typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
158   typedef VarStreamArray<ValueType, Extractor> ArrayType;
159 
160 public:
161   VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
162                          uint32_t Offset, bool *HadError)
163       : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
164         Array(&Array), AbsOffset(Offset), HadError(HadError) {
165     if (IterRef.getLength() == 0)
166       moveToEnd();
167     else {
168       auto EC = Extract(IterRef, ThisLen, ThisValue);
169       if (EC) {
170         consumeError(std::move(EC));
171         markError();
172       }
173     }
174   }
175 
176   VarStreamArrayIterator() = default;
177   explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
178   ~VarStreamArrayIterator() = default;
179 
180   bool operator==(const IterType &R) const {
181     if (Array && R.Array) {
182       // Both have a valid array, make sure they're same.
183       assert(Array == R.Array);
184       return IterRef == R.IterRef;
185     }
186 
187     // Both iterators are at the end.
188     if (!Array && !R.Array)
189       return true;
190 
191     // One is not at the end and one is.
192     return false;
193   }
194 
195   const ValueType &operator*() const {
196     assert(Array && !HasError);
197     return ThisValue;
198   }
199 
200   ValueType &operator*() {
201     assert(Array && !HasError);
202     return ThisValue;
203   }
204 
205   IterType &operator+=(unsigned N) {
206     for (unsigned I = 0; I < N; ++I) {
207       // We are done with the current record, discard it so that we are
208       // positioned at the next record.
209       AbsOffset += ThisLen;
210       IterRef = IterRef.drop_front(ThisLen);
211       if (IterRef.getLength() == 0) {
212         // There is nothing after the current record, we must make this an end
213         // iterator.
214         moveToEnd();
215       } else {
216         // There is some data after the current record.
217         auto EC = Extract(IterRef, ThisLen, ThisValue);
218         if (EC) {
219           consumeError(std::move(EC));
220           markError();
221         } else if (ThisLen == 0) {
222           // An empty record? Make this an end iterator.
223           moveToEnd();
224         }
225       }
226     }
227     return *this;
228   }
229 
230   uint32_t offset() const { return AbsOffset; }
231   uint32_t getRecordLength() const { return ThisLen; }
232 
233 private:
234   void moveToEnd() {
235     Array = nullptr;
236     ThisLen = 0;
237   }
238   void markError() {
239     moveToEnd();
240     HasError = true;
241     if (HadError != nullptr)
242       *HadError = true;
243   }
244 
245   ValueType ThisValue;
246   BinaryStreamRef IterRef;
247   Extractor Extract;
248   const ArrayType *Array{nullptr};
249   uint32_t ThisLen{0};
250   uint32_t AbsOffset{0};
251   bool HasError{false};
252   bool *HadError{nullptr};
253 };
254 
255 template <typename T> class FixedStreamArrayIterator;
256 
/// FixedStreamArray is similar to VarStreamArray, except with each record
/// having a fixed length.  As with VarStreamArray, there is no upfront
/// cost associated with building or copying a FixedStreamArray, as the
/// memory for each element is not read from the backing stream until that
/// element is iterated.
262 template <typename T> class FixedStreamArray {
263   friend class FixedStreamArrayIterator<T>;
264 
265 public:
266   typedef FixedStreamArrayIterator<T> Iterator;
267 
268   FixedStreamArray() = default;
269   explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
270     assert(Stream.getLength() % sizeof(T) == 0);
271   }
272 
273   bool operator==(const FixedStreamArray<T> &Other) const {
274     return Stream == Other.Stream;
275   }
276 
277   bool operator!=(const FixedStreamArray<T> &Other) const {
278     return !(*this == Other);
279   }
280 
281   FixedStreamArray(const FixedStreamArray &) = default;
282   FixedStreamArray &operator=(const FixedStreamArray &) = default;
283 
284   const T &operator[](uint32_t Index) const {
285     assert(Index < size());
286     uint32_t Off = Index * sizeof(T);
287     ArrayRef<uint8_t> Data;
288     if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
289       assert(false && "Unexpected failure reading from stream");
290       // This should never happen since we asserted that the stream length was
291       // an exact multiple of the element size.
292       consumeError(std::move(EC));
293     }
294     assert(isAddrAligned(Align::Of<T>(), Data.data()));
295     return *reinterpret_cast<const T *>(Data.data());
296   }
297 
298   uint32_t size() const { return Stream.getLength() / sizeof(T); }
299 
300   bool empty() const { return size() == 0; }
301 
302   FixedStreamArrayIterator<T> begin() const {
303     return FixedStreamArrayIterator<T>(*this, 0);
304   }
305 
306   FixedStreamArrayIterator<T> end() const {
307     return FixedStreamArrayIterator<T>(*this, size());
308   }
309 
310   const T &front() const { return *begin(); }
311   const T &back() const {
312     FixedStreamArrayIterator<T> I = end();
313     return *(--I);
314   }
315 
316   BinaryStreamRef getUnderlyingStream() const { return Stream; }
317 
318 private:
319   BinaryStreamRef Stream;
320 };
321 
322 template <typename T>
323 class FixedStreamArrayIterator
324     : public iterator_facade_base<FixedStreamArrayIterator<T>,
325                                   std::random_access_iterator_tag, const T> {
326 
327 public:
328   FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
329       : Array(Array), Index(Index) {}
330 
331   FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other)
332       : Array(Other.Array), Index(Other.Index) {}
333   FixedStreamArrayIterator<T> &
334   operator=(const FixedStreamArrayIterator<T> &Other) {
335     Array = Other.Array;
336     Index = Other.Index;
337     return *this;
338   }
339 
340   const T &operator*() const { return Array[Index]; }
341   const T &operator*() { return Array[Index]; }
342 
343   bool operator==(const FixedStreamArrayIterator<T> &R) const {
344     assert(Array == R.Array);
345     return (Index == R.Index) && (Array == R.Array);
346   }
347 
348   FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
349     Index += N;
350     return *this;
351   }
352 
353   FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
354     assert(std::ptrdiff_t(Index) >= N);
355     Index -= N;
356     return *this;
357   }
358 
359   std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
360     assert(Array == R.Array);
361     assert(Index >= R.Index);
362     return Index - R.Index;
363   }
364 
365   bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
366     assert(Array == RHS.Array);
367     return Index < RHS.Index;
368   }
369 
370 private:
371   FixedStreamArray<T> Array;
372   uint32_t Index;
373 };
374 
375 } // namespace llvm
376 
377 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
378