1 //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
10 #define LLVM_SUPPORT_BINARYSTREAMARRAY_H
11 
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/BinaryStreamRef.h"
#include "llvm/Support/Error.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
18 
19 /// Lightweight arrays that are backed by an arbitrary BinaryStream.  This file
20 /// provides two different array implementations.
21 ///
22 ///     VarStreamArray - Arrays of variable length records.  The user specifies
23 ///       an Extractor type that can extract a record from a given offset and
24 ///       return the number of bytes consumed by the record.
25 ///
26 ///     FixedStreamArray - Arrays of fixed length records.  This is similar in
27 ///       spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
28 ///       elements of the array need not be laid out in contiguous memory.
29 namespace llvm {
30 
31 /// VarStreamArrayExtractor is intended to be specialized to provide customized
32 /// extraction logic.  On input it receives a BinaryStreamRef pointing to the
33 /// beginning of the next record, but where the length of the record is not yet
34 /// known.  Upon completion, it should return an appropriate Error instance if
35 /// a record could not be extracted, or if one could be extracted it should
36 /// return success and set Len to the number of bytes this record occupied in
37 /// the underlying stream, and it should fill out the fields of the value type
38 /// Item appropriately to represent the current record.
39 ///
40 /// You can specialize this template for your own custom value types to avoid
41 /// having to specify a second template argument to VarStreamArray (documented
42 /// below).
template <typename T> struct VarStreamArrayExtractor {
  // Primary template: the call operator is deliberately deleted, so invoking
  // an extractor that has not been explicitly specialized for T is a
  // compile-time error.  A specialization must implement a call operator with
  // exactly this signature:
  //   Stream - the stream handed to the extractor for the record to decode.
  //   Len    - out-parameter for the number of bytes the record occupies.
  //   Item   - out-parameter that is filled in with the decoded record.
  // The returned Error reports whether extraction succeeded.
  Error operator()(BinaryStreamRef Stream, uint32_t &Len,
                   T &Item) const = delete;
};
49 
50 /// VarStreamArray represents an array of variable length records backed by a
51 /// stream.  This could be a contiguous sequence of bytes in memory, it could
52 /// be a file on disk, or it could be a PDB stream where bytes are stored as
53 /// discontiguous blocks in a file.  Usually it is desirable to treat arrays
54 /// as contiguous blocks of memory, but doing so with large PDB files, for
55 /// example, could mean allocating huge amounts of memory just to allow
56 /// re-ordering of stream data to be contiguous before iterating over it.  By
57 /// abstracting this out, we need not duplicate this memory, and we can
58 /// iterate over arrays in arbitrarily formatted streams.  Elements are parsed
59 /// lazily on iteration, so there is no upfront cost associated with building
60 /// or copying a VarStreamArray, no matter how large it may be.
61 ///
62 /// You create a VarStreamArray by specifying a ValueType and an Extractor type.
63 /// If you do not specify an Extractor type, you are expected to specialize
64 /// VarStreamArrayExtractor<T> for your ValueType.
65 ///
66 /// By default an Extractor is default constructed in the class, but in some
67 /// cases you might find it useful for an Extractor to maintain state across
68 /// extractions.  In this case you can provide your own Extractor through a
69 /// secondary constructor.  The following examples show various ways of
70 /// creating a VarStreamArray.
71 ///
72 ///       // Will use VarStreamArrayExtractor<MyType> as the extractor.
73 ///       VarStreamArray<MyType> MyTypeArray;
74 ///
75 ///       // Will use a default-constructed MyExtractor as the extractor.
76 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray2;
77 ///
78 ///       // Will use the specific instance of MyExtractor provided.
79 ///       // MyExtractor need not be default-constructible in this case.
80 ///       MyExtractor E(SomeContext);
81 ///       VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
82 ///
83 
84 template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
85 
86 template <typename ValueType,
87           typename Extractor = VarStreamArrayExtractor<ValueType>>
88 class VarStreamArray {
89   friend class VarStreamArrayIterator<ValueType, Extractor>;
90 
91 public:
92   typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
93 
94   VarStreamArray() = default;
95 
96   explicit VarStreamArray(const Extractor &E) : E(E) {}
97 
98   explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
99       : Stream(Stream), Skew(Skew) {}
100 
101   VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
102       : Stream(Stream), E(E), Skew(Skew) {}
103 
104   Iterator begin(bool *HadError = nullptr) const {
105     return Iterator(*this, E, Skew, nullptr);
106   }
107 
108   bool valid() const { return Stream.valid(); }
109 
110   uint32_t skew() const { return Skew; }
111   Iterator end() const { return Iterator(E); }
112 
113   bool empty() const { return Stream.getLength() == 0; }
114 
115   VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
116                                                  uint32_t End) const {
117     assert(Begin >= Skew);
118     // We should never cut off the beginning of the stream since it might be
119     // skewed, meaning the initial bytes are important.
120     BinaryStreamRef NewStream = Stream.slice(0, End);
121     return {NewStream, E, Begin};
122   }
123 
124   /// given an offset into the array's underlying stream, return an
125   /// iterator to the record at that offset.  This is considered unsafe
126   /// since the behavior is undefined if \p Offset does not refer to the
127   /// beginning of a valid record.
128   Iterator at(uint32_t Offset) const {
129     return Iterator(*this, E, Offset, nullptr);
130   }
131 
132   const Extractor &getExtractor() const { return E; }
133   Extractor &getExtractor() { return E; }
134 
135   BinaryStreamRef getUnderlyingStream() const { return Stream; }
136   void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
137     Stream = NewStream;
138     Skew = NewSkew;
139   }
140 
141   void drop_front() { Skew += begin()->length(); }
142 
143 private:
144   BinaryStreamRef Stream;
145   Extractor E;
146   uint32_t Skew = 0;
147 };
148 
149 template <typename ValueType, typename Extractor>
150 class VarStreamArrayIterator
151     : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
152                                   std::forward_iterator_tag, ValueType> {
153   typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
154   typedef VarStreamArray<ValueType, Extractor> ArrayType;
155 
156 public:
157   VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
158                          uint32_t Offset, bool *HadError)
159       : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
160         Array(&Array), AbsOffset(Offset), HadError(HadError) {
161     if (IterRef.getLength() == 0)
162       moveToEnd();
163     else {
164       auto EC = Extract(IterRef, ThisLen, ThisValue);
165       if (EC) {
166         consumeError(std::move(EC));
167         markError();
168       }
169     }
170   }
171 
172   VarStreamArrayIterator() = default;
173   explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
174   ~VarStreamArrayIterator() = default;
175 
176   bool operator==(const IterType &R) const {
177     if (Array && R.Array) {
178       // Both have a valid array, make sure they're same.
179       assert(Array == R.Array);
180       return IterRef == R.IterRef;
181     }
182 
183     // Both iterators are at the end.
184     if (!Array && !R.Array)
185       return true;
186 
187     // One is not at the end and one is.
188     return false;
189   }
190 
191   const ValueType &operator*() const {
192     assert(Array && !HasError);
193     return ThisValue;
194   }
195 
196   ValueType &operator*() {
197     assert(Array && !HasError);
198     return ThisValue;
199   }
200 
201   IterType &operator+=(unsigned N) {
202     for (unsigned I = 0; I < N; ++I) {
203       // We are done with the current record, discard it so that we are
204       // positioned at the next record.
205       AbsOffset += ThisLen;
206       IterRef = IterRef.drop_front(ThisLen);
207       if (IterRef.getLength() == 0) {
208         // There is nothing after the current record, we must make this an end
209         // iterator.
210         moveToEnd();
211       } else {
212         // There is some data after the current record.
213         auto EC = Extract(IterRef, ThisLen, ThisValue);
214         if (EC) {
215           consumeError(std::move(EC));
216           markError();
217         } else if (ThisLen == 0) {
218           // An empty record? Make this an end iterator.
219           moveToEnd();
220         }
221       }
222     }
223     return *this;
224   }
225 
226   uint32_t offset() const { return AbsOffset; }
227   uint32_t getRecordLength() const { return ThisLen; }
228 
229 private:
230   void moveToEnd() {
231     Array = nullptr;
232     ThisLen = 0;
233   }
234   void markError() {
235     moveToEnd();
236     HasError = true;
237     if (HadError != nullptr)
238       *HadError = true;
239   }
240 
241   ValueType ThisValue;
242   BinaryStreamRef IterRef;
243   Extractor Extract;
244   const ArrayType *Array{nullptr};
245   uint32_t ThisLen{0};
246   uint32_t AbsOffset{0};
247   bool HasError{false};
248   bool *HadError{nullptr};
249 };
250 
251 template <typename T> class FixedStreamArrayIterator;
252 
/// FixedStreamArray is similar to VarStreamArray, except with each record
/// having a fixed length.  As with VarStreamArray, there is no upfront
/// cost associated with building or copying a FixedStreamArray, as the
/// memory for each element is not read from the backing stream until that
/// element is iterated.
258 template <typename T> class FixedStreamArray {
259   friend class FixedStreamArrayIterator<T>;
260 
261 public:
262   typedef FixedStreamArrayIterator<T> Iterator;
263 
264   FixedStreamArray() = default;
265   explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
266     assert(Stream.getLength() % sizeof(T) == 0);
267   }
268 
269   bool operator==(const FixedStreamArray<T> &Other) const {
270     return Stream == Other.Stream;
271   }
272 
273   bool operator!=(const FixedStreamArray<T> &Other) const {
274     return !(*this == Other);
275   }
276 
277   FixedStreamArray(const FixedStreamArray &) = default;
278   FixedStreamArray &operator=(const FixedStreamArray &) = default;
279 
280   const T &operator[](uint32_t Index) const {
281     assert(Index < size());
282     uint32_t Off = Index * sizeof(T);
283     ArrayRef<uint8_t> Data;
284     if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
285       assert(false && "Unexpected failure reading from stream");
286       // This should never happen since we asserted that the stream length was
287       // an exact multiple of the element size.
288       consumeError(std::move(EC));
289     }
290     assert(isAddrAligned(Align::Of<T>(), Data.data()));
291     return *reinterpret_cast<const T *>(Data.data());
292   }
293 
294   uint32_t size() const { return Stream.getLength() / sizeof(T); }
295 
296   bool empty() const { return size() == 0; }
297 
298   FixedStreamArrayIterator<T> begin() const {
299     return FixedStreamArrayIterator<T>(*this, 0);
300   }
301 
302   FixedStreamArrayIterator<T> end() const {
303     return FixedStreamArrayIterator<T>(*this, size());
304   }
305 
306   const T &front() const { return *begin(); }
307   const T &back() const {
308     FixedStreamArrayIterator<T> I = end();
309     return *(--I);
310   }
311 
312   BinaryStreamRef getUnderlyingStream() const { return Stream; }
313 
314 private:
315   BinaryStreamRef Stream;
316 };
317 
318 template <typename T>
319 class FixedStreamArrayIterator
320     : public iterator_facade_base<FixedStreamArrayIterator<T>,
321                                   std::random_access_iterator_tag, const T> {
322 
323 public:
324   FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
325       : Array(Array), Index(Index) {}
326 
327   FixedStreamArrayIterator<T>(const FixedStreamArrayIterator<T> &Other)
328       : Array(Other.Array), Index(Other.Index) {}
329   FixedStreamArrayIterator<T> &
330   operator=(const FixedStreamArrayIterator<T> &Other) {
331     Array = Other.Array;
332     Index = Other.Index;
333     return *this;
334   }
335 
336   const T &operator*() const { return Array[Index]; }
337   const T &operator*() { return Array[Index]; }
338 
339   bool operator==(const FixedStreamArrayIterator<T> &R) const {
340     assert(Array == R.Array);
341     return (Index == R.Index) && (Array == R.Array);
342   }
343 
344   FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
345     Index += N;
346     return *this;
347   }
348 
349   FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
350     assert(std::ptrdiff_t(Index) >= N);
351     Index -= N;
352     return *this;
353   }
354 
355   std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
356     assert(Array == R.Array);
357     assert(Index >= R.Index);
358     return Index - R.Index;
359   }
360 
361   bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
362     assert(Array == RHS.Array);
363     return Index < RHS.Index;
364   }
365 
366 private:
367   FixedStreamArray<T> Array;
368   uint32_t Index;
369 };
370 
371 } // namespace llvm
372 
373 #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
374