BinaryStreamArray.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===- BinaryStreamArray.h - Array backed by an arbitrary stream *- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. ///
  14. /// \file
  15. /// Lightweight arrays that are backed by an arbitrary BinaryStream. This file
  16. /// provides two different array implementations.
  17. ///
  18. /// VarStreamArray - Arrays of variable length records. The user specifies
  19. /// an Extractor type that can extract a record from a given offset and
  20. /// return the number of bytes consumed by the record.
  21. ///
  22. /// FixedStreamArray - Arrays of fixed length records. This is similar in
  23. /// spirit to ArrayRef<T>, but since it is backed by a BinaryStream, the
  24. /// elements of the array need not be laid out in contiguous memory.
  25. ///
  26. #ifndef LLVM_SUPPORT_BINARYSTREAMARRAY_H
  27. #define LLVM_SUPPORT_BINARYSTREAMARRAY_H
  28. #include "llvm/ADT/ArrayRef.h"
  29. #include "llvm/ADT/iterator.h"
  30. #include "llvm/Support/Alignment.h"
  31. #include "llvm/Support/BinaryStreamRef.h"
  32. #include "llvm/Support/Error.h"
  33. #include <cassert>
  34. #include <cstdint>
  35. namespace llvm {
  36. /// VarStreamArrayExtractor is intended to be specialized to provide customized
  37. /// extraction logic. On input it receives a BinaryStreamRef pointing to the
  38. /// beginning of the next record, but where the length of the record is not yet
  39. /// known. Upon completion, it should return an appropriate Error instance if
  40. /// a record could not be extracted, or if one could be extracted it should
  41. /// return success and set Len to the number of bytes this record occupied in
  42. /// the underlying stream, and it should fill out the fields of the value type
  43. /// Item appropriately to represent the current record.
  44. ///
  45. /// You can specialize this template for your own custom value types to avoid
  46. /// having to specify a second template argument to VarStreamArray (documented
  47. /// below).
  48. template <typename T> struct VarStreamArrayExtractor {
  49. // Method intentionally deleted. You must provide an explicit specialization
  50. // with the following method implemented.
  51. Error operator()(BinaryStreamRef Stream, uint32_t &Len,
  52. T &Item) const = delete;
  53. };
  54. /// VarStreamArray represents an array of variable length records backed by a
  55. /// stream. This could be a contiguous sequence of bytes in memory, it could
  56. /// be a file on disk, or it could be a PDB stream where bytes are stored as
  57. /// discontiguous blocks in a file. Usually it is desirable to treat arrays
  58. /// as contiguous blocks of memory, but doing so with large PDB files, for
  59. /// example, could mean allocating huge amounts of memory just to allow
  60. /// re-ordering of stream data to be contiguous before iterating over it. By
  61. /// abstracting this out, we need not duplicate this memory, and we can
  62. /// iterate over arrays in arbitrarily formatted streams. Elements are parsed
  63. /// lazily on iteration, so there is no upfront cost associated with building
  64. /// or copying a VarStreamArray, no matter how large it may be.
  65. ///
  66. /// You create a VarStreamArray by specifying a ValueType and an Extractor type.
  67. /// If you do not specify an Extractor type, you are expected to specialize
  68. /// VarStreamArrayExtractor<T> for your ValueType.
  69. ///
  70. /// By default an Extractor is default constructed in the class, but in some
  71. /// cases you might find it useful for an Extractor to maintain state across
  72. /// extractions. In this case you can provide your own Extractor through a
  73. /// secondary constructor. The following examples show various ways of
  74. /// creating a VarStreamArray.
  75. ///
  76. /// // Will use VarStreamArrayExtractor<MyType> as the extractor.
  77. /// VarStreamArray<MyType> MyTypeArray;
  78. ///
  79. /// // Will use a default-constructed MyExtractor as the extractor.
  80. /// VarStreamArray<MyType, MyExtractor> MyTypeArray2;
  81. ///
  82. /// // Will use the specific instance of MyExtractor provided.
  83. /// // MyExtractor need not be default-constructible in this case.
  84. /// MyExtractor E(SomeContext);
  85. /// VarStreamArray<MyType, MyExtractor> MyTypeArray3(E);
  86. ///
  87. template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
  88. template <typename ValueType,
  89. typename Extractor = VarStreamArrayExtractor<ValueType>>
  90. class VarStreamArray {
  91. friend class VarStreamArrayIterator<ValueType, Extractor>;
  92. public:
  93. typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
  94. VarStreamArray() = default;
  95. explicit VarStreamArray(const Extractor &E) : E(E) {}
  96. explicit VarStreamArray(BinaryStreamRef Stream, uint32_t Skew = 0)
  97. : Stream(Stream), Skew(Skew) {}
  98. VarStreamArray(BinaryStreamRef Stream, const Extractor &E, uint32_t Skew = 0)
  99. : Stream(Stream), E(E), Skew(Skew) {}
  100. Iterator begin(bool *HadError = nullptr) const {
  101. return Iterator(*this, E, Skew, nullptr);
  102. }
  103. bool valid() const { return Stream.valid(); }
  104. bool isOffsetValid(uint32_t Offset) const { return at(Offset) != end(); }
  105. uint32_t skew() const { return Skew; }
  106. Iterator end() const { return Iterator(E); }
  107. bool empty() const { return Stream.getLength() == 0; }
  108. VarStreamArray<ValueType, Extractor> substream(uint32_t Begin,
  109. uint32_t End) const {
  110. assert(Begin >= Skew);
  111. // We should never cut off the beginning of the stream since it might be
  112. // skewed, meaning the initial bytes are important.
  113. BinaryStreamRef NewStream = Stream.slice(0, End);
  114. return {NewStream, E, Begin};
  115. }
  116. /// given an offset into the array's underlying stream, return an
  117. /// iterator to the record at that offset. This is considered unsafe
  118. /// since the behavior is undefined if \p Offset does not refer to the
  119. /// beginning of a valid record.
  120. Iterator at(uint32_t Offset) const {
  121. return Iterator(*this, E, Offset, nullptr);
  122. }
  123. const Extractor &getExtractor() const { return E; }
  124. Extractor &getExtractor() { return E; }
  125. BinaryStreamRef getUnderlyingStream() const { return Stream; }
  126. void setUnderlyingStream(BinaryStreamRef NewStream, uint32_t NewSkew = 0) {
  127. Stream = NewStream;
  128. Skew = NewSkew;
  129. }
  130. void drop_front() { Skew += begin()->length(); }
  131. private:
  132. BinaryStreamRef Stream;
  133. Extractor E;
  134. uint32_t Skew = 0;
  135. };
  136. template <typename ValueType, typename Extractor>
  137. class VarStreamArrayIterator
  138. : public iterator_facade_base<VarStreamArrayIterator<ValueType, Extractor>,
  139. std::forward_iterator_tag, const ValueType> {
  140. typedef VarStreamArrayIterator<ValueType, Extractor> IterType;
  141. typedef VarStreamArray<ValueType, Extractor> ArrayType;
  142. public:
  143. VarStreamArrayIterator(const ArrayType &Array, const Extractor &E,
  144. uint32_t Offset, bool *HadError)
  145. : IterRef(Array.Stream.drop_front(Offset)), Extract(E),
  146. Array(&Array), AbsOffset(Offset), HadError(HadError) {
  147. if (IterRef.getLength() == 0)
  148. moveToEnd();
  149. else {
  150. auto EC = Extract(IterRef, ThisLen, ThisValue);
  151. if (EC) {
  152. consumeError(std::move(EC));
  153. markError();
  154. }
  155. }
  156. }
  157. VarStreamArrayIterator() = default;
  158. explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
  159. ~VarStreamArrayIterator() = default;
  160. bool operator==(const IterType &R) const {
  161. if (Array && R.Array) {
  162. // Both have a valid array, make sure they're same.
  163. assert(Array == R.Array);
  164. return IterRef == R.IterRef;
  165. }
  166. // Both iterators are at the end.
  167. if (!Array && !R.Array)
  168. return true;
  169. // One is not at the end and one is.
  170. return false;
  171. }
  172. const ValueType &operator*() const {
  173. assert(Array && !HasError);
  174. return ThisValue;
  175. }
  176. IterType &operator+=(unsigned N) {
  177. for (unsigned I = 0; I < N; ++I) {
  178. // We are done with the current record, discard it so that we are
  179. // positioned at the next record.
  180. AbsOffset += ThisLen;
  181. IterRef = IterRef.drop_front(ThisLen);
  182. if (IterRef.getLength() == 0) {
  183. // There is nothing after the current record, we must make this an end
  184. // iterator.
  185. moveToEnd();
  186. } else {
  187. // There is some data after the current record.
  188. auto EC = Extract(IterRef, ThisLen, ThisValue);
  189. if (EC) {
  190. consumeError(std::move(EC));
  191. markError();
  192. } else if (ThisLen == 0) {
  193. // An empty record? Make this an end iterator.
  194. moveToEnd();
  195. }
  196. }
  197. }
  198. return *this;
  199. }
  200. uint32_t offset() const { return AbsOffset; }
  201. uint32_t getRecordLength() const { return ThisLen; }
  202. private:
  203. void moveToEnd() {
  204. Array = nullptr;
  205. ThisLen = 0;
  206. }
  207. void markError() {
  208. moveToEnd();
  209. HasError = true;
  210. if (HadError != nullptr)
  211. *HadError = true;
  212. }
  213. ValueType ThisValue;
  214. BinaryStreamRef IterRef;
  215. Extractor Extract;
  216. const ArrayType *Array{nullptr};
  217. uint32_t ThisLen{0};
  218. uint32_t AbsOffset{0};
  219. bool HasError{false};
  220. bool *HadError{nullptr};
  221. };
  222. template <typename T> class FixedStreamArrayIterator;
  223. /// FixedStreamArray is similar to VarStreamArray, except with each record
  224. /// having a fixed-length. As with VarStreamArray, there is no upfront
  225. /// cost associated with building or copying a FixedStreamArray, as the
  226. /// memory for each element is not read from the backing stream until that
  227. /// element is iterated.
  228. template <typename T> class FixedStreamArray {
  229. friend class FixedStreamArrayIterator<T>;
  230. public:
  231. typedef FixedStreamArrayIterator<T> Iterator;
  232. FixedStreamArray() = default;
  233. explicit FixedStreamArray(BinaryStreamRef Stream) : Stream(Stream) {
  234. assert(Stream.getLength() % sizeof(T) == 0);
  235. }
  236. bool operator==(const FixedStreamArray<T> &Other) const {
  237. return Stream == Other.Stream;
  238. }
  239. bool operator!=(const FixedStreamArray<T> &Other) const {
  240. return !(*this == Other);
  241. }
  242. FixedStreamArray(const FixedStreamArray &) = default;
  243. FixedStreamArray &operator=(const FixedStreamArray &) = default;
  244. const T &operator[](uint32_t Index) const {
  245. assert(Index < size());
  246. uint32_t Off = Index * sizeof(T);
  247. ArrayRef<uint8_t> Data;
  248. if (auto EC = Stream.readBytes(Off, sizeof(T), Data)) {
  249. assert(false && "Unexpected failure reading from stream");
  250. // This should never happen since we asserted that the stream length was
  251. // an exact multiple of the element size.
  252. consumeError(std::move(EC));
  253. }
  254. assert(isAddrAligned(Align::Of<T>(), Data.data()));
  255. return *reinterpret_cast<const T *>(Data.data());
  256. }
  257. uint32_t size() const { return Stream.getLength() / sizeof(T); }
  258. bool empty() const { return size() == 0; }
  259. FixedStreamArrayIterator<T> begin() const {
  260. return FixedStreamArrayIterator<T>(*this, 0);
  261. }
  262. FixedStreamArrayIterator<T> end() const {
  263. return FixedStreamArrayIterator<T>(*this, size());
  264. }
  265. const T &front() const { return *begin(); }
  266. const T &back() const {
  267. FixedStreamArrayIterator<T> I = end();
  268. return *(--I);
  269. }
  270. BinaryStreamRef getUnderlyingStream() const { return Stream; }
  271. private:
  272. BinaryStreamRef Stream;
  273. };
  274. template <typename T>
  275. class FixedStreamArrayIterator
  276. : public iterator_facade_base<FixedStreamArrayIterator<T>,
  277. std::random_access_iterator_tag, const T> {
  278. public:
  279. FixedStreamArrayIterator(const FixedStreamArray<T> &Array, uint32_t Index)
  280. : Array(Array), Index(Index) {}
  281. FixedStreamArrayIterator(const FixedStreamArrayIterator<T> &Other)
  282. : Array(Other.Array), Index(Other.Index) {}
  283. FixedStreamArrayIterator<T> &
  284. operator=(const FixedStreamArrayIterator<T> &Other) {
  285. Array = Other.Array;
  286. Index = Other.Index;
  287. return *this;
  288. }
  289. const T &operator*() const { return Array[Index]; }
  290. const T &operator*() { return Array[Index]; }
  291. bool operator==(const FixedStreamArrayIterator<T> &R) const {
  292. assert(Array == R.Array);
  293. return (Index == R.Index) && (Array == R.Array);
  294. }
  295. FixedStreamArrayIterator<T> &operator+=(std::ptrdiff_t N) {
  296. Index += N;
  297. return *this;
  298. }
  299. FixedStreamArrayIterator<T> &operator-=(std::ptrdiff_t N) {
  300. assert(std::ptrdiff_t(Index) >= N);
  301. Index -= N;
  302. return *this;
  303. }
  304. std::ptrdiff_t operator-(const FixedStreamArrayIterator<T> &R) const {
  305. assert(Array == R.Array);
  306. assert(Index >= R.Index);
  307. return Index - R.Index;
  308. }
  309. bool operator<(const FixedStreamArrayIterator<T> &RHS) const {
  310. assert(Array == RHS.Array);
  311. return Index < RHS.Index;
  312. }
  313. private:
  314. FixedStreamArray<T> Array;
  315. uint32_t Index;
  316. };
  317. } // namespace llvm
  318. #endif // LLVM_SUPPORT_BINARYSTREAMARRAY_H
  319. #ifdef __GNUC__
  320. #pragma GCC diagnostic pop
  321. #endif