SourceLocationEncoding.h 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174
  1. #pragma once
  2. #ifdef __GNUC__
  3. #pragma GCC diagnostic push
  4. #pragma GCC diagnostic ignored "-Wunused-parameter"
  5. #endif
  6. //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
  7. //
  8. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  9. // See https://llvm.org/LICENSE.txt for license information.
  10. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  11. //
  12. //===----------------------------------------------------------------------===//
  13. //
  14. // Source locations are stored pervasively in the AST, making up a third of
  15. // the size of typical serialized files. Storing them efficiently is important.
  16. //
  17. // We use integers optimized by VBR-encoding, because:
  18. // - when abbreviations cannot be used, VBR6 encoding is our only choice
  19. // - in the worst case a SourceLocation can be ~any 32-bit number, but in
  20. // practice they are highly predictable
  21. //
  22. // We encode the integer so that likely values encode as small numbers that
  23. // turn into few VBR chunks:
  24. // - the invalid sentinel location is a very common value: it encodes as 0
  25. // - the "macro or not" bit is stored at the bottom of the integer
  26. // (rather than at the top, as in memory), so macro locations can have
  27. // small representations.
  28. // - related locations (e.g. of a left and right paren pair) are usually
  29. // similar, so when encoding a sequence of locations we store only
  30. // differences between successive elements.
  31. //
  32. //===----------------------------------------------------------------------===//
  33. #include <climits>
  34. #include "clang/Basic/SourceLocation.h"
  35. #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
  36. #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
  37. namespace clang {
  38. class SourceLocationSequence;
  39. /// Serialized encoding of SourceLocations without context.
  40. /// Optimized to have small unsigned values (=> small after VBR encoding).
  41. ///
  42. // Macro locations have the top bit set, we rotate by one so it is the low bit.
  43. class SourceLocationEncoding {
  44. using UIntTy = SourceLocation::UIntTy;
  45. constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
  46. static UIntTy encodeRaw(UIntTy Raw) {
  47. return (Raw << 1) | (Raw >> (UIntBits - 1));
  48. }
  49. static UIntTy decodeRaw(UIntTy Raw) {
  50. return (Raw >> 1) | (Raw << (UIntBits - 1));
  51. }
  52. friend SourceLocationSequence;
  53. public:
  54. static uint64_t encode(SourceLocation Loc,
  55. SourceLocationSequence * = nullptr);
  56. static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
  57. };
  58. /// Serialized encoding of a sequence of SourceLocations.
  59. ///
  60. /// Optimized to produce small values when locations with the sequence are
  61. /// similar. Each element can be delta-encoded against the last nonzero element.
  62. ///
  63. /// Sequences should be started by creating a SourceLocationSequence::State,
  64. /// and then passed around as SourceLocationSequence*. Example:
  65. ///
  66. /// // establishes a sequence
  67. /// void EmitTopLevelThing() {
  68. /// SourceLocationSequence::State Seq;
  69. /// EmitContainedThing(Seq);
  70. /// EmitRecursiveThing(Seq);
  71. /// }
  72. ///
  73. /// // optionally part of a sequence
  74. /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
  75. /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
  76. /// }
  77. ///
  78. /// // establishes a sequence if there isn't one already
  79. /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
  80. /// SourceLocationSequence::State Seq(ParentSeq);
  81. /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
  82. /// EmitRecursiveThing(Seq);
  83. /// }
  84. ///
  85. class SourceLocationSequence {
  86. using UIntTy = SourceLocation::UIntTy;
  87. using EncodedTy = uint64_t;
  88. constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
  89. static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
  90. // Prev stores the rotated last nonzero location.
  91. UIntTy &Prev;
  92. // Zig-zag encoding turns small signed integers into small unsigned integers.
  93. // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
  94. static UIntTy zigZag(UIntTy V) {
  95. UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
  96. return Sign ^ (V << 1);
  97. }
  98. static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
  99. SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
  100. EncodedTy encodeRaw(UIntTy Raw) {
  101. if (Raw == 0)
  102. return 0;
  103. UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
  104. if (Prev == 0)
  105. return Prev = Rotated;
  106. UIntTy Delta = Rotated - Prev;
  107. Prev = Rotated;
  108. // Exactly one 33 bit value is possible! (1 << 32).
  109. // This is because we have two representations of zero: trivial & relative.
  110. return 1 + EncodedTy{zigZag(Delta)};
  111. }
  112. UIntTy decodeRaw(EncodedTy Encoded) {
  113. if (Encoded == 0)
  114. return 0;
  115. if (Prev == 0)
  116. return SourceLocationEncoding::decodeRaw(Prev = Encoded);
  117. return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
  118. }
  119. public:
  120. SourceLocation decode(EncodedTy Encoded) {
  121. return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
  122. }
  123. EncodedTy encode(SourceLocation Loc) {
  124. return encodeRaw(Loc.getRawEncoding());
  125. }
  126. class State;
  127. };
  128. /// This object establishes a SourceLocationSequence.
  129. class SourceLocationSequence::State {
  130. UIntTy Prev = 0;
  131. SourceLocationSequence Seq;
  132. public:
  133. // If Parent is provided and non-null, then this root becomes part of that
  134. // enclosing sequence instead of establishing a new one.
  135. State(SourceLocationSequence *Parent = nullptr)
  136. : Seq(Parent ? Parent->Prev : Prev) {}
  137. // Implicit conversion for uniform use of roots vs propagated sequences.
  138. operator SourceLocationSequence *() { return &Seq; }
  139. };
  140. inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
  141. SourceLocationSequence *Seq) {
  142. return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
  143. }
  144. inline SourceLocation
  145. SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
  146. return Seq ? Seq->decode(Encoded)
  147. : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
  148. }
  149. } // namespace clang
  150. #endif
  151. #ifdef __GNUC__
  152. #pragma GCC diagnostic pop
  153. #endif