123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- #pragma once
- #ifdef __GNUC__
- #pragma GCC diagnostic push
- #pragma GCC diagnostic ignored "-Wunused-parameter"
- #endif
- //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // Source locations are stored pervasively in the AST, making up a third of
- // the size of typical serialized files. Storing them efficiently is important.
- //
- // We use integers optimized by VBR-encoding, because:
- // - when abbreviations cannot be used, VBR6 encoding is our only choice
- // - in the worst case a SourceLocation can be ~any 32-bit number, but in
- // practice they are highly predictable
- //
- // We encode the integer so that likely values encode as small numbers that
- // turn into few VBR chunks:
- // - the invalid sentinel location is a very common value: it encodes as 0
- // - the "macro or not" bit is stored at the bottom of the integer
- // (rather than at the top, as in memory), so macro locations can have
- // small representations.
- // - related locations (e.g. of a left and right paren pair) are usually
- // similar, so when encoding a sequence of locations we store only
- // differences between successive elements.
- //
- //===----------------------------------------------------------------------===//
- #include <climits>
- #include "clang/Basic/SourceLocation.h"
- #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
- #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
- namespace clang {
- class SourceLocationSequence;
- /// Serialized encoding of SourceLocations without context.
- /// Optimized to have small unsigned values (=> small after VBR encoding).
- ///
- // Macro locations have the top bit set, we rotate by one so it is the low bit.
- class SourceLocationEncoding {
- using UIntTy = SourceLocation::UIntTy;
- constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
- static UIntTy encodeRaw(UIntTy Raw) {
- return (Raw << 1) | (Raw >> (UIntBits - 1));
- }
- static UIntTy decodeRaw(UIntTy Raw) {
- return (Raw >> 1) | (Raw << (UIntBits - 1));
- }
- friend SourceLocationSequence;
- public:
- static uint64_t encode(SourceLocation Loc,
- SourceLocationSequence * = nullptr);
- static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr);
- };
- /// Serialized encoding of a sequence of SourceLocations.
- ///
- /// Optimized to produce small values when locations with the sequence are
- /// similar. Each element can be delta-encoded against the last nonzero element.
- ///
- /// Sequences should be started by creating a SourceLocationSequence::State,
- /// and then passed around as SourceLocationSequence*. Example:
- ///
- /// // establishes a sequence
- /// void EmitTopLevelThing() {
- /// SourceLocationSequence::State Seq;
- /// EmitContainedThing(Seq);
- /// EmitRecursiveThing(Seq);
- /// }
- ///
- /// // optionally part of a sequence
- /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
- /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
- /// }
- ///
- /// // establishes a sequence if there isn't one already
- /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
- /// SourceLocationSequence::State Seq(ParentSeq);
- /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
- /// EmitRecursiveThing(Seq);
- /// }
- ///
- class SourceLocationSequence {
- using UIntTy = SourceLocation::UIntTy;
- using EncodedTy = uint64_t;
- constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
- static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
- // Prev stores the rotated last nonzero location.
- UIntTy &Prev;
- // Zig-zag encoding turns small signed integers into small unsigned integers.
- // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
- static UIntTy zigZag(UIntTy V) {
- UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
- return Sign ^ (V << 1);
- }
- static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
- SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
- EncodedTy encodeRaw(UIntTy Raw) {
- if (Raw == 0)
- return 0;
- UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
- if (Prev == 0)
- return Prev = Rotated;
- UIntTy Delta = Rotated - Prev;
- Prev = Rotated;
- // Exactly one 33 bit value is possible! (1 << 32).
- // This is because we have two representations of zero: trivial & relative.
- return 1 + EncodedTy{zigZag(Delta)};
- }
- UIntTy decodeRaw(EncodedTy Encoded) {
- if (Encoded == 0)
- return 0;
- if (Prev == 0)
- return SourceLocationEncoding::decodeRaw(Prev = Encoded);
- return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
- }
- public:
- SourceLocation decode(EncodedTy Encoded) {
- return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
- }
- EncodedTy encode(SourceLocation Loc) {
- return encodeRaw(Loc.getRawEncoding());
- }
- class State;
- };
- /// This object establishes a SourceLocationSequence.
- class SourceLocationSequence::State {
- UIntTy Prev = 0;
- SourceLocationSequence Seq;
- public:
- // If Parent is provided and non-null, then this root becomes part of that
- // enclosing sequence instead of establishing a new one.
- State(SourceLocationSequence *Parent = nullptr)
- : Seq(Parent ? Parent->Prev : Prev) {}
- // Implicit conversion for uniform use of roots vs propagated sequences.
- operator SourceLocationSequence *() { return &Seq; }
- };
- inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc,
- SourceLocationSequence *Seq) {
- return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
- }
- inline SourceLocation
- SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) {
- return Seq ? Seq->decode(Encoded)
- : SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
- }
- } // namespace clang
- #endif
- #ifdef __GNUC__
- #pragma GCC diagnostic pop
- #endif
|