1 //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Source locations are stored pervasively in the AST, making up a third of 10 // the size of typical serialized files. Storing them efficiently is important. 11 // 12 // We use integers optimized by VBR-encoding, because: 13 // - when abbrevations cannot be used, VBR6 encoding is our only choice 14 // - in the worst case a SourceLocation can be ~any 32-bit number, but in 15 // practice they are highly predictable 16 // 17 // We encode the integer so that likely values encode as small numbers that 18 // turn into few VBR chunks: 19 // - the invalid sentinel location is a very common value: it encodes as 0 20 // - the "macro or not" bit is stored at the bottom of the integer 21 // (rather than at the top, as in memory), so macro locations can have 22 // small representations. 23 // - related locations (e.g. of a left and right paren pair) are usually 24 // similar, so when encoding a sequence of locations we store only 25 // differences between successive elements. 26 // 27 //===----------------------------------------------------------------------===// 28 29 #include <climits> 30 #include "clang/Basic/SourceLocation.h" 31 32 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H 33 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H 34 35 namespace clang { 36 class SourceLocationSequence; 37 38 /// Serialized encoding of SourceLocations without context. 39 /// Optimized to have small unsigned values (=> small after VBR encoding). 40 /// 41 // Macro locations have the top bit set, we rotate by one so it is the low bit. 42 class SourceLocationEncoding { 43 using UIntTy = SourceLocation::UIntTy; 44 constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); 45 46 static UIntTy encodeRaw(UIntTy Raw) { 47 return (Raw << 1) | (Raw >> (UIntBits - 1)); 48 } 49 static UIntTy decodeRaw(UIntTy Raw) { 50 return (Raw >> 1) | (Raw << (UIntBits - 1)); 51 } 52 friend SourceLocationSequence; 53 54 public: 55 static uint64_t encode(SourceLocation Loc, 56 SourceLocationSequence * = nullptr); 57 static SourceLocation decode(uint64_t, SourceLocationSequence * = nullptr); 58 }; 59 60 /// Serialized encoding of a sequence of SourceLocations. 61 /// 62 /// Optimized to produce small values when locations with the sequence are 63 /// similar. Each element can be delta-encoded against the last nonzero element. 64 /// 65 /// Sequences should be started by creating a SourceLocationSequence::State, 66 /// and then passed around as SourceLocationSequence*. Example: 67 /// 68 /// // establishes a sequence 69 /// void EmitTopLevelThing() { 70 /// SourceLocationSequence::State Seq; 71 /// EmitContainedThing(Seq); 72 /// EmitRecursiveThing(Seq); 73 /// } 74 /// 75 /// // optionally part of a sequence 76 /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { 77 /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); 78 /// } 79 /// 80 /// // establishes a sequence if there isn't one already 81 /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { 82 /// SourceLocationSequence::State Seq(ParentSeq); 83 /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); 84 /// EmitRecursiveThing(Seq); 85 /// } 86 /// 87 class SourceLocationSequence { 88 using UIntTy = SourceLocation::UIntTy; 89 using EncodedTy = uint64_t; 90 constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; 91 static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); 92 93 // Prev stores the rotated last nonzero location. 94 UIntTy &Prev; 95 96 // Zig-zag encoding turns small signed integers into small unsigned integers. 97 // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... 98 static UIntTy zigZag(UIntTy V) { 99 UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); 100 return Sign ^ (V << 1); 101 } 102 static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } 103 104 SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} 105 106 EncodedTy encodeRaw(UIntTy Raw) { 107 if (Raw == 0) 108 return 0; 109 UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); 110 if (Prev == 0) 111 return Prev = Rotated; 112 UIntTy Delta = Rotated - Prev; 113 Prev = Rotated; 114 // Exactly one 33 bit value is possible! (1 << 32). 115 // This is because we have two representations of zero: trivial & relative. 116 return 1 + EncodedTy{zigZag(Delta)}; 117 } 118 UIntTy decodeRaw(EncodedTy Encoded) { 119 if (Encoded == 0) 120 return 0; 121 if (Prev == 0) 122 return SourceLocationEncoding::decodeRaw(Prev = Encoded); 123 return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); 124 } 125 126 public: 127 SourceLocation decode(EncodedTy Encoded) { 128 return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); 129 } 130 EncodedTy encode(SourceLocation Loc) { 131 return encodeRaw(Loc.getRawEncoding()); 132 } 133 134 class State; 135 }; 136 137 /// This object establishes a SourceLocationSequence. 138 class SourceLocationSequence::State { 139 UIntTy Prev = 0; 140 SourceLocationSequence Seq; 141 142 public: 143 // If Parent is provided and non-null, then this root becomes part of that 144 // enclosing sequence instead of establishing a new one. 145 State(SourceLocationSequence *Parent = nullptr) 146 : Seq(Parent ? Parent->Prev : Prev) {} 147 148 // Implicit conversion for uniform use of roots vs propagated sequences. 149 operator SourceLocationSequence *() { return &Seq; } 150 }; 151 152 inline uint64_t SourceLocationEncoding::encode(SourceLocation Loc, 153 SourceLocationSequence *Seq) { 154 return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); 155 } 156 inline SourceLocation 157 SourceLocationEncoding::decode(uint64_t Encoded, SourceLocationSequence *Seq) { 158 return Seq ? Seq->decode(Encoded) 159 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); 160 } 161 162 } // namespace clang 163 #endif 164