PDBStringTableBuilder.cpp 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. //===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "llvm/DebugInfo/PDB/Native/PDBStringTableBuilder.h"
  9. #include "llvm/ADT/ArrayRef.h"
  10. #include "llvm/DebugInfo/PDB/Native/Hash.h"
  11. #include "llvm/DebugInfo/PDB/Native/RawTypes.h"
  12. #include "llvm/Support/BinaryStreamWriter.h"
  13. #include "llvm/Support/Endian.h"
  14. #include <map>
  15. using namespace llvm;
  16. using namespace llvm::msf;
  17. using namespace llvm::support;
  18. using namespace llvm::support::endian;
  19. using namespace llvm::pdb;
  20. StringTableHashTraits::StringTableHashTraits(PDBStringTableBuilder &Table)
  21. : Table(&Table) {}
  22. uint32_t StringTableHashTraits::hashLookupKey(StringRef S) const {
  23. // The reference implementation doesn't include code for /src/headerblock
  24. // handling, but it can only read natvis entries lld's PDB files if
  25. // this hash function truncates the hash to 16 bit.
  26. // PDB/include/misc.h in the reference implementation has a hashSz() function
  27. // that returns an unsigned short, that seems what's being used for
  28. // /src/headerblock.
  29. return static_cast<uint16_t>(Table->getIdForString(S));
  30. }
  31. StringRef StringTableHashTraits::storageKeyToLookupKey(uint32_t Offset) const {
  32. return Table->getStringForId(Offset);
  33. }
  34. uint32_t StringTableHashTraits::lookupKeyToStorageKey(StringRef S) {
  35. return Table->insert(S);
  36. }
  37. uint32_t PDBStringTableBuilder::insert(StringRef S) {
  38. return Strings.insert(S);
  39. }
  40. uint32_t PDBStringTableBuilder::getIdForString(StringRef S) const {
  41. return Strings.getIdForString(S);
  42. }
  43. StringRef PDBStringTableBuilder::getStringForId(uint32_t Id) const {
  44. return Strings.getStringForId(Id);
  45. }
  46. static uint32_t computeBucketCount(uint32_t NumStrings) {
  47. // This is a precomputed list of Buckets given the specified number of
  48. // strings. Matching the reference algorithm exactly is not strictly
  49. // necessary for correctness, but it helps when comparing LLD's PDBs with
  50. // Microsoft's PDBs so as to eliminate superfluous differences.
  51. // The reference implementation does (in nmt.h, NMT::grow()):
  52. // unsigned StringCount = 0;
  53. // unsigned BucketCount = 1;
  54. // fn insert() {
  55. // ++StringCount;
  56. // if (BucketCount * 3 / 4 < StringCount)
  57. // BucketCount = BucketCount * 3 / 2 + 1;
  58. // }
  59. // This list contains all StringCount, BucketCount pairs where BucketCount was
  60. // just incremented. It ends before the first BucketCount entry where
  61. // BucketCount * 3 would overflow a 32-bit unsigned int.
  62. static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
  63. {0, 1},
  64. {1, 2},
  65. {2, 4},
  66. {4, 7},
  67. {6, 11},
  68. {9, 17},
  69. {13, 26},
  70. {20, 40},
  71. {31, 61},
  72. {46, 92},
  73. {70, 139},
  74. {105, 209},
  75. {157, 314},
  76. {236, 472},
  77. {355, 709},
  78. {532, 1064},
  79. {799, 1597},
  80. {1198, 2396},
  81. {1798, 3595},
  82. {2697, 5393},
  83. {4045, 8090},
  84. {6068, 12136},
  85. {9103, 18205},
  86. {13654, 27308},
  87. {20482, 40963},
  88. {30723, 61445},
  89. {46084, 92168},
  90. {69127, 138253},
  91. {103690, 207380},
  92. {155536, 311071},
  93. {233304, 466607},
  94. {349956, 699911},
  95. {524934, 1049867},
  96. {787401, 1574801},
  97. {1181101, 2362202},
  98. {1771652, 3543304},
  99. {2657479, 5314957},
  100. {3986218, 7972436},
  101. {5979328, 11958655},
  102. {8968992, 17937983},
  103. {13453488, 26906975},
  104. {20180232, 40360463},
  105. {30270348, 60540695},
  106. {45405522, 90811043},
  107. {68108283, 136216565},
  108. {102162424, 204324848},
  109. {153243637, 306487273},
  110. {229865455, 459730910},
  111. {344798183, 689596366},
  112. {517197275, 1034394550},
  113. {775795913, 1551591826},
  114. {1163693870, 2327387740}};
  115. const auto *Entry = llvm::lower_bound(
  116. StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
  117. assert(Entry != std::end(StringsToBuckets));
  118. return Entry->second;
  119. }
  120. uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
  121. uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
  122. Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
  123. return Size;
  124. }
  125. uint32_t PDBStringTableBuilder::calculateSerializedSize() const {
  126. uint32_t Size = 0;
  127. Size += sizeof(PDBStringTableHeader);
  128. Size += Strings.calculateSerializedSize();
  129. Size += calculateHashTableSize();
  130. Size += sizeof(uint32_t); // The /names stream ends with the string count.
  131. return Size;
  132. }
  133. void PDBStringTableBuilder::setStrings(
  134. const codeview::DebugStringTableSubsection &Strings) {
  135. this->Strings = Strings;
  136. }
  137. Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
  138. // Write a header
  139. PDBStringTableHeader H;
  140. H.Signature = PDBStringTableSignature;
  141. H.HashVersion = 1;
  142. H.ByteSize = Strings.calculateSerializedSize();
  143. if (auto EC = Writer.writeObject(H))
  144. return EC;
  145. assert(Writer.bytesRemaining() == 0);
  146. return Error::success();
  147. }
  148. Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
  149. if (auto EC = Strings.commit(Writer))
  150. return EC;
  151. assert(Writer.bytesRemaining() == 0);
  152. return Error::success();
  153. }
  154. Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
  155. // Write a hash table.
  156. uint32_t BucketCount = computeBucketCount(Strings.size());
  157. if (auto EC = Writer.writeInteger(BucketCount))
  158. return EC;
  159. std::vector<ulittle32_t> Buckets(BucketCount);
  160. for (const auto &Pair : Strings) {
  161. StringRef S = Pair.getKey();
  162. uint32_t Offset = Pair.getValue();
  163. uint32_t Hash = hashStringV1(S);
  164. for (uint32_t I = 0; I != BucketCount; ++I) {
  165. uint32_t Slot = (Hash + I) % BucketCount;
  166. if (Buckets[Slot] != 0)
  167. continue;
  168. Buckets[Slot] = Offset;
  169. break;
  170. }
  171. }
  172. if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
  173. return EC;
  174. assert(Writer.bytesRemaining() == 0);
  175. return Error::success();
  176. }
  177. Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
  178. if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
  179. return EC;
  180. assert(Writer.bytesRemaining() == 0);
  181. return Error::success();
  182. }
  183. Error PDBStringTableBuilder::commit(BinaryStreamWriter &Writer) const {
  184. BinaryStreamWriter SectionWriter;
  185. std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
  186. if (auto EC = writeHeader(SectionWriter))
  187. return EC;
  188. std::tie(SectionWriter, Writer) =
  189. Writer.split(Strings.calculateSerializedSize());
  190. if (auto EC = writeStrings(SectionWriter))
  191. return EC;
  192. std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
  193. if (auto EC = writeHashTable(SectionWriter))
  194. return EC;
  195. std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
  196. if (auto EC = writeEpilogue(SectionWriter))
  197. return EC;
  198. return Error::success();
  199. }