elf_mem_image.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. // Copyright 2017 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // Allow dynamic symbol lookup in an in-memory Elf image.
  15. //
  16. #include "absl/debugging/internal/elf_mem_image.h"
  17. #ifdef ABSL_HAVE_ELF_MEM_IMAGE // defined in elf_mem_image.h
  18. #include <string.h>
  19. #include <cassert>
  20. #include <cstddef>
  21. #include <cstdint>
  22. #include "absl/base/config.h"
  23. #include "absl/base/internal/raw_logging.h"
  24. // From binutils/include/elf/common.h (this doesn't appear to be documented
  25. // anywhere else).
  26. //
  27. // /* This flag appears in a Versym structure. It means that the symbol
  28. // is hidden, and is only visible with an explicit version number.
  29. // This is a GNU extension. */
  30. // #define VERSYM_HIDDEN 0x8000
  31. //
  32. // /* This is the mask for the rest of the Versym information. */
  33. // #define VERSYM_VERSION 0x7fff
  34. #define VERSYM_VERSION 0x7fff
  35. namespace absl {
  36. ABSL_NAMESPACE_BEGIN
  37. namespace debugging_internal {
  38. namespace {
  39. #if __SIZEOF_POINTER__ == 4
  40. const int kElfClass = ELFCLASS32;
  41. int ElfBind(const ElfW(Sym) *symbol) { return ELF32_ST_BIND(symbol->st_info); }
  42. int ElfType(const ElfW(Sym) *symbol) { return ELF32_ST_TYPE(symbol->st_info); }
  43. #elif __SIZEOF_POINTER__ == 8
  44. const int kElfClass = ELFCLASS64;
  45. int ElfBind(const ElfW(Sym) *symbol) { return ELF64_ST_BIND(symbol->st_info); }
  46. int ElfType(const ElfW(Sym) *symbol) { return ELF64_ST_TYPE(symbol->st_info); }
  47. #else
  48. const int kElfClass = -1;
  49. int ElfBind(const ElfW(Sym) *) {
  50. ABSL_RAW_LOG(FATAL, "Unexpected word size");
  51. return 0;
  52. }
  53. int ElfType(const ElfW(Sym) *) {
  54. ABSL_RAW_LOG(FATAL, "Unexpected word size");
  55. return 0;
  56. }
  57. #endif
  58. // Extract an element from one of the ELF tables, cast it to desired type.
  59. // This is just a simple arithmetic and a glorified cast.
  60. // Callers are responsible for bounds checking.
  61. template <typename T>
  62. const T *GetTableElement(const ElfW(Ehdr) * ehdr, ElfW(Off) table_offset,
  63. ElfW(Word) element_size, size_t index) {
  64. return reinterpret_cast<const T*>(reinterpret_cast<const char *>(ehdr)
  65. + table_offset
  66. + index * element_size);
  67. }
  68. } // namespace
  69. // The value of this variable doesn't matter; it's used only for its
  70. // unique address.
  71. const int ElfMemImage::kInvalidBaseSentinel = 0;
  72. ElfMemImage::ElfMemImage(const void *base) {
  73. ABSL_RAW_CHECK(base != kInvalidBase, "bad pointer");
  74. Init(base);
  75. }
  76. uint32_t ElfMemImage::GetNumSymbols() const { return num_syms_; }
  77. const ElfW(Sym) * ElfMemImage::GetDynsym(uint32_t index) const {
  78. ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
  79. return dynsym_ + index;
  80. }
  81. const ElfW(Versym) *ElfMemImage::GetVersym(uint32_t index) const {
  82. ABSL_RAW_CHECK(index < GetNumSymbols(), "index out of range");
  83. return versym_ + index;
  84. }
  85. const ElfW(Phdr) *ElfMemImage::GetPhdr(int index) const {
  86. ABSL_RAW_CHECK(index >= 0 && index < ehdr_->e_phnum, "index out of range");
  87. return GetTableElement<ElfW(Phdr)>(ehdr_, ehdr_->e_phoff, ehdr_->e_phentsize,
  88. static_cast<size_t>(index));
  89. }
  90. const char *ElfMemImage::GetDynstr(ElfW(Word) offset) const {
  91. ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
  92. return dynstr_ + offset;
  93. }
  94. const void *ElfMemImage::GetSymAddr(const ElfW(Sym) *sym) const {
  95. if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= SHN_LORESERVE) {
  96. // Symbol corresponds to "special" (e.g. SHN_ABS) section.
  97. return reinterpret_cast<const void *>(sym->st_value);
  98. }
  99. ABSL_RAW_CHECK(link_base_ < sym->st_value, "symbol out of range");
  100. return GetTableElement<char>(ehdr_, 0, 1, sym->st_value - link_base_);
  101. }
  102. const ElfW(Verdef) *ElfMemImage::GetVerdef(int index) const {
  103. ABSL_RAW_CHECK(0 <= index && static_cast<size_t>(index) <= verdefnum_,
  104. "index out of range");
  105. const ElfW(Verdef) *version_definition = verdef_;
  106. while (version_definition->vd_ndx < index && version_definition->vd_next) {
  107. const char *const version_definition_as_char =
  108. reinterpret_cast<const char *>(version_definition);
  109. version_definition =
  110. reinterpret_cast<const ElfW(Verdef) *>(version_definition_as_char +
  111. version_definition->vd_next);
  112. }
  113. return version_definition->vd_ndx == index ? version_definition : nullptr;
  114. }
  115. const ElfW(Verdaux) *ElfMemImage::GetVerdefAux(
  116. const ElfW(Verdef) *verdef) const {
  117. return reinterpret_cast<const ElfW(Verdaux) *>(verdef+1);
  118. }
  119. const char *ElfMemImage::GetVerstr(ElfW(Word) offset) const {
  120. ABSL_RAW_CHECK(offset < strsize_, "offset out of range");
  121. return dynstr_ + offset;
  122. }
  123. void ElfMemImage::Init(const void *base) {
  124. ehdr_ = nullptr;
  125. dynsym_ = nullptr;
  126. dynstr_ = nullptr;
  127. versym_ = nullptr;
  128. verdef_ = nullptr;
  129. num_syms_ = 0;
  130. strsize_ = 0;
  131. verdefnum_ = 0;
  132. // Sentinel: PT_LOAD .p_vaddr can't possibly be this.
  133. link_base_ = ~ElfW(Addr){0}; // NOLINT(readability/braces)
  134. if (!base) {
  135. return;
  136. }
  137. const char *const base_as_char = reinterpret_cast<const char *>(base);
  138. if (base_as_char[EI_MAG0] != ELFMAG0 || base_as_char[EI_MAG1] != ELFMAG1 ||
  139. base_as_char[EI_MAG2] != ELFMAG2 || base_as_char[EI_MAG3] != ELFMAG3) {
  140. assert(false);
  141. return;
  142. }
  143. int elf_class = base_as_char[EI_CLASS];
  144. if (elf_class != kElfClass) {
  145. assert(false);
  146. return;
  147. }
  148. switch (base_as_char[EI_DATA]) {
  149. case ELFDATA2LSB: {
  150. #ifndef ABSL_IS_LITTLE_ENDIAN
  151. assert(false);
  152. return;
  153. #endif
  154. break;
  155. }
  156. case ELFDATA2MSB: {
  157. #ifndef ABSL_IS_BIG_ENDIAN
  158. assert(false);
  159. return;
  160. #endif
  161. break;
  162. }
  163. default: {
  164. assert(false);
  165. return;
  166. }
  167. }
  168. ehdr_ = reinterpret_cast<const ElfW(Ehdr) *>(base);
  169. const ElfW(Phdr) *dynamic_program_header = nullptr;
  170. for (int i = 0; i < ehdr_->e_phnum; ++i) {
  171. const ElfW(Phdr) *const program_header = GetPhdr(i);
  172. switch (program_header->p_type) {
  173. case PT_LOAD:
  174. if (!~link_base_) {
  175. link_base_ = program_header->p_vaddr;
  176. }
  177. break;
  178. case PT_DYNAMIC:
  179. dynamic_program_header = program_header;
  180. break;
  181. }
  182. }
  183. if (!~link_base_ || !dynamic_program_header) {
  184. assert(false);
  185. // Mark this image as not present. Can not recur infinitely.
  186. Init(nullptr);
  187. return;
  188. }
  189. ptrdiff_t relocation =
  190. base_as_char - reinterpret_cast<const char *>(link_base_);
  191. ElfW(Dyn)* dynamic_entry = reinterpret_cast<ElfW(Dyn)*>(
  192. static_cast<intptr_t>(dynamic_program_header->p_vaddr) + relocation);
  193. uint32_t *sysv_hash = nullptr;
  194. uint32_t *gnu_hash = nullptr;
  195. for (; dynamic_entry->d_tag != DT_NULL; ++dynamic_entry) {
  196. const auto value =
  197. static_cast<intptr_t>(dynamic_entry->d_un.d_val) + relocation;
  198. switch (dynamic_entry->d_tag) {
  199. case DT_HASH:
  200. sysv_hash = reinterpret_cast<uint32_t *>(value);
  201. break;
  202. case DT_GNU_HASH:
  203. gnu_hash = reinterpret_cast<uint32_t *>(value);
  204. break;
  205. case DT_SYMTAB:
  206. dynsym_ = reinterpret_cast<ElfW(Sym) *>(value);
  207. break;
  208. case DT_STRTAB:
  209. dynstr_ = reinterpret_cast<const char *>(value);
  210. break;
  211. case DT_VERSYM:
  212. versym_ = reinterpret_cast<ElfW(Versym) *>(value);
  213. break;
  214. case DT_VERDEF:
  215. verdef_ = reinterpret_cast<ElfW(Verdef) *>(value);
  216. break;
  217. case DT_VERDEFNUM:
  218. verdefnum_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
  219. break;
  220. case DT_STRSZ:
  221. strsize_ = static_cast<size_t>(dynamic_entry->d_un.d_val);
  222. break;
  223. default:
  224. // Unrecognized entries explicitly ignored.
  225. break;
  226. }
  227. }
  228. if ((!sysv_hash && !gnu_hash) || !dynsym_ || !dynstr_ || !versym_ ||
  229. !verdef_ || !verdefnum_ || !strsize_) {
  230. assert(false); // invalid VDSO
  231. // Mark this image as not present. Can not recur infinitely.
  232. Init(nullptr);
  233. return;
  234. }
  235. if (sysv_hash) {
  236. num_syms_ = sysv_hash[1];
  237. } else {
  238. assert(gnu_hash);
  239. // Compute the number of symbols for DT_GNU_HASH, which is specified by
  240. // https://sourceware.org/gnu-gabi/program-loading-and-dynamic-linking.txt
  241. uint32_t nbuckets = gnu_hash[0];
  242. // The buckets array is located after the header (4 uint32) and the bloom
  243. // filter (size_t array of gnu_hash[2] elements).
  244. uint32_t *buckets = gnu_hash + 4 + sizeof(size_t) / 4 * gnu_hash[2];
  245. // Find the chain of the last non-empty bucket.
  246. uint32_t idx = 0;
  247. for (uint32_t i = nbuckets; i > 0;) {
  248. idx = buckets[--i];
  249. if (idx != 0) break;
  250. }
  251. if (idx != 0) {
  252. // Find the last element of the chain, which has an odd value.
  253. // Add one to get the number of symbols.
  254. uint32_t *chain = buckets + nbuckets - gnu_hash[1];
  255. while (chain[idx++] % 2 == 0) {
  256. }
  257. }
  258. num_syms_ = idx;
  259. }
  260. }
  261. bool ElfMemImage::LookupSymbol(const char *name,
  262. const char *version,
  263. int type,
  264. SymbolInfo *info_out) const {
  265. for (const SymbolInfo& info : *this) {
  266. if (strcmp(info.name, name) == 0 && strcmp(info.version, version) == 0 &&
  267. ElfType(info.symbol) == type) {
  268. if (info_out) {
  269. *info_out = info;
  270. }
  271. return true;
  272. }
  273. }
  274. return false;
  275. }
  276. bool ElfMemImage::LookupSymbolByAddress(const void *address,
  277. SymbolInfo *info_out) const {
  278. for (const SymbolInfo& info : *this) {
  279. const char *const symbol_start =
  280. reinterpret_cast<const char *>(info.address);
  281. const char *const symbol_end = symbol_start + info.symbol->st_size;
  282. if (symbol_start <= address && address < symbol_end) {
  283. if (info_out) {
  284. // Client wants to know details for that symbol (the usual case).
  285. if (ElfBind(info.symbol) == STB_GLOBAL) {
  286. // Strong symbol; just return it.
  287. *info_out = info;
  288. return true;
  289. } else {
  290. // Weak or local. Record it, but keep looking for a strong one.
  291. *info_out = info;
  292. }
  293. } else {
  294. // Client only cares if there is an overlapping symbol.
  295. return true;
  296. }
  297. }
  298. }
  299. return false;
  300. }
  301. ElfMemImage::SymbolIterator::SymbolIterator(const void *const image,
  302. uint32_t index)
  303. : index_(index), image_(image) {}
  304. const ElfMemImage::SymbolInfo *ElfMemImage::SymbolIterator::operator->() const {
  305. return &info_;
  306. }
  307. const ElfMemImage::SymbolInfo& ElfMemImage::SymbolIterator::operator*() const {
  308. return info_;
  309. }
  310. bool ElfMemImage::SymbolIterator::operator==(const SymbolIterator &rhs) const {
  311. return this->image_ == rhs.image_ && this->index_ == rhs.index_;
  312. }
  313. bool ElfMemImage::SymbolIterator::operator!=(const SymbolIterator &rhs) const {
  314. return !(*this == rhs);
  315. }
  316. ElfMemImage::SymbolIterator &ElfMemImage::SymbolIterator::operator++() {
  317. this->Update(1);
  318. return *this;
  319. }
  320. ElfMemImage::SymbolIterator ElfMemImage::begin() const {
  321. SymbolIterator it(this, 0);
  322. it.Update(0);
  323. return it;
  324. }
  325. ElfMemImage::SymbolIterator ElfMemImage::end() const {
  326. return SymbolIterator(this, GetNumSymbols());
  327. }
  328. void ElfMemImage::SymbolIterator::Update(uint32_t increment) {
  329. const ElfMemImage *image = reinterpret_cast<const ElfMemImage *>(image_);
  330. ABSL_RAW_CHECK(image->IsPresent() || increment == 0, "");
  331. if (!image->IsPresent()) {
  332. return;
  333. }
  334. index_ += increment;
  335. if (index_ >= image->GetNumSymbols()) {
  336. index_ = image->GetNumSymbols();
  337. return;
  338. }
  339. const ElfW(Sym) *symbol = image->GetDynsym(index_);
  340. const ElfW(Versym) *version_symbol = image->GetVersym(index_);
  341. ABSL_RAW_CHECK(symbol && version_symbol, "");
  342. const char *const symbol_name = image->GetDynstr(symbol->st_name);
  343. #if defined(__NetBSD__)
  344. const int version_index = version_symbol->vs_vers & VERSYM_VERSION;
  345. #else
  346. const ElfW(Versym) version_index = version_symbol[0] & VERSYM_VERSION;
  347. #endif
  348. const ElfW(Verdef) *version_definition = nullptr;
  349. const char *version_name = "";
  350. if (symbol->st_shndx == SHN_UNDEF) {
  351. // Undefined symbols reference DT_VERNEED, not DT_VERDEF, and
  352. // version_index could well be greater than verdefnum_, so calling
  353. // GetVerdef(version_index) may trigger assertion.
  354. } else {
  355. version_definition = image->GetVerdef(version_index);
  356. }
  357. if (version_definition) {
  358. // I am expecting 1 or 2 auxiliary entries: 1 for the version itself,
  359. // optional 2nd if the version has a parent.
  360. ABSL_RAW_CHECK(
  361. version_definition->vd_cnt == 1 || version_definition->vd_cnt == 2,
  362. "wrong number of entries");
  363. const ElfW(Verdaux) *version_aux = image->GetVerdefAux(version_definition);
  364. version_name = image->GetVerstr(version_aux->vda_name);
  365. }
  366. info_.name = symbol_name;
  367. info_.version = version_name;
  368. info_.address = image->GetSymAddr(symbol);
  369. info_.symbol = symbol;
  370. }
  371. } // namespace debugging_internal
  372. ABSL_NAMESPACE_END
  373. } // namespace absl
  374. #endif // ABSL_HAVE_ELF_MEM_IMAGE