//===-- sanitizer_procmaps_mac.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Information about the process mappings (Mac-specific parts).
//===----------------------------------------------------------------------===//

#include "sanitizer_platform.h"
#if SANITIZER_MAC

#include "sanitizer_common.h"
#include "sanitizer_placement_new.h"
#include "sanitizer_procmaps.h"

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <mach/mach.h>

// These are not available in older macOS SDKs.
#ifndef CPU_SUBTYPE_X86_64_H
#define CPU_SUBTYPE_X86_64_H ((cpu_subtype_t)8) /* Haswell */
#endif
#ifndef CPU_SUBTYPE_ARM_V7S
#define CPU_SUBTYPE_ARM_V7S ((cpu_subtype_t)11) /* Swift */
#endif
#ifndef CPU_SUBTYPE_ARM_V7K
#define CPU_SUBTYPE_ARM_V7K ((cpu_subtype_t)12)
#endif
#ifndef CPU_TYPE_ARM64
#define CPU_TYPE_ARM64 (CPU_TYPE_ARM | CPU_ARCH_ABI64)
#endif

namespace __sanitizer {

// Contains information used to iterate through sections.
struct MemoryMappedSegmentData {
  char name[kMaxSegName];             // Segment name, e.g. "__DATA".
  uptr nsects;                        // Number of sections left to visit.
  const char *current_load_cmd_addr;  // Next section header to read.
  u32 lc_type;                        // LC_SEGMENT or LC_SEGMENT_64.
  uptr base_virt_addr;                // Base address added to section addresses.
  uptr addr_mask;                     // Mask applied to section addresses.
};

template <typename Section>
static void NextSectionLoad(LoadedModule *module, MemoryMappedSegmentData *data,
                            bool isWritable) {
  const Section *sc = (const Section *)data->current_load_cmd_addr;
  data->current_load_cmd_addr += sizeof(Section);

  uptr sec_start = (sc->addr & data->addr_mask) + data->base_virt_addr;
  uptr sec_end = sec_start + sc->size;
  module->addAddressRange(sec_start, sec_end, /*executable=*/false, isWritable,
                          sc->sectname);
}

void MemoryMappedSegment::AddAddressRanges(LoadedModule *module) {
  // Don't iterate over sections when the caller hasn't set up the
  // data pointer, when there are no sections, or when the segment
  // is executable. Avoid iterating over executable sections because
  // it will confuse libignore, and because the extra granularity
  // of information is not needed by any sanitizers.
  if (!data_ || !data_->nsects || IsExecutable()) {
    module->addAddressRange(start, end, IsExecutable(), IsWritable(),
                            data_ ? data_->name : nullptr);
    return;
  }

  do {
    if (data_->lc_type == LC_SEGMENT) {
      NextSectionLoad<struct section>(module, data_, IsWritable());
#ifdef MH_MAGIC_64
    } else if (data_->lc_type == LC_SEGMENT_64) {
      NextSectionLoad<struct section_64>(module, data_, IsWritable());
#endif
    }
  } while (--data_->nsects);
}

MemoryMappingLayout::MemoryMappingLayout(bool cache_enabled) {
  Reset();
}

MemoryMappingLayout::~MemoryMappingLayout() {
}

bool MemoryMappingLayout::Error() const {
  return false;
}

// More information about Mach-O headers can be found in mach-o/loader.h
// Each Mach-O image has a header (mach_header or mach_header_64) starting with
// a magic number, and a list of linker load commands directly following the
// header.
// A load command is at least two 32-bit words: the command type and the
// command size in bytes. We're interested only in segment load commands
// (LC_SEGMENT and LC_SEGMENT_64), which tell that a part of the file is mapped
// into the task's address space.
// The |vmaddr|, |vmsize| and |fileoff| fields of segment_command or
// segment_command_64 correspond to the memory address, memory size and the
// file offset of the current memory segment.
// Because these fields are taken from the images as is, one needs to add
// _dyld_get_image_vmaddr_slide() to get the actual addresses at runtime.
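//
// As an illustrative sketch (assuming a 64-bit image at dyld index |i| and a
// segment load command |sc| of type segment_command_64), the runtime address
// range of a segment is roughly:
//
//   uptr slide = (uptr)_dyld_get_image_vmaddr_slide(i);
//   uptr seg_start = sc->vmaddr + slide;
//   uptr seg_end = seg_start + sc->vmsize;
//
// NextSegmentLoad() below implements this, additionally masking |vmaddr| when
// the image is dyld itself.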

void MemoryMappingLayout::Reset() {
  // Count down from the top.
  // TODO(glider): as per man 3 dyld, iterating over the headers with
  // _dyld_image_count is thread-unsafe. We need to register callbacks for
  // adding and removing images which will invalidate the MemoryMappingLayout
  // state.
  data_.current_image = _dyld_image_count();
  data_.current_load_cmd_count = -1;
  data_.current_load_cmd_addr = 0;
  data_.current_magic = 0;
  data_.current_filetype = 0;
  data_.current_arch = kModuleArchUnknown;
  internal_memset(data_.current_uuid, 0, kModuleUUIDSize);
}

// The dyld load address should be unchanged throughout process execution,
// and it is expensive to compute once many libraries have been loaded,
// so cache it here and do not reset.
static mach_header *dyld_hdr = 0;
static const char kDyldPath[] = "/usr/lib/dyld";
static const int kDyldImageIdx = -1;

// static
void MemoryMappingLayout::CacheMemoryMappings() {
  // No-op on Mac for now.
}

void MemoryMappingLayout::LoadFromCache() {
  // No-op on Mac for now.
}

// _dyld_get_image_header() and related APIs don't report dyld itself.
// We work around this by manually walking the memory map with
// vm_region_recurse_64() until we hit a Mach header matching dyld instead.
// These calls are expensive, but the first memory map generation occurs
// early in the process, when dyld is one of the only images loaded, so dyld
// will be found after only a few iterations.
static mach_header *get_dyld_image_header() {
  vm_address_t address = 0;
  while (true) {
    vm_size_t size = 0;
    unsigned depth = 1;
    struct vm_region_submap_info_64 info;
    mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
    kern_return_t err =
        vm_region_recurse_64(mach_task_self(), &address, &size, &depth,
                             (vm_region_info_t)&info, &count);
    if (err != KERN_SUCCESS) return nullptr;

    if (size >= sizeof(mach_header) && info.protection & kProtectionRead) {
      mach_header *hdr = (mach_header *)address;
      if ((hdr->magic == MH_MAGIC || hdr->magic == MH_MAGIC_64) &&
          hdr->filetype == MH_DYLINKER) {
        return hdr;
      }
    }
    address += size;
  }
}

const mach_header *get_dyld_hdr() {
  if (!dyld_hdr) dyld_hdr = get_dyld_image_header();

  return dyld_hdr;
}

// Next and NextSegmentLoad were inspired by base/sysinfo.cc in
// Google Perftools, https://github.com/gperftools/gperftools.

// NextSegmentLoad scans the current image for the next segment load command
// and returns the start and end addresses and file offset of the corresponding
// segment.
// Note that the segment addresses are not necessarily sorted.
template <u32 kLCSegment, typename SegmentCommand>
static bool NextSegmentLoad(MemoryMappedSegment *segment,
                            MemoryMappedSegmentData *seg_data,
                            MemoryMappingLayoutData *layout_data) {
  const char *lc = layout_data->current_load_cmd_addr;
  layout_data->current_load_cmd_addr += ((const load_command *)lc)->cmdsize;
  if (((const load_command *)lc)->cmd == kLCSegment) {
    const SegmentCommand *sc = (const SegmentCommand *)lc;
    uptr base_virt_addr, addr_mask;
    if (layout_data->current_image == kDyldImageIdx) {
      base_virt_addr = (uptr)get_dyld_hdr();
      // vmaddr is masked with 0xfffff because on macOS versions < 10.12,
      // it contains an absolute address rather than an offset for dyld.
      // To make matters even more complicated, this absolute address
      // isn't actually the absolute segment address, but the offset portion
      // of the address is accurate when combined with the dyld base address,
      // and the mask will give just this offset.
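      // Illustrative sketch with a hypothetical value (not taken from a real
      // dyld binary): if such a pre-10.12 vmaddr were 0x7fff5fc3a000, the
      // mask would keep only the offset 0x3a000, which is then added to the
      // dyld base address obtained from get_dyld_hdr() above.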
      addr_mask = 0xfffff;
    } else {
      base_virt_addr =
          (uptr)_dyld_get_image_vmaddr_slide(layout_data->current_image);
      addr_mask = ~0;
    }

    segment->start = (sc->vmaddr & addr_mask) + base_virt_addr;
    segment->end = segment->start + sc->vmsize;
    // Most callers don't need section information, so only fill this struct
    // when required.
    if (seg_data) {
      seg_data->nsects = sc->nsects;
      seg_data->current_load_cmd_addr =
          (const char *)lc + sizeof(SegmentCommand);
      seg_data->lc_type = kLCSegment;
      seg_data->base_virt_addr = base_virt_addr;
      seg_data->addr_mask = addr_mask;
      internal_strncpy(seg_data->name, sc->segname,
                       ARRAY_SIZE(seg_data->name));
    }

    // Return the initial protection.
    segment->protection = sc->initprot;
    segment->offset = (layout_data->current_filetype ==
                       /*MH_EXECUTE*/ 0x2)
                          ? sc->vmaddr
                          : sc->fileoff;
    if (segment->filename) {
      const char *src = (layout_data->current_image == kDyldImageIdx)
                            ? kDyldPath
                            : _dyld_get_image_name(layout_data->current_image);
      internal_strncpy(segment->filename, src, segment->filename_size);
    }
    segment->arch = layout_data->current_arch;
    internal_memcpy(segment->uuid, layout_data->current_uuid, kModuleUUIDSize);
    return true;
  }
  return false;
}

ModuleArch ModuleArchFromCpuType(cpu_type_t cputype, cpu_subtype_t cpusubtype) {
  cpusubtype = cpusubtype & ~CPU_SUBTYPE_MASK;
  switch (cputype) {
    case CPU_TYPE_I386:
      return kModuleArchI386;
    case CPU_TYPE_X86_64:
      if (cpusubtype == CPU_SUBTYPE_X86_64_ALL) return kModuleArchX86_64;
      if (cpusubtype == CPU_SUBTYPE_X86_64_H) return kModuleArchX86_64H;
      CHECK(0 && "Invalid subtype of x86_64");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM:
      if (cpusubtype == CPU_SUBTYPE_ARM_V6) return kModuleArchARMV6;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7) return kModuleArchARMV7;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7S) return kModuleArchARMV7S;
      if (cpusubtype == CPU_SUBTYPE_ARM_V7K) return kModuleArchARMV7K;
      CHECK(0 && "Invalid subtype of ARM");
      return kModuleArchUnknown;
    case CPU_TYPE_ARM64:
      return kModuleArchARM64;
    default:
      CHECK(0 && "Invalid CPU type");
      return kModuleArchUnknown;
  }
}

static const load_command *NextCommand(const load_command *lc) {
  return (const load_command *)((const char *)lc + lc->cmdsize);
}

static void FindUUID(const load_command *first_lc, u8 *uuid_output) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_UUID) continue;

    const uuid_command *uuid_lc = (const uuid_command *)lc;
    const uint8_t *uuid = &uuid_lc->uuid[0];
    internal_memcpy(uuid_output, uuid, kModuleUUIDSize);
    return;
  }
}

static bool IsModuleInstrumented(const load_command *first_lc) {
  for (const load_command *lc = first_lc; lc->cmd != 0; lc = NextCommand(lc)) {
    if (lc->cmd != LC_LOAD_DYLIB) continue;

    const dylib_command *dylib_lc = (const dylib_command *)lc;
    uint32_t dylib_name_offset = dylib_lc->dylib.name.offset;
    const char *dylib_name = ((const char *)dylib_lc) + dylib_name_offset;
    dylib_name = StripModuleName(dylib_name);
    if (dylib_name != 0 && (internal_strstr(dylib_name, "libclang_rt."))) {
      return true;
    }
  }
  return false;
}

bool MemoryMappingLayout::Next(MemoryMappedSegment *segment) {
  for (; data_.current_image >= kDyldImageIdx; data_.current_image--) {
    const mach_header *hdr = (data_.current_image == kDyldImageIdx)
                                 ? get_dyld_hdr()
                                 : _dyld_get_image_header(data_.current_image);
    if (!hdr) continue;
    if (data_.current_load_cmd_count < 0) {
      // Set up for this image.
      data_.current_load_cmd_count = hdr->ncmds;
      data_.current_magic = hdr->magic;
      data_.current_filetype = hdr->filetype;
      data_.current_arch = ModuleArchFromCpuType(hdr->cputype, hdr->cpusubtype);
      switch (data_.current_magic) {
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          data_.current_load_cmd_addr =
              (const char *)hdr + sizeof(mach_header_64);
          break;
        }
#endif
        case MH_MAGIC: {
          data_.current_load_cmd_addr = (const char *)hdr + sizeof(mach_header);
          break;
        }
        default: {
          continue;
        }
      }
      FindUUID((const load_command *)data_.current_load_cmd_addr,
               data_.current_uuid);
      data_.current_instrumented = IsModuleInstrumented(
          (const load_command *)data_.current_load_cmd_addr);
    }

    for (; data_.current_load_cmd_count >= 0; data_.current_load_cmd_count--) {
      switch (data_.current_magic) {
        // data_.current_magic may be only one of MH_MAGIC, MH_MAGIC_64.
#ifdef MH_MAGIC_64
        case MH_MAGIC_64: {
          if (NextSegmentLoad<LC_SEGMENT_64, struct segment_command_64>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
#endif
        case MH_MAGIC: {
          if (NextSegmentLoad<LC_SEGMENT, struct segment_command>(
                  segment, segment->data_, &data_))
            return true;
          break;
        }
      }
    }

    // If we get here, no more load_cmd's in this image talk about
    // segments. Go on to the next image.
  }
  return false;
}

void MemoryMappingLayout::DumpListOfModules(
    InternalMmapVectorNoCtor<LoadedModule> *modules) {
  Reset();
  InternalMmapVector<char> module_name(kMaxPathLength);
  MemoryMappedSegment segment(module_name.data(), module_name.size());
  MemoryMappedSegmentData data;
  segment.data_ = &data;
  while (Next(&segment)) {
    if (segment.filename[0] == '\0') continue;
    LoadedModule *cur_module = nullptr;
    if (!modules->empty() &&
        0 == internal_strcmp(segment.filename, modules->back().full_name())) {
      cur_module = &modules->back();
    } else {
      modules->push_back(LoadedModule());
      cur_module = &modules->back();
      cur_module->set(segment.filename, segment.start, segment.arch,
                      segment.uuid, data_.current_instrumented);
    }
    segment.AddAddressRanges(cur_module);
  }
}

}  // namespace __sanitizer

#endif  // SANITIZER_MAC