//==- X86IndirectThunks.cpp - Construct indirect call/jump thunks for x86 --=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Pass that injects an MI thunk that is used to lower indirect calls in a way
/// that prevents speculation on some x86 processors and can be used to mitigate
/// security vulnerabilities due to targeted speculative execution and side
/// channels such as CVE-2017-5715.
///
/// Currently supported thunks include:
/// - Retpoline -- A RET-implemented trampoline that lowers indirect calls
/// - LVI Thunk -- A CALL/JMP-implemented thunk that forces load serialization
///   before making an indirect call/jump
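///
/// As an illustrative sketch (the rewrite itself happens during call lowering,
/// not in this file), a retpoline-protected indirect call on x86-64 becomes a
/// direct call to the thunk, with the target first moved into %r11:
///
///   movq %rax, %r11              # hypothetical source register
///   callq __llvm_retpoline_r11   # instead of: callq *%rax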
///
/// Note that the reason that this is implemented as a MachineFunctionPass and
/// not a ModulePass is that ModulePasses at this point in the LLVM X86 pipeline
/// serialize all transformations, which can consume lots of memory.
///
/// TODO(chandlerc): All of this code could use better comments and
/// documentation.
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/IndirectThunks.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "x86-retpoline-thunks"

static const char RetpolineNamePrefix[] = "__llvm_retpoline_";
static const char R11RetpolineName[] = "__llvm_retpoline_r11";
static const char EAXRetpolineName[] = "__llvm_retpoline_eax";
static const char ECXRetpolineName[] = "__llvm_retpoline_ecx";
static const char EDXRetpolineName[] = "__llvm_retpoline_edx";
static const char EDIRetpolineName[] = "__llvm_retpoline_edi";

static const char LVIThunkNamePrefix[] = "__llvm_lvi_thunk_";
static const char R11LVIThunkName[] = "__llvm_lvi_thunk_r11";

namespace {
struct RetpolineThunkInserter : ThunkInserter<RetpolineThunkInserter> {
  const char *getThunkPrefix() { return RetpolineNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
    if (InsertedThunks)
      return false;
    const auto &STI = MF.getSubtarget<X86Subtarget>();
    return (STI.useRetpolineIndirectCalls() ||
            STI.useRetpolineIndirectBranches()) &&
           !STI.useRetpolineExternalThunk();
  }
  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF);
  void populateThunk(MachineFunction &MF);
};

struct LVIThunkInserter : ThunkInserter<LVIThunkInserter> {
  const char *getThunkPrefix() { return LVIThunkNamePrefix; }
  bool mayUseThunk(const MachineFunction &MF, bool InsertedThunks) {
    if (InsertedThunks)
      return false;
    return MF.getSubtarget<X86Subtarget>().useLVIControlFlowIntegrity();
  }
  bool insertThunks(MachineModuleInfo &MMI, MachineFunction &MF) {
    createThunkFunction(MMI, R11LVIThunkName);
    return true;
  }
  void populateThunk(MachineFunction &MF) {
    assert(MF.size() == 1);
    MachineBasicBlock *Entry = &MF.front();
    Entry->clear();

    // This code mitigates LVI by replacing each indirect call/jump with a
    // direct call/jump to a thunk that looks like:
    // ```
    // lfence
    // jmpq *%r11
    // ```
    // This ensures that if the value in register %r11 was loaded from memory,
    // then the value in %r11 is (architecturally) correct prior to the jump.
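    //
    // Callers are expected to have moved the indirect target into %r11 before
    // reaching the thunk (hence the name __llvm_lvi_thunk_r11); %r11 is marked
    // live-in below so the MIR verifier sees the read by the jmpq.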
    const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::LFENCE));
    BuildMI(&MF.front(), DebugLoc(), TII->get(X86::JMP64r)).addReg(X86::R11);
    MF.front().addLiveIn(X86::R11);
  }
};

class X86IndirectThunks : public MachineFunctionPass {
public:
  static char ID;

  X86IndirectThunks() : MachineFunctionPass(ID) {}

  StringRef getPassName() const override { return "X86 Indirect Thunks"; }

  bool doInitialization(Module &M) override;
  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  std::tuple<RetpolineThunkInserter, LVIThunkInserter> TIs;

  // FIXME: When LLVM moves to C++17, these can become folds
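  // (a hypothetical C++17 sketch for initTIs, folding over the comma operator:
  //  `(std::get<ThunkInserterT>(ThunkInserters).init(M), ...);`)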
  template <typename... ThunkInserterT>
  static void initTIs(Module &M,
                      std::tuple<ThunkInserterT...> &ThunkInserters) {
    (void)std::initializer_list<int>{
        (std::get<ThunkInserterT>(ThunkInserters).init(M), 0)...};
  }
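  // (likewise, a hypothetical C++17 runTIs could fold over operator|:
  //  `return (false | ... |
  //           std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF));`)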
  template <typename... ThunkInserterT>
  static bool runTIs(MachineModuleInfo &MMI, MachineFunction &MF,
                     std::tuple<ThunkInserterT...> &ThunkInserters) {
    bool Modified = false;
    (void)std::initializer_list<int>{
        Modified |= std::get<ThunkInserterT>(ThunkInserters).run(MMI, MF)...};
    return Modified;
  }
};

} // end anonymous namespace

bool RetpolineThunkInserter::insertThunks(MachineModuleInfo &MMI,
                                          MachineFunction &MF) {
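  // On 64-bit targets a single thunk suffices: %r11 is a caller-saved scratch
  // register that is not used for argument passing, so call lowering can
  // always materialize the target there. 32-bit targets lack such a register,
  // hence the set of per-register variants below.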
  if (MMI.getTarget().getTargetTriple().getArch() == Triple::x86_64)
    createThunkFunction(MMI, R11RetpolineName);
  else
    for (StringRef Name : {EAXRetpolineName, ECXRetpolineName, EDXRetpolineName,
                           EDIRetpolineName})
      createThunkFunction(MMI, Name);
  return true;
}

void RetpolineThunkInserter::populateThunk(MachineFunction &MF) {
  bool Is64Bit = MF.getTarget().getTargetTriple().getArch() == Triple::x86_64;
  Register ThunkReg;
  if (Is64Bit) {
    assert(MF.getName() == "__llvm_retpoline_r11" &&
           "Should only have an r11 thunk on 64-bit targets");

    // __llvm_retpoline_r11:
    //   callq .Lr11_call_target
    // .Lr11_capture_spec:
    //   pause
    //   lfence
    //   jmp .Lr11_capture_spec
    // .align 16
    // .Lr11_call_target:
    //   movq %r11, (%rsp) # Clobber return addr
    //   retq
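    //
    // The callq pushes the address of .Lr11_capture_spec as its return
    // address; the movq then overwrites that stack slot with the real target,
    // so the retq transfers control to *%r11 while any speculated return path
    // is trapped in the capture loop.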
    ThunkReg = X86::R11;
  } else {
    // For 32-bit targets we need to emit a collection of thunks for various
    // possible scratch registers as well as a fallback that uses EDI, which is
    // normally callee saved.
    //
    // __llvm_retpoline_eax:
    //   calll .Leax_call_target
    // .Leax_capture_spec:
    //   pause
    //   lfence
    //   jmp .Leax_capture_spec
    // .align 16
    // .Leax_call_target:
    //   movl %eax, (%esp) # Clobber return addr
    //   retl
    //
    // __llvm_retpoline_ecx:
    //   ... # Same setup
    //   movl %ecx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edx:
    //   ... # Same setup
    //   movl %edx, (%esp)
    //   retl
    //
    // __llvm_retpoline_edi:
    //   ... # Same setup
    //   movl %edi, (%esp)
    //   retl
    if (MF.getName() == EAXRetpolineName)
      ThunkReg = X86::EAX;
    else if (MF.getName() == ECXRetpolineName)
      ThunkReg = X86::ECX;
    else if (MF.getName() == EDXRetpolineName)
      ThunkReg = X86::EDX;
    else if (MF.getName() == EDIRetpolineName)
      ThunkReg = X86::EDI;
    else
      llvm_unreachable("Invalid thunk name on x86-32!");
  }

  const TargetInstrInfo *TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();

  assert(MF.size() == 1);
  MachineBasicBlock *Entry = &MF.front();
  Entry->clear();

  MachineBasicBlock *CaptureSpec =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MachineBasicBlock *CallTarget =
      MF.CreateMachineBasicBlock(Entry->getBasicBlock());
  MCSymbol *TargetSym = MF.getContext().createTempSymbol();
  MF.push_back(CaptureSpec);
  MF.push_back(CallTarget);

  const unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
  const unsigned RetOpc = Is64Bit ? X86::RET64 : X86::RET32;

  Entry->addLiveIn(ThunkReg);
  BuildMI(Entry, DebugLoc(), TII->get(CallOpc)).addSym(TargetSym);

  // The MIR verifier thinks that the CALL in the entry block will fall through
  // to CaptureSpec, so mark it as the successor. Technically, CallTarget is
  // the successor, but the MIR verifier doesn't know how to cope with that.
  Entry->addSuccessor(CaptureSpec);

  // In the capture loop for speculation, we want to stop the processor from
  // speculating as fast as possible. On Intel processors, the PAUSE instruction
  // will block speculation without consuming any execution resources. On AMD
  // processors, the PAUSE instruction is (essentially) a nop, so we also use an
  // LFENCE instruction which they have advised will stop speculation as well
  // with minimal resource utilization. We still end the capture with a jump to
  // form an infinite loop, to fully guarantee that no matter the implementation
  // of the x86 ISA, speculation down this code path never escapes.
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::PAUSE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::LFENCE));
  BuildMI(CaptureSpec, DebugLoc(), TII->get(X86::JMP_1)).addMBB(CaptureSpec);
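  // Neither block is reached through an ordinary branch: CallTarget is entered
  // via the callq's symbol and CaptureSpec via the pushed return address, so
  // mark both address-taken to keep later passes from removing or merging
  // them.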
  CaptureSpec->setMachineBlockAddressTaken();
  CaptureSpec->addSuccessor(CaptureSpec);

  CallTarget->addLiveIn(ThunkReg);
  CallTarget->setMachineBlockAddressTaken();
  CallTarget->setAlignment(Align(16));

  // Insert return address clobber
  const unsigned MovOpc = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
  const Register SPReg = Is64Bit ? X86::RSP : X86::ESP;
  addRegOffset(BuildMI(CallTarget, DebugLoc(), TII->get(MovOpc)), SPReg, false,
               0)
      .addReg(ThunkReg);
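
  // Bind the call-target label to the first instruction of this block so the
  // callq in the entry block lands on the return-address clobber above.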
  CallTarget->back().setPreInstrSymbol(MF, TargetSym);
  BuildMI(CallTarget, DebugLoc(), TII->get(RetOpc));
}

FunctionPass *llvm::createX86IndirectThunksPass() {
  return new X86IndirectThunks();
}

char X86IndirectThunks::ID = 0;

bool X86IndirectThunks::doInitialization(Module &M) {
  initTIs(M, TIs);
  return false;
}

bool X86IndirectThunks::runOnMachineFunction(MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << getPassName() << '\n');
  auto &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
  return runTIs(MMI, MF, TIs);
}