//===- AArch64SpeculationHardening.cpp - Harden Against Misspeculation ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass to insert code to mitigate against side channel
// vulnerabilities that may happen under control flow miss-speculation.
//
// The pass implements tracking of control flow miss-speculation into a "taint"
// register. That taint register can then be used to mask off registers with
// sensitive data when executing under miss-speculation, a.k.a. "transient
// execution".
// This pass is aimed at mitigating against SpectreV1-style vulnerabilities.
//
// It also implements speculative load hardening, i.e. using the taint register
// to automatically mask off loaded data.
//
// As a possible follow-on improvement, an intrinsics-based approach, as
// explained at https://lwn.net/Articles/759423/, could be implemented on top
// of the current design.
//
// For AArch64, the following implementation choices are made to implement the
// tracking of control flow miss-speculation into a taint register:
// Some of these are different from the implementation choices made in
// the similar pass implemented in X86SpeculativeLoadHardening.cpp, as
// the instruction set characteristics result in different trade-offs.
// - The speculation hardening is done after register allocation. With a
//   relative abundance of registers, one register is reserved (X16) to be
//   the taint register. X16 is expected to not clash with other register
//   reservation mechanisms with very high probability because:
//   . The AArch64 ABI doesn't guarantee X16 to be retained across any call.
//   . The only way for a programmer to request that X16 be used is through
//     inline assembly. In the rare case a function explicitly demands to
//     use X16/W16, this pass falls back to hardening against speculation
//     by inserting a DSB SYS/ISB barrier pair, which will prevent control
//     flow speculation.
// - It is easy to insert mask operations at this late stage, as we have
//   mask operations available that don't set flags.
// - The taint variable contains all-ones when no miss-speculation is detected,
//   and contains all-zeros when miss-speculation is detected. Therefore, when
//   masking, an AND instruction (which only changes the register to be masked,
//   with no other side effects) can easily be inserted anywhere it's needed.
// - The tracking of miss-speculation is done by using a data-flow conditional
//   select instruction (CSEL) to evaluate the flags that were also used to
//   make conditional branch direction decisions. Speculation of the CSEL
//   instruction can be limited with a CSDB instruction - so the combination of
//   CSEL + a later CSDB gives the guarantee that the flags as used in the CSEL
//   aren't speculated. When the conditional branch direction gets
//   miss-speculated, the semantics of the inserted CSEL instruction are such
//   that the taint register will contain all zero bits.
//   One key requirement for this to work is that the conditional branch is
//   followed by an execution of the CSEL instruction, where the CSEL
//   instruction needs to use the same flags status as the conditional branch.
//   This means that the conditional branches must not be implemented as one
//   of the AArch64 conditional branches that do not use the flags as input
//   (CB(N)Z and TB(N)Z). This is implemented by ensuring that the instruction
//   selectors do not produce these instructions when speculation hardening
//   is enabled. This pass will assert if it does encounter such an
//   instruction.
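//   As an illustrative sketch (block labels and the condition code are
//   hypothetical), hardening a branch on the "eq" condition results in:
//     b.eq .LBB0_2
//   with, on the taken edge:
//     csel x16, x16, xzr, eq  ; taint := all-zeros iff "eq" miss-speculated
//   and on the not-taken edge:
//     csel x16, x16, xzr, ne  ; taint := all-zeros iff "ne" miss-speculated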
// - On function call boundaries, the miss-speculation state is transferred
//   from the taint register X16 to be encoded in the SP register as value 0.
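//   For example (with X1 as a hypothetical free temporary register), just
//   before a return the pass emits:
//     mov  x1, sp
//     and  x1, x1, x16   ; SP becomes 0 when miss-speculating
//     mov  sp, x1
//   and at function entry or just after a call it recovers the taint:
//     cmp  sp, #0
//     csetm x16, ne      ; X16 := all-ones if SP != 0, all-zeros otherwise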
//
// For the aspect of automatically hardening loads using the taint register
// (a.k.a. speculative load hardening, see
// https://llvm.org/docs/SpeculativeLoadHardening.html), the following
// implementation choices are made for AArch64:
// - Many of the optimizations described at
//   https://llvm.org/docs/SpeculativeLoadHardening.html to harden fewer
//   loads haven't been implemented yet - but for some of them there are
//   FIXMEs in the code.
// - Loads that load into general purpose (X or W) registers get hardened by
//   masking the loaded data. For loads that load into other registers, the
//   address loaded from gets hardened. It is expected that hardening the
//   loaded data may be more efficient; but masking data in registers other
//   than X or W is not easy and may result in being slower than just
//   hardening the X address register loaded from.
// - On AArch64, CSDB instructions are inserted between the masking of the
//   register and its first use, to ensure there's no non-control-flow
//   speculation that might undermine the hardening mechanism.
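//   As a sketch (registers hypothetical), a load into a GPR becomes:
//     ldr  x2, [x1]
//     and  x2, x2, x16   ; mask the loaded value with the taint register
//     csdb               ; inserted before the first use of x2
//   while for a load into, e.g., a floating point register, the address
//   register is masked before the load instead:
//     and  x1, x1, x16
//     csdb
//     ldr  d0, [x1]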
//
// Future extensions/improvements could be:
// - Implement this functionality using full speculation barriers, akin to the
//   x86-slh-lfence option. This may be more useful for the intrinsics-based
//   approach than for the SLH approach to masking.
//   Note that this pass already inserts the full speculation barriers if the
//   function for some niche reason makes use of X16/W16.
// - No indirect branch misprediction gets protected/instrumented yet; but this
//   could be done for some indirect branches, such as switch jump tables.
//
//===----------------------------------------------------------------------===//
  92. #include "AArch64InstrInfo.h"
  93. #include "AArch64Subtarget.h"
  94. #include "Utils/AArch64BaseInfo.h"
  95. #include "llvm/ADT/BitVector.h"
  96. #include "llvm/ADT/SmallVector.h"
  97. #include "llvm/CodeGen/MachineBasicBlock.h"
  98. #include "llvm/CodeGen/MachineFunction.h"
  99. #include "llvm/CodeGen/MachineFunctionPass.h"
  100. #include "llvm/CodeGen/MachineInstr.h"
  101. #include "llvm/CodeGen/MachineInstrBuilder.h"
  102. #include "llvm/CodeGen/MachineOperand.h"
  103. #include "llvm/CodeGen/MachineRegisterInfo.h"
  104. #include "llvm/CodeGen/RegisterScavenging.h"
  105. #include "llvm/IR/DebugLoc.h"
  106. #include "llvm/Pass.h"
  107. #include "llvm/Support/CodeGen.h"
  108. #include "llvm/Support/Debug.h"
  109. #include "llvm/Target/TargetMachine.h"
  110. #include <cassert>
  111. using namespace llvm;
  112. #define DEBUG_TYPE "aarch64-speculation-hardening"
  113. #define AARCH64_SPECULATION_HARDENING_NAME "AArch64 speculation hardening pass"
  114. static cl::opt<bool> HardenLoads("aarch64-slh-loads", cl::Hidden,
  115. cl::desc("Sanitize loads from memory."),
  116. cl::init(true));
  117. namespace {
class AArch64SpeculationHardening : public MachineFunctionPass {
public:
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;

  static char ID;

  AArch64SpeculationHardening() : MachineFunctionPass(ID) {
    initializeAArch64SpeculationHardeningPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override {
    return AARCH64_SPECULATION_HARDENING_NAME;
  }

private:
  unsigned MisspeculatingTaintReg;
  unsigned MisspeculatingTaintReg32Bit;
  bool UseControlFlowSpeculationBarrier;
  BitVector RegsNeedingCSDBBeforeUse;
  BitVector RegsAlreadyMasked;

  bool functionUsesHardeningRegister(MachineFunction &MF) const;
  bool instrumentControlFlow(MachineBasicBlock &MBB,
                             bool &UsesFullSpeculationBarrier);
  bool endsWithCondControlFlow(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
                               MachineBasicBlock *&FBB,
                               AArch64CC::CondCode &CondCode) const;
  void insertTrackingCode(MachineBasicBlock &SplitEdgeBB,
                          AArch64CC::CondCode &CondCode, DebugLoc DL) const;
  void insertSPToRegTaintPropagation(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) const;
  void insertRegToSPTaintPropagation(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned TmpReg) const;
  void insertFullSpeculationBarrier(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    DebugLoc DL) const;

  bool slhLoads(MachineBasicBlock &MBB);
  bool makeGPRSpeculationSafe(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI,
                              MachineInstr &MI, unsigned Reg);
  bool lowerSpeculationSafeValuePseudos(MachineBasicBlock &MBB,
                                        bool UsesFullSpeculationBarrier);
  bool expandSpeculationSafeValue(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  bool UsesFullSpeculationBarrier);
  bool insertCSDB(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  DebugLoc DL);
};

} // end anonymous namespace

char AArch64SpeculationHardening::ID = 0;

INITIALIZE_PASS(AArch64SpeculationHardening, "aarch64-speculation-hardening",
                AARCH64_SPECULATION_HARDENING_NAME, false, false)
bool AArch64SpeculationHardening::endsWithCondControlFlow(
    MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
    AArch64CC::CondCode &CondCode) const {
  SmallVector<MachineOperand, 1> analyzeBranchCondCode;
  if (TII->analyzeBranch(MBB, TBB, FBB, analyzeBranchCondCode, false))
    return false;

  // Ignore if the BB ends in an unconditional branch/fall-through.
  if (analyzeBranchCondCode.empty())
    return false;

  // If the BB ends with a single conditional branch, FBB will be set to
  // nullptr (see API docs for TII->analyzeBranch). For the rest of the
  // analysis we want the FBB block to be set always.
  assert(TBB != nullptr);
  if (FBB == nullptr)
    FBB = MBB.getFallThrough();

  // If both the true and the false condition jump to the same basic block,
  // there is no need for any protection - whether the branch is speculated
  // correctly or not, we end up executing the architecturally correct code.
  if (TBB == FBB)
    return false;

  assert(MBB.succ_size() == 2);
  // Translate analyzeBranchCondCode to CondCode.
  assert(analyzeBranchCondCode.size() == 1 && "unknown Cond array format");
  CondCode = AArch64CC::CondCode(analyzeBranchCondCode[0].getImm());
  return true;
}
void AArch64SpeculationHardening::insertFullSpeculationBarrier(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    DebugLoc DL) const {
  // A full control flow speculation barrier consists of (DSB SYS + ISB).
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::DSB)).addImm(0xf);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ISB)).addImm(0xf);
}

void AArch64SpeculationHardening::insertTrackingCode(
    MachineBasicBlock &SplitEdgeBB, AArch64CC::CondCode &CondCode,
    DebugLoc DL) const {
  if (UseControlFlowSpeculationBarrier) {
    insertFullSpeculationBarrier(SplitEdgeBB, SplitEdgeBB.begin(), DL);
  } else {
    BuildMI(SplitEdgeBB, SplitEdgeBB.begin(), DL, TII->get(AArch64::CSELXr))
        .addDef(MisspeculatingTaintReg)
        .addUse(MisspeculatingTaintReg)
        .addUse(AArch64::XZR)
        .addImm(CondCode);
    SplitEdgeBB.addLiveIn(AArch64::NZCV);
  }
}
bool AArch64SpeculationHardening::instrumentControlFlow(
    MachineBasicBlock &MBB, bool &UsesFullSpeculationBarrier) {
  LLVM_DEBUG(dbgs() << "Instrument control flow tracking on MBB: " << MBB);

  bool Modified = false;
  MachineBasicBlock *TBB = nullptr;
  MachineBasicBlock *FBB = nullptr;
  AArch64CC::CondCode CondCode;

  if (!endsWithCondControlFlow(MBB, TBB, FBB, CondCode)) {
    LLVM_DEBUG(dbgs() << "... doesn't end with CondControlFlow\n");
  } else {
    // Now insert:
    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, cond" on the True edge and
    // "CSEL MisSpeculatingR, MisSpeculatingR, XZR, Invertcond" on the False
    // edge.
    AArch64CC::CondCode InvCondCode = AArch64CC::getInvertedCondCode(CondCode);

    MachineBasicBlock *SplitEdgeTBB = MBB.SplitCriticalEdge(TBB, *this);
    MachineBasicBlock *SplitEdgeFBB = MBB.SplitCriticalEdge(FBB, *this);

    assert(SplitEdgeTBB != nullptr);
    assert(SplitEdgeFBB != nullptr);

    DebugLoc DL;
    if (MBB.instr_end() != MBB.instr_begin())
      DL = (--MBB.instr_end())->getDebugLoc();

    insertTrackingCode(*SplitEdgeTBB, CondCode, DL);
    insertTrackingCode(*SplitEdgeFBB, InvCondCode, DL);

    LLVM_DEBUG(dbgs() << "SplitEdgeTBB: " << *SplitEdgeTBB << "\n");
    LLVM_DEBUG(dbgs() << "SplitEdgeFBB: " << *SplitEdgeFBB << "\n");
    Modified = true;
  }

  // Perform correct code generation around function calls and before returns.
  // The below variables record the return/terminator instructions and the call
  // instructions respectively; including which register is available as a
  // temporary register just before the recorded instructions.
  SmallVector<std::pair<MachineInstr *, unsigned>, 4> ReturnInstructions;
  SmallVector<std::pair<MachineInstr *, unsigned>, 4> CallInstructions;
  // If a temporary register is not available for at least one of the
  // instructions for which we need to transfer taint to the stack pointer, we
  // need to insert a full speculation barrier.
  // TmpRegisterNotAvailableEverywhere tracks that condition.
  bool TmpRegisterNotAvailableEverywhere = false;

  RegScavenger RS;
  RS.enterBasicBlock(MBB);

  for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); I++) {
    MachineInstr &MI = *I;
    if (!MI.isReturn() && !MI.isCall())
      continue;

    // The RegScavenger represents registers available *after* the MI
    // instruction pointed to by RS.getCurrentPosition().
    // We need to have a register that is available *before* the MI is
    // executed.
    if (I != MBB.begin())
      RS.forward(std::prev(I));
    // FIXME: The below just finds *an* unused register. Maybe code could be
    // optimized more if this looks for the register that isn't used for the
    // longest time around this place, to enable more scheduling freedom. Not
    // sure if that would actually result in a big performance difference
    // though. Maybe RegisterScavenger::findSurvivorBackwards has some logic
    // already to do this - but it's unclear if that could easily be used here.
    Register TmpReg = RS.FindUnusedReg(&AArch64::GPR64commonRegClass);
    LLVM_DEBUG(dbgs() << "RS finds "
                      << ((TmpReg == 0) ? "no register " : "register ");
               if (TmpReg != 0) dbgs() << printReg(TmpReg, TRI) << " ";
               dbgs() << "to be available at MI " << MI);
    if (TmpReg == 0)
      TmpRegisterNotAvailableEverywhere = true;
    if (MI.isReturn())
      ReturnInstructions.push_back({&MI, TmpReg});
    else if (MI.isCall())
      CallInstructions.push_back({&MI, TmpReg});
  }

  if (TmpRegisterNotAvailableEverywhere) {
    // When a temporary register is not available everywhere in this basic
    // block where a propagate-taint-to-sp operation is needed, just
    // emit a full speculation barrier at the start of this basic block, which
    // renders the taint/speculation tracking in this basic block unnecessary.
    insertFullSpeculationBarrier(MBB, MBB.begin(),
                                 (MBB.begin())->getDebugLoc());
    UsesFullSpeculationBarrier = true;
    Modified = true;
  } else {
    for (auto MI_Reg : ReturnInstructions) {
      assert(MI_Reg.second != 0);
      LLVM_DEBUG(
          dbgs()
          << " About to insert Reg to SP taint propagation with temp register "
          << printReg(MI_Reg.second, TRI)
          << " on instruction: " << *MI_Reg.first);
      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
      Modified = true;
    }

    for (auto MI_Reg : CallInstructions) {
      assert(MI_Reg.second != 0);
      LLVM_DEBUG(dbgs() << " About to insert Reg to SP and back taint "
                           "propagation with temp register "
                        << printReg(MI_Reg.second, TRI)
                        << " around instruction: " << *MI_Reg.first);
      // Just after the call:
      insertSPToRegTaintPropagation(
          MBB, std::next((MachineBasicBlock::iterator)MI_Reg.first));
      // Just before the call:
      insertRegToSPTaintPropagation(MBB, MI_Reg.first, MI_Reg.second);
      Modified = true;
    }
  }
  return Modified;
}
void AArch64SpeculationHardening::insertSPToRegTaintPropagation(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  // If full control flow speculation barriers are used, emit a control flow
  // barrier to block potential miss-speculation in flight coming in to this
  // function.
  if (UseControlFlowSpeculationBarrier) {
    insertFullSpeculationBarrier(MBB, MBBI, DebugLoc());
    return;
  }
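
  // Otherwise, recover the taint from the stack pointer: by convention, SP
  // contains the value 0 at this point exactly when miss-speculation was
  // detected, so derive an all-ones/all-zeros X16 from it.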
  // CMP SP, #0 === SUBS xzr, SP, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::SUBSXri))
      .addDef(AArch64::XZR)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // CSETM x16, NE === CSINV x16, xzr, xzr, EQ
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::CSINVXr))
      .addDef(MisspeculatingTaintReg)
      .addUse(AArch64::XZR)
      .addUse(AArch64::XZR)
      .addImm(AArch64CC::EQ);
}
void AArch64SpeculationHardening::insertRegToSPTaintPropagation(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    unsigned TmpReg) const {
  // If full control flow speculation barriers are used, there will not be
  // miss-speculation when returning from this function, and therefore, also
  // no need to encode potential miss-speculation into the stack pointer.
  if (UseControlFlowSpeculationBarrier)
    return;
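
  // Otherwise, fold the taint into SP through the temporary register:
  // AND-ing SP with the all-ones taint leaves it unchanged, while the
  // all-zeros taint turns SP into 0, signalling miss-speculation across the
  // function boundary. The AND (shifted register) form used here cannot
  // access SP directly, hence the moves through TmpReg.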
  // mov Xtmp, SP === ADD Xtmp, SP, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(TmpReg)
      .addUse(AArch64::SP)
      .addImm(0)
      .addImm(0); // no shift
  // and Xtmp, Xtmp, TaintReg === AND Xtmp, Xtmp, TaintReg, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ANDXrs))
      .addDef(TmpReg, RegState::Renamable)
      .addUse(TmpReg, RegState::Kill | RegState::Renamable)
      .addUse(MisspeculatingTaintReg, RegState::Kill)
      .addImm(0);
  // mov SP, Xtmp === ADD SP, Xtmp, #0
  BuildMI(MBB, MBBI, DebugLoc(), TII->get(AArch64::ADDXri))
      .addDef(AArch64::SP)
      .addUse(TmpReg, RegState::Kill)
      .addImm(0)
      .addImm(0); // no shift
}
bool AArch64SpeculationHardening::functionUsesHardeningRegister(
    MachineFunction &MF) const {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // Treat function calls specially, as the hardening register does not
      // need to remain live across function calls.
      if (MI.isCall())
        continue;
      if (MI.readsRegister(MisspeculatingTaintReg, TRI) ||
          MI.modifiesRegister(MisspeculatingTaintReg, TRI))
        return true;
    }
  }
  return false;
}
// Make GPR register Reg speculation-safe by putting it through the
// SpeculationSafeValue pseudo instruction, if we can't prove that
// the value in the register has already been hardened.
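// The pseudo is expanded later, in expandSpeculationSafeValue, into an AND of
// the register with the taint register (e.g. SpeculationSafeValueX on X2
// becomes "and x2, x2, x16"), unless full speculation barriers are used.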
bool AArch64SpeculationHardening::makeGPRSpeculationSafe(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, MachineInstr &MI,
    unsigned Reg) {
  assert(AArch64::GPR32allRegClass.contains(Reg) ||
         AArch64::GPR64allRegClass.contains(Reg));

  // Loads cannot directly load a value into the SP (nor WSP).
  // Therefore, if Reg is SP or WSP, it is because the instruction loads from
  // the stack through the stack pointer.
  //
  // Since the stack pointer is never dynamically controllable, don't harden
  // it.
  if (Reg == AArch64::SP || Reg == AArch64::WSP)
    return false;

  // Do not harden the register again if already hardened before.
  if (RegsAlreadyMasked[Reg])
    return false;

  const bool Is64Bit = AArch64::GPR64allRegClass.contains(Reg);
  LLVM_DEBUG(dbgs() << "About to harden register : " << Reg << "\n");
  BuildMI(MBB, MBBI, MI.getDebugLoc(),
          TII->get(Is64Bit ? AArch64::SpeculationSafeValueX
                           : AArch64::SpeculationSafeValueW))
      .addDef(Reg)
      .addUse(Reg);
  RegsAlreadyMasked.set(Reg);
  return true;
}
bool AArch64SpeculationHardening::slhLoads(MachineBasicBlock &MBB) {
  bool Modified = false;

  LLVM_DEBUG(dbgs() << "slhLoads running on MBB: " << MBB);

  RegsAlreadyMasked.reset();

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  MachineBasicBlock::iterator NextMBBI;
  for (; MBBI != E; MBBI = NextMBBI) {
    MachineInstr &MI = *MBBI;
    NextMBBI = std::next(MBBI);
    // Only harden loaded values or addresses used in loads.
    if (!MI.mayLoad())
      continue;

    LLVM_DEBUG(dbgs() << "About to harden: " << MI);

    // For general purpose register loads, harden the registers loaded into.
    // For other loads, harden the address loaded from.
    // Masking the loaded value is expected to result in less performance
    // overhead, as the load can still execute speculatively in comparison to
    // when the address loaded from gets masked. However, masking is only
    // easy to do efficiently on GPR registers, so for loads into non-GPR
    // registers (e.g. floating point loads), mask the address loaded from.
    bool AllDefsAreGPR = llvm::all_of(MI.defs(), [&](MachineOperand &Op) {
      return Op.isReg() && (AArch64::GPR32allRegClass.contains(Op.getReg()) ||
                            AArch64::GPR64allRegClass.contains(Op.getReg()));
    });
    // FIXME: it might be a worthwhile optimization to not mask loaded
    // values if all the registers involved in address calculation are already
    // hardened, leading to this load not being able to execute on a
    // miss-speculated path.
    bool HardenLoadedData = AllDefsAreGPR;
    bool HardenAddressLoadedFrom = !HardenLoadedData;

    // First remove registers from RegsAlreadyMasked if their value is
    // updated by this instruction - it makes them contain a new value that is
    // not guaranteed to already have been masked.
    for (MachineOperand Op : MI.defs())
      for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
        RegsAlreadyMasked.reset(*AI);

    // FIXME: loads from the stack with an immediate offset from the stack
    // pointer probably shouldn't be hardened, which could result in a
    // significant optimization. See section "Don't check loads from
    // compile-time constant stack offsets", in
    // https://llvm.org/docs/SpeculativeLoadHardening.html
    if (HardenLoadedData)
      for (auto Def : MI.defs()) {
        if (Def.isDead())
          // Do not mask a register that is not used further.
          continue;
        // FIXME: For pre/post-increment addressing modes, the base register
        // used in address calculation is also defined by this instruction.
        // It might be a worthwhile optimization to not harden that
        // base register increment/decrement when the increment/decrement is
        // an immediate.
        Modified |= makeGPRSpeculationSafe(MBB, NextMBBI, MI, Def.getReg());
      }

    if (HardenAddressLoadedFrom)
      for (auto Use : MI.uses()) {
        if (!Use.isReg())
          continue;
        Register Reg = Use.getReg();
        // Some loads of floating point data have implicit defs/uses on a
        // super register of that floating point data. Some examples:
        // $s0 = LDRSui $sp, 22, implicit-def $q0
        // $q0 = LD1i64 $q0, 1, renamable $x0
        // We need to filter out these uses for non-GPR registers, which occur
        // because the load partially fills a non-GPR register with the loaded
        // data. Just skipping all non-GPR registers is safe (for now) as all
        // AArch64 load instructions only use GPR registers to perform the
        // address calculation. FIXME: However that might change once we can
        // produce SVE gather instructions.
        if (!(AArch64::GPR32allRegClass.contains(Reg) ||
              AArch64::GPR64allRegClass.contains(Reg)))
          continue;
        Modified |= makeGPRSpeculationSafe(MBB, MBBI, MI, Reg);
      }
  }
  return Modified;
}
/// \brief If MBBI references a pseudo instruction that should be expanded
/// here, do the expansion and return true. Otherwise return false.
bool AArch64SpeculationHardening::expandSpeculationSafeValue(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    bool UsesFullSpeculationBarrier) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  bool Is64Bit = true;

  switch (Opcode) {
  default:
    break;
  case AArch64::SpeculationSafeValueW:
    Is64Bit = false;
    [[fallthrough]];
  case AArch64::SpeculationSafeValueX:
    // Just remove the SpeculationSafe pseudos if control flow
    // miss-speculation isn't happening because we're already inserting
    // barriers to guarantee that.
    if (!UseControlFlowSpeculationBarrier && !UsesFullSpeculationBarrier) {
      Register DstReg = MI.getOperand(0).getReg();
      Register SrcReg = MI.getOperand(1).getReg();
      // Mark this register and all its aliasing registers as needing to be
      // value speculation hardened before its next use, by using a CSDB
      // barrier instruction.
      for (MachineOperand Op : MI.defs())
        for (MCRegAliasIterator AI(Op.getReg(), TRI, true); AI.isValid(); ++AI)
          RegsNeedingCSDBBeforeUse.set(*AI);

      // Mask off with taint state.
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              Is64Bit ? TII->get(AArch64::ANDXrs) : TII->get(AArch64::ANDWrs))
          .addDef(DstReg)
          .addUse(SrcReg, RegState::Kill)
          .addUse(Is64Bit ? MisspeculatingTaintReg
                          : MisspeculatingTaintReg32Bit)
          .addImm(0);
    }
    MI.eraseFromParent();
    return true;
  }
  return false;
}
bool AArch64SpeculationHardening::insertCSDB(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc DL) {
  assert(!UseControlFlowSpeculationBarrier && "No need to insert CSDBs when "
                                              "control flow miss-speculation "
                                              "is already blocked");
  // Insert a data value speculation barrier (CSDB); CSDB is encoded as the
  // HINT instruction with immediate 0x14.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::HINT)).addImm(0x14);
  RegsNeedingCSDBBeforeUse.reset();
  return true;
}
bool AArch64SpeculationHardening::lowerSpeculationSafeValuePseudos(
    MachineBasicBlock &MBB, bool UsesFullSpeculationBarrier) {
  bool Modified = false;

  RegsNeedingCSDBBeforeUse.reset();

  // The following loop iterates over all instructions in the basic block,
  // and performs 2 operations:
  // 1. Insert a CSDB at this location if needed.
  // 2. Expand the SpeculationSafeValue pseudo if the current instruction is
  //    one.
  //
  // The insertion of the CSDB is done as late as possible (i.e. just before
  // the use of a masked register), in the hope that this will reduce the
  // total number of CSDBs in a block when there are multiple masked registers
  // in the block.
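  // As a sketch (registers hypothetical), two masked values whose first use
  // comes later need only a single CSDB:
  //   and x2, x2, x16
  //   and x3, x3, x16
  //   csdb             ; emitted just before the first use of x2 or x3
  //   add x4, x2, x3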
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  DebugLoc DL;
  while (MBBI != E) {
    MachineInstr &MI = *MBBI;
    DL = MI.getDebugLoc();
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);

    // First check if a CSDB needs to be inserted due to earlier registers
    // that were masked and that are used by the next instruction.
    // Also emit the barrier on any potential control flow changes.
    bool NeedToEmitBarrier = false;
    if (RegsNeedingCSDBBeforeUse.any() && (MI.isCall() || MI.isTerminator()))
      NeedToEmitBarrier = true;
    if (!NeedToEmitBarrier)
      for (MachineOperand Op : MI.uses())
        if (Op.isReg() && RegsNeedingCSDBBeforeUse[Op.getReg()]) {
          NeedToEmitBarrier = true;
          break;
        }

    if (NeedToEmitBarrier && !UsesFullSpeculationBarrier)
      Modified |= insertCSDB(MBB, MBBI, DL);

    Modified |=
        expandSpeculationSafeValue(MBB, MBBI, UsesFullSpeculationBarrier);

    MBBI = NMBBI;
  }

  if (RegsNeedingCSDBBeforeUse.any() && !UsesFullSpeculationBarrier)
    Modified |= insertCSDB(MBB, MBBI, DL);

  return Modified;
}
bool AArch64SpeculationHardening::runOnMachineFunction(MachineFunction &MF) {
  if (!MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
    return false;

  MisspeculatingTaintReg = AArch64::X16;
  MisspeculatingTaintReg32Bit = AArch64::W16;
  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  RegsNeedingCSDBBeforeUse.resize(TRI->getNumRegs());
  RegsAlreadyMasked.resize(TRI->getNumRegs());
  UseControlFlowSpeculationBarrier = functionUsesHardeningRegister(MF);

  bool Modified = false;

  // Step 1: Enable automatic insertion of SpeculationSafeValue.
  if (HardenLoads) {
    LLVM_DEBUG(
        dbgs() << "***** AArch64SpeculationHardening - automatic insertion of "
                  "SpeculationSafeValue intrinsics *****\n");
    for (auto &MBB : MF)
      Modified |= slhLoads(MBB);
  }

  // Step 2: Add instrumentation code to function entry and exits.
  LLVM_DEBUG(
      dbgs()
      << "***** AArch64SpeculationHardening - track control flow *****\n");

  SmallVector<MachineBasicBlock *, 2> EntryBlocks;
  EntryBlocks.push_back(&MF.front());
  for (const LandingPadInfo &LPI : MF.getLandingPads())
    EntryBlocks.push_back(LPI.LandingPadBlock);
  for (auto *Entry : EntryBlocks)
    insertSPToRegTaintPropagation(
        *Entry, Entry->SkipPHIsLabelsAndDebug(Entry->begin()));

  // Step 3: Add instrumentation code to every basic block.
  for (auto &MBB : MF) {
    bool UsesFullSpeculationBarrier = false;
    Modified |= instrumentControlFlow(MBB, UsesFullSpeculationBarrier);
    Modified |=
        lowerSpeculationSafeValuePseudos(MBB, UsesFullSpeculationBarrier);
  }

  return Modified;
}
/// \brief Returns an instance of the AArch64 speculation hardening pass.
FunctionPass *llvm::createAArch64SpeculationHardeningPass() {
  return new AArch64SpeculationHardening();
}