ARMFixCortexA57AES1742098Pass.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. //===-- ARMFixCortexA57AES1742098Pass.cpp ---------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. // This pass works around a Cortex Core Fused AES erratum:
  9. // - Cortex-A57 Erratum 1742098
  10. // - Cortex-A72 Erratum 1655431
  11. //
  12. // The erratum may be triggered if an input vector register to AESE or AESD was
  13. // last written by an instruction that only updated 32 bits of it. This can
  14. // occur for either of the input registers.
  15. //
  16. // The workaround chosen is to update the input register using `r = VORRq r, r`,
  17. // as this updates all 128 bits of the register unconditionally, but does not
  18. // change the values observed in `r`, making the input safe.
  19. //
  20. // This pass has to be conservative in a few cases:
  21. // - an input vector register to the AES instruction is defined outside the
  22. // current function, where we have to assume the register was updated in an
  23. // unsafe way; and
  24. // - an input vector register to the AES instruction is updated along multiple
  25. // different control-flow paths, where we have to ensure all the register
  26. // updating instructions are safe.
  27. //
// Both of these cases may apply to an input vector register. In either case, we
// need to ensure that, when the pass is finished, there exists a safe
// instruction between every unsafe register updating instruction and the AES
// instruction.
  32. //
  33. //===----------------------------------------------------------------------===//
  34. #include "ARM.h"
  35. #include "ARMBaseInstrInfo.h"
  36. #include "ARMBaseRegisterInfo.h"
  37. #include "ARMSubtarget.h"
  38. #include "Utils/ARMBaseInfo.h"
  39. #include "llvm/ADT/STLExtras.h"
  40. #include "llvm/ADT/SmallPtrSet.h"
  41. #include "llvm/ADT/SmallVector.h"
  42. #include "llvm/ADT/StringRef.h"
  43. #include "llvm/CodeGen/MachineBasicBlock.h"
  44. #include "llvm/CodeGen/MachineFunction.h"
  45. #include "llvm/CodeGen/MachineFunctionPass.h"
  46. #include "llvm/CodeGen/MachineInstr.h"
  47. #include "llvm/CodeGen/MachineInstrBuilder.h"
  48. #include "llvm/CodeGen/MachineInstrBundleIterator.h"
  49. #include "llvm/CodeGen/MachineOperand.h"
  50. #include "llvm/CodeGen/ReachingDefAnalysis.h"
  51. #include "llvm/CodeGen/Register.h"
  52. #include "llvm/CodeGen/TargetRegisterInfo.h"
  53. #include "llvm/IR/DebugLoc.h"
  54. #include "llvm/InitializePasses.h"
  55. #include "llvm/MC/MCInstrDesc.h"
  56. #include "llvm/Pass.h"
  57. #include "llvm/PassRegistry.h"
  58. #include "llvm/Support/Debug.h"
  59. #include "llvm/Support/raw_ostream.h"
  60. #include <assert.h>
  61. #include <stdint.h>
  62. using namespace llvm;
  63. #define DEBUG_TYPE "arm-fix-cortex-a57-aes-1742098"
  64. //===----------------------------------------------------------------------===//
  65. namespace {
  66. class ARMFixCortexA57AES1742098 : public MachineFunctionPass {
  67. public:
  68. static char ID;
  69. explicit ARMFixCortexA57AES1742098() : MachineFunctionPass(ID) {
  70. initializeARMFixCortexA57AES1742098Pass(*PassRegistry::getPassRegistry());
  71. }
  72. bool runOnMachineFunction(MachineFunction &F) override;
  73. MachineFunctionProperties getRequiredProperties() const override {
  74. return MachineFunctionProperties().set(
  75. MachineFunctionProperties::Property::NoVRegs);
  76. }
  77. StringRef getPassName() const override {
  78. return "ARM fix for Cortex-A57 AES Erratum 1742098";
  79. }
  80. void getAnalysisUsage(AnalysisUsage &AU) const override {
  81. AU.addRequired<ReachingDefAnalysis>();
  82. AU.setPreservesCFG();
  83. MachineFunctionPass::getAnalysisUsage(AU);
  84. }
  85. private:
  86. // This is the information needed to insert the fixup in the right place.
  87. struct AESFixupLocation {
  88. MachineBasicBlock *Block;
  89. // The fixup instruction will be inserted *before* InsertionPt.
  90. MachineInstr *InsertionPt;
  91. MachineOperand *MOp;
  92. };
  93. void analyzeMF(MachineFunction &MF, ReachingDefAnalysis &RDA,
  94. const ARMBaseRegisterInfo *TRI,
  95. SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const;
  96. void insertAESFixup(AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
  97. const ARMBaseRegisterInfo *TRI) const;
  98. static bool isFirstAESPairInstr(MachineInstr &MI);
  99. static bool isSafeAESInput(MachineInstr &MI);
  100. };
  101. char ARMFixCortexA57AES1742098::ID = 0;
  102. } // end anonymous namespace
// Pass registration. The BEGIN and END macros receive the same pass class,
// the same DEBUG_TYPE argument string and the same display name.
// ReachingDefAnalysis is listed as a dependency because getAnalysisUsage()
// marks it as required.
INITIALIZE_PASS_BEGIN(ARMFixCortexA57AES1742098, DEBUG_TYPE,
                      "ARM fix for Cortex-A57 AES Erratum 1742098", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(ReachingDefAnalysis);
INITIALIZE_PASS_END(ARMFixCortexA57AES1742098, DEBUG_TYPE,
                    "ARM fix for Cortex-A57 AES Erratum 1742098", false, false)
  109. //===----------------------------------------------------------------------===//
  110. bool ARMFixCortexA57AES1742098::isFirstAESPairInstr(MachineInstr &MI) {
  111. unsigned Opc = MI.getOpcode();
  112. return Opc == ARM::AESD || Opc == ARM::AESE;
  113. }
  114. bool ARMFixCortexA57AES1742098::isSafeAESInput(MachineInstr &MI) {
  115. auto CondCodeIsAL = [](MachineInstr &MI) -> bool {
  116. int CCIdx = MI.findFirstPredOperandIdx();
  117. if (CCIdx == -1)
  118. return false;
  119. return MI.getOperand(CCIdx).getImm() == (int64_t)ARMCC::AL;
  120. };
  121. switch (MI.getOpcode()) {
  122. // Unknown: Assume not safe.
  123. default:
  124. return false;
  125. // 128-bit wide AES instructions
  126. case ARM::AESD:
  127. case ARM::AESE:
  128. case ARM::AESMC:
  129. case ARM::AESIMC:
  130. // No CondCode.
  131. return true;
  132. // 128-bit and 64-bit wide bitwise ops (when condition = al)
  133. case ARM::VANDd:
  134. case ARM::VANDq:
  135. case ARM::VORRd:
  136. case ARM::VORRq:
  137. case ARM::VEORd:
  138. case ARM::VEORq:
  139. case ARM::VMVNd:
  140. case ARM::VMVNq:
  141. // VMOV of 64-bit value between D registers (when condition = al)
  142. case ARM::VMOVD:
  143. // VMOV of 64 bit value from GPRs (when condition = al)
  144. case ARM::VMOVDRR:
  145. // VMOV of immediate into D or Q registers (when condition = al)
  146. case ARM::VMOVv2i64:
  147. case ARM::VMOVv1i64:
  148. case ARM::VMOVv2f32:
  149. case ARM::VMOVv4f32:
  150. case ARM::VMOVv2i32:
  151. case ARM::VMOVv4i32:
  152. case ARM::VMOVv4i16:
  153. case ARM::VMOVv8i16:
  154. case ARM::VMOVv8i8:
  155. case ARM::VMOVv16i8:
  156. // Loads (when condition = al)
  157. // VLD Dn, [Rn, #imm]
  158. case ARM::VLDRD:
  159. // VLDM
  160. case ARM::VLDMDDB_UPD:
  161. case ARM::VLDMDIA_UPD:
  162. case ARM::VLDMDIA:
  163. // VLDn to all lanes.
  164. case ARM::VLD1d64:
  165. case ARM::VLD1q64:
  166. case ARM::VLD1d32:
  167. case ARM::VLD1q32:
  168. case ARM::VLD2b32:
  169. case ARM::VLD2d32:
  170. case ARM::VLD2q32:
  171. case ARM::VLD1d16:
  172. case ARM::VLD1q16:
  173. case ARM::VLD2d16:
  174. case ARM::VLD2q16:
  175. case ARM::VLD1d8:
  176. case ARM::VLD1q8:
  177. case ARM::VLD2b8:
  178. case ARM::VLD2d8:
  179. case ARM::VLD2q8:
  180. case ARM::VLD3d32:
  181. case ARM::VLD3q32:
  182. case ARM::VLD3d16:
  183. case ARM::VLD3q16:
  184. case ARM::VLD3d8:
  185. case ARM::VLD3q8:
  186. case ARM::VLD4d32:
  187. case ARM::VLD4q32:
  188. case ARM::VLD4d16:
  189. case ARM::VLD4q16:
  190. case ARM::VLD4d8:
  191. case ARM::VLD4q8:
  192. // VLD1 (single element to one lane)
  193. case ARM::VLD1LNd32:
  194. case ARM::VLD1LNd32_UPD:
  195. case ARM::VLD1LNd8:
  196. case ARM::VLD1LNd8_UPD:
  197. case ARM::VLD1LNd16:
  198. case ARM::VLD1LNd16_UPD:
  199. // VLD1 (single element to all lanes)
  200. case ARM::VLD1DUPd32:
  201. case ARM::VLD1DUPd32wb_fixed:
  202. case ARM::VLD1DUPd32wb_register:
  203. case ARM::VLD1DUPd16:
  204. case ARM::VLD1DUPd16wb_fixed:
  205. case ARM::VLD1DUPd16wb_register:
  206. case ARM::VLD1DUPd8:
  207. case ARM::VLD1DUPd8wb_fixed:
  208. case ARM::VLD1DUPd8wb_register:
  209. case ARM::VLD1DUPq32:
  210. case ARM::VLD1DUPq32wb_fixed:
  211. case ARM::VLD1DUPq32wb_register:
  212. case ARM::VLD1DUPq16:
  213. case ARM::VLD1DUPq16wb_fixed:
  214. case ARM::VLD1DUPq16wb_register:
  215. case ARM::VLD1DUPq8:
  216. case ARM::VLD1DUPq8wb_fixed:
  217. case ARM::VLD1DUPq8wb_register:
  218. // VMOV
  219. case ARM::VSETLNi32:
  220. case ARM::VSETLNi16:
  221. case ARM::VSETLNi8:
  222. return CondCodeIsAL(MI);
  223. };
  224. return false;
  225. }
  226. bool ARMFixCortexA57AES1742098::runOnMachineFunction(MachineFunction &F) {
  227. LLVM_DEBUG(dbgs() << "***** ARMFixCortexA57AES1742098 *****\n");
  228. auto &STI = F.getSubtarget<ARMSubtarget>();
  229. // Fix not requested or AES instructions not present: skip pass.
  230. if (!STI.hasAES() || !STI.fixCortexA57AES1742098())
  231. return false;
  232. const ARMBaseRegisterInfo *TRI = STI.getRegisterInfo();
  233. const ARMBaseInstrInfo *TII = STI.getInstrInfo();
  234. auto &RDA = getAnalysis<ReachingDefAnalysis>();
  235. // Analyze whole function to find instructions which need fixing up...
  236. SmallVector<AESFixupLocation> FixupLocsForFn{};
  237. analyzeMF(F, RDA, TRI, FixupLocsForFn);
  238. // ... and fix the instructions up all at the same time.
  239. bool Changed = false;
  240. LLVM_DEBUG(dbgs() << "Inserting " << FixupLocsForFn.size() << " fixup(s)\n");
  241. for (AESFixupLocation &FixupLoc : FixupLocsForFn) {
  242. insertAESFixup(FixupLoc, TII, TRI);
  243. Changed |= true;
  244. }
  245. return Changed;
  246. }
  247. void ARMFixCortexA57AES1742098::analyzeMF(
  248. MachineFunction &MF, ReachingDefAnalysis &RDA,
  249. const ARMBaseRegisterInfo *TRI,
  250. SmallVectorImpl<AESFixupLocation> &FixupLocsForFn) const {
  251. unsigned MaxAllowedFixups = 0;
  252. for (MachineBasicBlock &MBB : MF) {
  253. for (MachineInstr &MI : MBB) {
  254. if (!isFirstAESPairInstr(MI))
  255. continue;
  256. // Found an instruction to check the operands of.
  257. LLVM_DEBUG(dbgs() << "Found AES Pair starting: " << MI);
  258. assert(MI.getNumExplicitOperands() == 3 && MI.getNumExplicitDefs() == 1 &&
  259. "Unknown AES Instruction Format. Expected 1 def, 2 uses.");
  260. // A maximum of two fixups should be inserted for each AES pair (one per
  261. // register use).
  262. MaxAllowedFixups += 2;
  263. // Inspect all operands, choosing whether to insert a fixup.
  264. for (MachineOperand &MOp : MI.uses()) {
  265. SmallPtrSet<MachineInstr *, 1> AllDefs{};
  266. RDA.getGlobalReachingDefs(&MI, MOp.getReg(), AllDefs);
  267. // Planned Fixup: This should be added to FixupLocsForFn at most once.
  268. AESFixupLocation NewLoc{&MBB, &MI, &MOp};
  269. // In small functions with loops, this operand may be both a live-in and
  270. // have definitions within the function itself. These will need a fixup.
  271. bool IsLiveIn = MF.front().isLiveIn(MOp.getReg());
  272. // If the register doesn't have defining instructions, and is not a
  273. // live-in, then something is wrong and the fixup must always be
  274. // inserted to be safe.
  275. if (!IsLiveIn && AllDefs.size() == 0) {
  276. LLVM_DEBUG(dbgs()
  277. << "Fixup Planned: No Defining Instrs found, not live-in: "
  278. << printReg(MOp.getReg(), TRI) << "\n");
  279. FixupLocsForFn.emplace_back(NewLoc);
  280. continue;
  281. }
  282. auto IsUnsafe = [](MachineInstr *MI) -> bool {
  283. return !isSafeAESInput(*MI);
  284. };
  285. size_t UnsafeCount = llvm::count_if(AllDefs, IsUnsafe);
  286. // If there are no unsafe definitions...
  287. if (UnsafeCount == 0) {
  288. // ... and the register is not live-in ...
  289. if (!IsLiveIn) {
  290. // ... then skip the fixup.
  291. LLVM_DEBUG(dbgs() << "No Fixup: Defining instrs are all safe: "
  292. << printReg(MOp.getReg(), TRI) << "\n");
  293. continue;
  294. }
  295. // Otherwise, the only unsafe "definition" is a live-in, so insert the
  296. // fixup at the start of the function.
  297. LLVM_DEBUG(dbgs()
  298. << "Fixup Planned: Live-In (with safe defining instrs): "
  299. << printReg(MOp.getReg(), TRI) << "\n");
  300. NewLoc.Block = &MF.front();
  301. NewLoc.InsertionPt = &*NewLoc.Block->begin();
  302. LLVM_DEBUG(dbgs() << "Moving Fixup for Live-In to immediately before "
  303. << *NewLoc.InsertionPt);
  304. FixupLocsForFn.emplace_back(NewLoc);
  305. continue;
  306. }
  307. // If a fixup is needed in more than one place, then the best place to
  308. // insert it is adjacent to the use rather than introducing a fixup
  309. // adjacent to each def.
  310. //
  311. // FIXME: It might be better to hoist this to the start of the BB, if
  312. // possible.
  313. if (IsLiveIn || UnsafeCount > 1) {
  314. LLVM_DEBUG(dbgs() << "Fixup Planned: Multiple unsafe defining instrs "
  315. "(including live-ins): "
  316. << printReg(MOp.getReg(), TRI) << "\n");
  317. FixupLocsForFn.emplace_back(NewLoc);
  318. continue;
  319. }
  320. assert(UnsafeCount == 1 && !IsLiveIn &&
  321. "At this point, there should be one unsafe defining instrs "
  322. "and the defined register should not be a live-in.");
  323. SmallPtrSetIterator<MachineInstr *> It =
  324. llvm::find_if(AllDefs, IsUnsafe);
  325. assert(It != AllDefs.end() &&
  326. "UnsafeCount == 1 but No Unsafe MachineInstr found.");
  327. MachineInstr *DefMI = *It;
  328. LLVM_DEBUG(
  329. dbgs() << "Fixup Planned: Found single unsafe defining instrs for "
  330. << printReg(MOp.getReg(), TRI) << ": " << *DefMI);
  331. // There is one unsafe defining instruction, which needs a fixup. It is
  332. // generally good to hoist the fixup to be adjacent to the defining
  333. // instruction rather than the using instruction, as the using
  334. // instruction may be inside a loop when the defining instruction is
  335. // not.
  336. MachineBasicBlock::iterator DefIt = DefMI;
  337. ++DefIt;
  338. if (DefIt != DefMI->getParent()->end()) {
  339. LLVM_DEBUG(dbgs() << "Moving Fixup to immediately after " << *DefMI
  340. << "And immediately before " << *DefIt);
  341. NewLoc.Block = DefIt->getParent();
  342. NewLoc.InsertionPt = &*DefIt;
  343. }
  344. FixupLocsForFn.emplace_back(NewLoc);
  345. }
  346. }
  347. }
  348. assert(FixupLocsForFn.size() <= MaxAllowedFixups &&
  349. "Inserted too many fixups for this function.");
  350. (void)MaxAllowedFixups;
  351. }
  352. void ARMFixCortexA57AES1742098::insertAESFixup(
  353. AESFixupLocation &FixupLoc, const ARMBaseInstrInfo *TII,
  354. const ARMBaseRegisterInfo *TRI) const {
  355. MachineOperand *OperandToFixup = FixupLoc.MOp;
  356. assert(OperandToFixup->isReg() && "OperandToFixup must be a register");
  357. Register RegToFixup = OperandToFixup->getReg();
  358. LLVM_DEBUG(dbgs() << "Inserting VORRq of " << printReg(RegToFixup, TRI)
  359. << " before: " << *FixupLoc.InsertionPt);
  360. // Insert the new `VORRq qN, qN, qN`. There are a few details here:
  361. //
  362. // The uses are marked as killed, even if the original use of OperandToFixup
  363. // is not killed, as the new instruction is clobbering the register. This is
  364. // safe even if there are other uses of `qN`, as the VORRq value-wise a no-op
  365. // (it is inserted for microarchitectural reasons).
  366. //
  367. // The def and the uses are still marked as Renamable if the original register
  368. // was, to avoid having to rummage through all the other uses and defs and
  369. // unset their renamable bits.
  370. unsigned Renamable = OperandToFixup->isRenamable() ? RegState::Renamable : 0;
  371. BuildMI(*FixupLoc.Block, FixupLoc.InsertionPt, DebugLoc(),
  372. TII->get(ARM::VORRq))
  373. .addReg(RegToFixup, RegState::Define | Renamable)
  374. .addReg(RegToFixup, RegState::Kill | Renamable)
  375. .addReg(RegToFixup, RegState::Kill | Renamable)
  376. .addImm((uint64_t)ARMCC::AL)
  377. .addReg(ARM::NoRegister);
  378. }
// Factory function that returns a newly allocated instance of this pass, for
// adding it to a pass manager.
// NOTE(review): this is an ARM-target pass, so the caller is presumably
// ARMTargetMachine rather than AArch64TargetMachine - confirm against the
// call site.
FunctionPass *llvm::createARMFixCortexA57AES1742098Pass() {
  return new ARMFixCortexA57AES1742098();
}