
//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISCV atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}
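// The following helpers map an atomic ordering onto the LR/SC opcode variant
// that carries the required .aq/.rl annotations: the load-reserved side takes
// .aq for acquire-or-stronger orderings, the store-conditional side takes .rl
// for release-or-stronger orderings, and sequentially consistent operations
// use .aqrl.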
static unsigned getLRForRMW32(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_AQ_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_AQ_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
  if (Width == 32)
    return getLRForRMW32(Ordering);
  if (Width == 64)
    return getLRForRMW64(Ordering);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
  if (Width == 32)
    return getSCForRMW32(Ordering);
  if (Width == 64)
    return getSCForRMW64(Ordering);
  llvm_unreachable("Unexpected SC width\n");
}
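// Emit the LR/SC loop for an unmasked word- or doubleword-sized atomicrmw.
// The pseudo's operands are (dest, scratch, addr, incr, ordering). Of the
// unmasked operations only Nand is routed here (see the switch in expandMI
// above), since the A extension has no AMO instruction for nand.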
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}
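// Insert instructions that merge the bits of NewValReg selected by MaskReg
// into OldValReg, writing the result to DestReg and clobbering ScratchReg.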
static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}
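// Emit the LR/SC loop for a masked (sub-word) atomicrmw. The pseudo's
// operands are (dest, scratch, aligned addr, incr, mask, ordering); the loop
// operates on the naturally-aligned word containing the target field, and the
// masked merge leaves the bits outside the mask unchanged.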
static void doMaskedAtomicBinOpExpansion(
    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}
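// Sign-extend the value in ValReg in place by shifting it left and then
// arithmetic-right by the amount held in ShamtReg.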
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by
// modifying the branch within 'LoopHead' (which performs the same
// comparison). This is a valid transformation because after altering the
// LoopHead's BNE destination, the BNE following the cmpxchg becomes
// redundant and can be deleted. In the case of a masked cmpxchg, an
// appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;
  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}
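// Expand a (possibly masked) cmpxchg pseudo into an LR/SC loop. The loop head
// loads the current value and branches to LoopHeadBNETarget (DoneMBB unless a
// following BNE was folded in by tryToFoldBNEOnCmpXchgResult) when the
// comparison fails; the loop tail attempts the store-conditional and retries
// on failure.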
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

FunctionPass *createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}

} // end of namespace llvm