LoongArchExpandAtomicPseudoInsts.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628
  1. //==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that expands atomic pseudo instructions into
  10. // target instructions. This pass should be run at the last possible moment,
  11. // avoiding the possibility for other passes to break the requirements for
  12. // forward progress in the LL/SC block.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "LoongArch.h"
  16. #include "LoongArchInstrInfo.h"
  17. #include "LoongArchTargetMachine.h"
  18. #include "llvm/CodeGen/LivePhysRegs.h"
  19. #include "llvm/CodeGen/MachineFunctionPass.h"
  20. #include "llvm/CodeGen/MachineInstrBuilder.h"
  21. using namespace llvm;
  22. #define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME \
  23. "LoongArch atomic pseudo instruction expansion pass"
  24. namespace {
// Machine-function pass that lowers LoongArch atomic pseudo instructions
// (atomicrmw / cmpxchg pseudos produced by ISel) into real LL/SC loops.
// It runs after register allocation; the scratch/result registers are
// already carried as physical-register operands on the pseudos.
class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
public:
  // Cached instruction info for the current function's subtarget.
  const LoongArchInstrInfo *TII;
  static char ID;

  LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {
    initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  // Expand every pseudo in one basic block; returns true if anything changed.
  bool expandMBB(MachineBasicBlock &MBB);
  // Expand a single instruction if it is an atomic pseudo. NextMBBI is
  // updated so the caller's iteration survives the CFG surgery.
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Plain and masked (sub-word) atomicrmw binop expansion.
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  // Masked min/max expansion (needs a conditional store-back path).
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  // Plain and masked compare-and-exchange expansion.
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
};
  52. char LoongArchExpandAtomicPseudo::ID = 0;
  53. bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  54. TII =
  55. static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
  56. bool Modified = false;
  57. for (auto &MBB : MF)
  58. Modified |= expandMBB(MBB);
  59. return Modified;
  60. }
  61. bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  62. bool Modified = false;
  63. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  64. while (MBBI != E) {
  65. MachineBasicBlock::iterator NMBBI = std::next(MBBI);
  66. Modified |= expandMI(MBB, MBBI, NMBBI);
  67. MBBI = NMBBI;
  68. }
  69. return Modified;
  70. }
  71. bool LoongArchExpandAtomicPseudo::expandMI(
  72. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  73. MachineBasicBlock::iterator &NextMBBI) {
  74. switch (MBBI->getOpcode()) {
  75. case LoongArch::PseudoMaskedAtomicSwap32:
  76. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
  77. NextMBBI);
  78. case LoongArch::PseudoAtomicSwap32:
  79. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
  80. NextMBBI);
  81. case LoongArch::PseudoMaskedAtomicLoadAdd32:
  82. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  83. case LoongArch::PseudoMaskedAtomicLoadSub32:
  84. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  85. case LoongArch::PseudoAtomicLoadNand32:
  86. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
  87. NextMBBI);
  88. case LoongArch::PseudoAtomicLoadNand64:
  89. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
  90. NextMBBI);
  91. case LoongArch::PseudoMaskedAtomicLoadNand32:
  92. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
  93. NextMBBI);
  94. case LoongArch::PseudoAtomicLoadAdd32:
  95. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
  96. NextMBBI);
  97. case LoongArch::PseudoAtomicLoadSub32:
  98. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
  99. NextMBBI);
  100. case LoongArch::PseudoAtomicLoadAnd32:
  101. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
  102. NextMBBI);
  103. case LoongArch::PseudoAtomicLoadOr32:
  104. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
  105. case LoongArch::PseudoAtomicLoadXor32:
  106. return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
  107. NextMBBI);
  108. case LoongArch::PseudoMaskedAtomicLoadUMax32:
  109. return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
  110. NextMBBI);
  111. case LoongArch::PseudoMaskedAtomicLoadUMin32:
  112. return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
  113. NextMBBI);
  114. case LoongArch::PseudoCmpXchg32:
  115. return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  116. case LoongArch::PseudoCmpXchg64:
  117. return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  118. case LoongArch::PseudoMaskedCmpXchg32:
  119. return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  120. case LoongArch::PseudoMaskedAtomicLoadMax32:
  121. return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
  122. NextMBBI);
  123. case LoongArch::PseudoMaskedAtomicLoadMin32:
  124. return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
  125. NextMBBI);
  126. }
  127. return false;
  128. }
  129. static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
  130. MachineInstr &MI, DebugLoc DL,
  131. MachineBasicBlock *ThisMBB,
  132. MachineBasicBlock *LoopMBB,
  133. MachineBasicBlock *DoneMBB,
  134. AtomicRMWInst::BinOp BinOp, int Width) {
  135. Register DestReg = MI.getOperand(0).getReg();
  136. Register ScratchReg = MI.getOperand(1).getReg();
  137. Register AddrReg = MI.getOperand(2).getReg();
  138. Register IncrReg = MI.getOperand(3).getReg();
  139. AtomicOrdering Ordering =
  140. static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
  141. // .loop:
  142. // if(Ordering != AtomicOrdering::Monotonic)
  143. // dbar 0
  144. // ll.[w|d] dest, (addr)
  145. // binop scratch, dest, val
  146. // sc.[w|d] scratch, scratch, (addr)
  147. // beqz scratch, loop
  148. if (Ordering != AtomicOrdering::Monotonic)
  149. BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
  150. BuildMI(LoopMBB, DL,
  151. TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
  152. .addReg(AddrReg)
  153. .addImm(0);
  154. switch (BinOp) {
  155. default:
  156. llvm_unreachable("Unexpected AtomicRMW BinOp");
  157. case AtomicRMWInst::Xchg:
  158. BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
  159. .addReg(IncrReg)
  160. .addReg(LoongArch::R0);
  161. break;
  162. case AtomicRMWInst::Nand:
  163. BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
  164. .addReg(DestReg)
  165. .addReg(IncrReg);
  166. BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
  167. .addReg(ScratchReg)
  168. .addReg(LoongArch::R0);
  169. break;
  170. case AtomicRMWInst::Add:
  171. BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
  172. .addReg(DestReg)
  173. .addReg(IncrReg);
  174. break;
  175. case AtomicRMWInst::Sub:
  176. BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
  177. .addReg(DestReg)
  178. .addReg(IncrReg);
  179. break;
  180. case AtomicRMWInst::And:
  181. BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
  182. .addReg(DestReg)
  183. .addReg(IncrReg);
  184. break;
  185. case AtomicRMWInst::Or:
  186. BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
  187. .addReg(DestReg)
  188. .addReg(IncrReg);
  189. break;
  190. case AtomicRMWInst::Xor:
  191. BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg)
  192. .addReg(DestReg)
  193. .addReg(IncrReg);
  194. break;
  195. }
  196. BuildMI(LoopMBB, DL,
  197. TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), ScratchReg)
  198. .addReg(ScratchReg)
  199. .addReg(AddrReg)
  200. .addImm(0);
  201. BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
  202. .addReg(ScratchReg)
  203. .addMBB(LoopMBB);
  204. }
  205. static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
  206. MachineBasicBlock *MBB, Register DestReg,
  207. Register OldValReg, Register NewValReg,
  208. Register MaskReg, Register ScratchReg) {
  209. assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  210. assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  211. assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
  212. // res = oldval ^ ((oldval ^ newval) & masktargetdata);
  213. BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
  214. .addReg(OldValReg)
  215. .addReg(NewValReg);
  216. BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
  217. .addReg(ScratchReg)
  218. .addReg(MaskReg);
  219. BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
  220. .addReg(OldValReg)
  221. .addReg(ScratchReg);
  222. }
  223. static void doMaskedAtomicBinOpExpansion(
  224. const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
  225. MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
  226. MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
  227. assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  228. Register DestReg = MI.getOperand(0).getReg();
  229. Register ScratchReg = MI.getOperand(1).getReg();
  230. Register AddrReg = MI.getOperand(2).getReg();
  231. Register IncrReg = MI.getOperand(3).getReg();
  232. Register MaskReg = MI.getOperand(4).getReg();
  233. AtomicOrdering Ordering =
  234. static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
  235. // .loop:
  236. // if(Ordering != AtomicOrdering::Monotonic)
  237. // dbar 0
  238. // ll.w destreg, (alignedaddr)
  239. // binop scratch, destreg, incr
  240. // xor scratch, destreg, scratch
  241. // and scratch, scratch, masktargetdata
  242. // xor scratch, destreg, scratch
  243. // sc.w scratch, scratch, (alignedaddr)
  244. // beqz scratch, loop
  245. if (Ordering != AtomicOrdering::Monotonic)
  246. BuildMI(LoopMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
  247. BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
  248. .addReg(AddrReg)
  249. .addImm(0);
  250. switch (BinOp) {
  251. default:
  252. llvm_unreachable("Unexpected AtomicRMW BinOp");
  253. case AtomicRMWInst::Xchg:
  254. BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
  255. .addReg(IncrReg)
  256. .addImm(0);
  257. break;
  258. case AtomicRMWInst::Add:
  259. BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
  260. .addReg(DestReg)
  261. .addReg(IncrReg);
  262. break;
  263. case AtomicRMWInst::Sub:
  264. BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
  265. .addReg(DestReg)
  266. .addReg(IncrReg);
  267. break;
  268. case AtomicRMWInst::Nand:
  269. BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
  270. .addReg(DestReg)
  271. .addReg(IncrReg);
  272. BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
  273. .addReg(ScratchReg)
  274. .addReg(LoongArch::R0);
  275. // TODO: support other AtomicRMWInst.
  276. }
  277. insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
  278. ScratchReg);
  279. BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
  280. .addReg(ScratchReg)
  281. .addReg(AddrReg)
  282. .addImm(0);
  283. BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
  284. .addReg(ScratchReg)
  285. .addMBB(LoopMBB);
  286. }
  287. bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
  288. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
  289. AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
  290. MachineBasicBlock::iterator &NextMBBI) {
  291. MachineInstr &MI = *MBBI;
  292. DebugLoc DL = MI.getDebugLoc();
  293. MachineFunction *MF = MBB.getParent();
  294. auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  295. auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  296. // Insert new MBBs.
  297. MF->insert(++MBB.getIterator(), LoopMBB);
  298. MF->insert(++LoopMBB->getIterator(), DoneMBB);
  299. // Set up successors and transfer remaining instructions to DoneMBB.
  300. LoopMBB->addSuccessor(LoopMBB);
  301. LoopMBB->addSuccessor(DoneMBB);
  302. DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  303. DoneMBB->transferSuccessors(&MBB);
  304. MBB.addSuccessor(LoopMBB);
  305. if (IsMasked)
  306. doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
  307. Width);
  308. else
  309. doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
  310. NextMBBI = MBB.end();
  311. MI.eraseFromParent();
  312. LivePhysRegs LiveRegs;
  313. computeAndAddLiveIns(LiveRegs, *LoopMBB);
  314. computeAndAddLiveIns(LiveRegs, *DoneMBB);
  315. return true;
  316. }
  317. static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
  318. MachineBasicBlock *MBB, Register ValReg,
  319. Register ShamtReg) {
  320. BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
  321. .addReg(ValReg)
  322. .addReg(ShamtReg);
  323. BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
  324. .addReg(ValReg)
  325. .addReg(ShamtReg);
  326. }
// Expand a masked atomic min/max pseudo. Unlike the plain binops this needs a
// conditional store-back: when the loaded value already satisfies the min/max
// relation the loop skips the merge and stores the original word. CFG:
//
//   MBB -> LoopHead -+-> LoopIfBody -> LoopTail -+-> Tail -> Done
//                    '------------->-------------'    (LoopTail also loops
//                                                      back to LoopHead)
bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  // Insert new MBBs in layout order after MBB.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
  MF->insert(++TailMBB->getIterator(), DoneMBB);
  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(TailMBB);
  TailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);
  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  //
  // .loophead:
  //   dbar 0
  //   ll.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   move scratch1, destreg
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
      .addReg(AddrReg)
      .addImm(0);
  // Extract the sub-word lane for the comparison.
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  // "or rd, rj, r0" is a move: scratch1 defaults to the unmodified word, so
  // the store-back path works even when the comparison skips the merge.
  BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
      .addReg(DestReg)
      .addReg(LoongArch::R0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  // bgeu scratch2, incr, .looptail
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  // bgeu incr, scratch2, .looptail
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::Max:
    // Signed compare needs the lane sign-extended first; operand 6 holds the
    // shift amount that positions the lane's sign bit at bit 31.
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    // bge scratch2, incr, .looptail
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::Min:
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    // bge incr, scratch2, .looptail
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
    // TODO: support other AtomicRMWInst.
  }
  // .loopifbody (incr wins the comparison — merge it into the word):
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);
  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   beqz scratch1, loophead
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
      .addReg(Scratch1Reg)
      .addReg(AddrReg)
      .addImm(0);
  BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
      .addReg(Scratch1Reg)
      .addMBB(LoopHeadMBB);
  // .tail:
  //   dbar 0x700
  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Post-RA pass: live-in lists of the new blocks must be rebuilt by hand.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *TailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);
  return true;
}
// Expand a cmpxchg pseudo (plain or masked). CFG:
//
//   MBB -> LoopHead -+-> LoopTail -> Done   (success path; LoopTail also
//                    |                       loops back to LoopHead on a
//                    '-> Tail -----> Done    failed sc)  (Tail = compare
//                                            mismatch path)
bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  // Insert new MBBs in layout order after MBB.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), TailMBB);
  MF->insert(++TailMBB->getIterator(), DoneMBB);
  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(TailMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  TailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  if (!IsMasked) {
    // Full-width compare-and-exchange.
    // .loophead:
    //   ll.[w|d] dest, (addr)
    //   bne dest, cmpval, tail
    BuildMI(LoopHeadMBB, DL,
            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(TailMBB);
    // .looptail:
    //   dbar 0
    //   move scratch, newval
    //   sc.[w|d] scratch, scratch, (addr)
    //   beqz scratch, loophead
    //   b done
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
    // "or rd, rj, r0" is a move.
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(NewValReg)
        .addReg(LoongArch::R0);
    BuildMI(LoopTailMBB, DL,
            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
        .addReg(ScratchReg)
        .addMBB(LoopHeadMBB);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  } else {
    // Sub-word compare-and-exchange: compare only the masked lane, and on
    // success splice newval into that lane while preserving the other bits.
    // .loophead:
    //   ll.[w|d] dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, tail
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL,
            TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(TailMBB);
    // .looptail:
    //   dbar 0
    //   andn scratch, dest, mask
    //   or scratch, scratch, newval
    //   sc.[w|d] scratch, scratch, (addr)
    //   beqz scratch, loophead
    //   b done
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0);
    // Clear the lane, then OR in the (pre-shifted) new value.
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
        .addReg(ScratchReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL,
            TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg)
        .addImm(0);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
        .addReg(ScratchReg)
        .addMBB(LoopHeadMBB);
    BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
  }
  // .tail (compare failed):
  //   dbar 0x700
  BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(0x700);
  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Post-RA pass: live-in lists of the new blocks must be rebuilt by hand.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *TailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);
  return true;
}
  560. } // end namespace
// Register the pass with the PassRegistry under its command-line name.
INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
                LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

namespace llvm {

// Factory used by the LoongArch target machine to add this pass to the
// codegen pipeline. Caller takes ownership.
FunctionPass *createLoongArchExpandAtomicPseudoPass() {
  return new LoongArchExpandAtomicPseudo();
}

} // end namespace llvm