AtomicExpandPass.cpp

//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
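
// Editor's sketch (illustrative, not part of the upstream source): on a
// target that requests AtomicExpansionKind::CmpXChg for RMW operations, an
// instruction such as
//
//   %old = atomicrmw add i32* %p, i32 1 seq_cst
//
// is rewritten by this pass into a compare-exchange loop along the lines of
//
//   %init = load i32, i32* %p
//   br label %atomicrmw.start
// atomicrmw.start:
//   %loaded    = phi i32 [ %init, %entry ], [ %newloaded, %atomicrmw.start ]
//   %new       = add i32 %loaded, 1
//   %pair      = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst
//   %newloaded = extractvalue { i32, i1 } %pair, 0
//   %success   = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %atomicrmw.end, label %atomicrmw.start
// atomicrmw.end:
//
// The exact shape depends on the expansion kind the target selects; see
// insertRMWCmpXchgLoop and insertRMWLLSCLoop below.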
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>

using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

class AtomicExpand : public FunctionPass {
  const TargetLowering *TLI = nullptr;

public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpand() : FunctionPass(ID) {
    initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool expandAtomicStore(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilder<> &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);
};

} // end anonymous namespace

char AtomicExpand::ID = 0;

char &llvm::AtomicExpandID = AtomicExpand::ID;

INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
                false)

FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
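
// Editor's note (illustrative, not part of the upstream source): an
// `atomicrmw add i64* %p, i64 1` with `align 4` fails the Alignment >= Size
// check above (4 < 8), so it is routed to the __atomic_* libcall path in
// runOnFunction() even if the target supports 64-bit atomics; the same
// happens when Size exceeds TLI->getMaxAtomicSizeInBitsSupported() / 8.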
bool AtomicExpand::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
    return false;
  TLI = TM.getSubtargetImpl(F)->getTargetLowering();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    }

    if (LI) {
      if (LI->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        LI = convertAtomicLoadToIntegerType(LI);
        assert(LI->getType()->isIntegerTy() && "invariant broken");
        MadeChange = true;
      }

      MadeChange |= tryExpandAtomicLoad(LI);
    } else if (SI) {
      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        SI = convertAtomicStoreToIntegerType(SI);
        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      if (TLI->shouldExpandAtomicStoreInIR(SI))
        MadeChange |= expandAtomicStore(SI);
    } else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      // we try them in that order.
      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        AtomicRMWInst::BinOp Op = RMWI->getOperation();
        if (Op == AtomicRMWInst::Xchg &&
            RMWI->getValOperand()->getType()->isFloatingPointTy()) {
          // TODO: add a TLI hook to control this so that each target can
          // convert to lowering the original type one at a time.
          RMWI = convertAtomicXchgToIntegerType(RMWI);
          assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
                 "invariant broken");
          MadeChange = true;
        }
        unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
        unsigned ValueSize = getAtomicOpSize(RMWI);
        if (ValueSize < MinCASSize &&
            (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
             Op == AtomicRMWInst::And)) {
          RMWI = widenPartwordAtomicRMW(RMWI);
          MadeChange = true;
        }

        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
             "unimplemented - floating point not legal at IR level");
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        CASI = convertCmpXchgToIntegerType(CASI);
        assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
               "invariant broken");
        MadeChange = true;
      }

      MadeChange |= tryExpandAtomicCmpXchg(CASI);
    }
  }
  return MadeChange;
}

bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
  IRBuilder<> Builder(I);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}
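
// Editor's note (illustrative, target-dependent): on a target that answers
// true to shouldInsertFencesForAtomic(), a `store atomic i32 ... release`
// has its ordering relaxed to monotonic in runOnFunction() and is then
// bracketed here, conceptually producing something like
//
//   fence release
//   store atomic i32 %v, i32* %p monotonic
//
// Whether a trailing fence is also emitted (e.g. for seq_cst) is decided by
// the target's emitTrailingFence() hook.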
/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  IRBuilder<> Builder(LI);

  Value *Addr = LI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}
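
// Editor's note (illustrative, not part of the upstream source): this turns
//
//   %v = load atomic float, float* %p acquire, align 4
//
// into roughly
//
//   %p.int = bitcast float* %p to i32*
//   %v.int = load atomic i32, i32* %p.int acquire, align 4
//   %v     = bitcast i32 %v.int to float
//
// so that later expansions only need to reason about integer loads.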
AtomicRMWInst *
AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  IRBuilder<> Builder(RMWI);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
  Value *NewVal = Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
                              RMWI->getAlign(), RMWI->getOrdering());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}

bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  IRBuilder<> Builder(LI);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  IRBuilder<> Builder(LI);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}
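
// Editor's note (illustrative): expandAtomicLoadToCmpXchg rewrites an atomic
// load as a compare-exchange that "swaps" zero for zero, e.g.
//
//   %v = load atomic i64, i64* %p seq_cst, align 8
//
// becomes roughly
//
//   %pair = cmpxchg i64* %p, i64 0, i64 0 seq_cst seq_cst
//   %v    = extractvalue { i64, i1 } %pair, 0
//
// The store only happens when the location already holds 0, and even then it
// stores 0 back, so the memory contents are unchanged either way.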
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
  IRBuilder<> Builder(SI);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
  IRBuilder<> Builder(SI);
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), SI->getOrdering());
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  return tryExpandAtomicRMW(AI);
}
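
// Editor's note (illustrative): under this expansion a too-wide atomic store
// such as
//
//   store atomic i64 %v, i64* %p seq_cst, align 8
//
// becomes
//
//   atomicrmw xchg i64* %p, i64 %v seq_cst
//
// whose (unused) result is simply dropped; the xchg is then lowered through
// tryExpandAtomicRMW like any other atomicrmw.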
static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP types.
  bool NeedBitcast = OrigTy->isFloatingPointTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    unsigned AS = Addr->getType()->getPointerAddressSpace();
    Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::FAdd:
    return Builder.CreateFAdd(Loaded, Inc, "new");
  case AtomicRMWInst::FSub:
    return Builder.CreateFSub(Loaded, Inc, "new");
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
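
// Editor's note (illustrative): the min/max flavours have no single IR
// arithmetic instruction, so they are emitted as a compare plus select, e.g.
// for AtomicRMWInst::Max:
//
//   %cmp = icmp sgt iN %loaded, %inc
//   %new = select i1 %cmp, iN %loaded, iN %inc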
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
  switch (Kind) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      // TODO: Handle atomicrmw fadd/fsub
      if (AI->getType()->isFloatingPointTy())
        return false;

      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AI->getOperationName(AI->getOperation()) << " operation at "
               << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << " WordType: ";
  PrintObj(PMV.WordType);
  O << " ValueType: ";
  PrintObj(PMV.ValueType);
  O << " AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << " ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << " Mask: ";
  PrintObj(PMV.Mask);
  O << " Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
                                           Type *ValueType, Value *Addr,
                                           Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = ValueType;
  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0);
    return PMV;
  }

  assert(ValueSize < MinWordSize);

  Type *WordPtrType =
      PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());

  // TODO: we could skip some of this if AddrAlign >= MinWordSize.
  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
  PMV.AlignedAddr = Builder.CreateIntToPtr(
      Builder.CreateAnd(AddrInt, ~(uint64_t)(MinWordSize - 1)), WordPtrType,
      "AlignedAddr");
  PMV.AlignedAddrAlignment = Align(MinWordSize);

  Value *PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");
  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
  return PMV;
}
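
// Editor's worked example (illustrative): for an i8 operand at an address
// with Addr % 4 == 2, MinWordSize == 4 and a little-endian layout, the values
// produced above are:
//
//   AlignedAddr = Addr & ~3          ; the containing 32-bit word
//   PtrLSB      = 2
//   ShiftAmt    = 2 * 8 = 16
//   Mask        = 0xFF << 16 = 0x00FF0000
//   Inv_Mask    = 0xFF00FFFF
//
// On a big-endian layout PtrLSB is first XOR'ed with (4 - 1) = 3, giving
// ShiftAmt = 8, so the byte is addressed from the other end of the word.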
static Value *extractMaskedValue(IRBuilder<> &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.ValueType, "extracted");
  return Trunc;
}

static Value *insertMaskedValue(IRBuilder<> &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilder<> &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin: {
    // Finally, comparison ops will operate on the full value, so
    // truncate down to the original size, and expand out again after
    // doing the operation.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI =
      Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
                              PMV.AlignedAddrAlignment, AI->getOrdering());

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
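
// Editor's note (illustrative): widening keeps the neighbouring bytes intact
// by construction of the operand. With the i8/offset-2 example above,
//
//   %old = atomicrmw or i8* %p, i8 1 monotonic
//
// becomes a word-sized
//
//   atomicrmw or i32* %AlignedAddr, i32 65536 monotonic   ; 1 << 16
//
// (or-ing zeros leaves the other bytes unchanged), while for `and` the
// shifted operand is first or'ed with Inv_Mask so the bits outside the target
// byte are and'ed with all-ones and therefore preserved.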
bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  IRBuilder<> Builder(CI);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  IRBuilder<> Builder(I);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  IRBuilder<> Builder(AI);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  IRBuilder<> Builder(CI);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
  Value *NewVal = PerformOp(Builder, Loaded);
  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
AtomicCmpXchgInst *
AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  IRBuilder<> Builder(CI);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = UndefValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}
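
// Editor's note (illustrative, assuming a target with 64-bit pointers): a
// pointer cmpxchg such as
//
//   %r = cmpxchg i8** %p, i8* %expected, i8* %new acq_rel monotonic
//
// is rewritten into roughly
//
//   %p.int   = bitcast i8** %p to i64*
//   %exp.int = ptrtoint i8* %expected to i64
//   %new.int = ptrtoint i8* %new to i64
//   %r.int   = cmpxchg i64* %p.int, i64 %exp.int, i64 %new.int acq_rel monotonic
//   ; the loaded value is converted back with inttoptr and the success bit is
//   ; copied through into the result struct.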
  1022. bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  1023. AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  1024. AtomicOrdering FailureOrder = CI->getFailureOrdering();
  1025. Value *Addr = CI->getPointerOperand();
  1026. BasicBlock *BB = CI->getParent();
  1027. Function *F = BB->getParent();
  1028. LLVMContext &Ctx = F->getContext();
  1029. // If shouldInsertFencesForAtomic() returns true, then the target does not
  1030. // want to deal with memory orders, and emitLeading/TrailingFence should take
  1031. // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  1032. // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we
  // can delay emitting this barrier until we know a store is actually going
  // to be attempted. The cost of this delay is that we need 2 copies of the
  // block emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  // This grabs the DebugLoc from CI
  IRBuilder<> Builder(CI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;
  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has
    // happened, so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
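
// An atomicrmw is "idempotent" if it can never change the value in memory.
// For example (illustrative IR; the ordering is arbitrary):
//   atomicrmw add i32* %p, i32 0 seq_cst    ; idempotent
//   atomicrmw and i32* %p, i32 -1 seq_cst   ; idempotent
//   atomicrmw add i32* %p, i32 1 seq_cst    ; not idempotent
// When the target supports it, such operations can be lowered to a fenced
// load instead of a full read-modify-write (see simplifyIdempotentRMW below).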
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax, which are idempotent when
  // the operand is the corresponding identity value (INT_MAX for Min, INT_MIN
  // for Max, UINT_MAX for UMin, 0 for UMax).
  default:
    return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilder<> &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove the branch
  // entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicCmpXchg(CI);
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  }
}

// Note: This function is exposed externally by AtomicExpandUtils.h
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  IRBuilder<> Builder(AI);
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilder<> &Builder, Value *Loaded) {
        return performAtomicOp(AI->getOperation(), Builder, Loaded,
                               AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}

// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient, the size
// must be one of the potentially-specialized sizes, and the value
// type must actually exist in C on the target (otherwise, the
// function wouldn't actually be defined.)
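//
// For example (an illustrative sketch): an i32 atomic operation with align >= 4
// can use a sized call such as __atomic_fetch_add_4, whereas an under-aligned
// or oddly-sized access cannot, and the caller falls back to a generic
// __atomic_* call (or, for RMW operations with no generic form, to a CAS loop;
// see expandAtomicRMWToLibcall below).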
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that
  // you can express in C". It seems to be the case that int128 is
  // supported on all 64-bit platforms, otherwise only up to 64-bit
  // integers are supported. If we get this wrong, then we'll try to
  // call a sized libcall that doesn't actually exist. There should
  // really be some more reliable way in LLVM of determining integer
  // sizes which are valid in the target's C ABI...
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}

void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
    // No atomic libcalls are available for max/min/umax/umin or for the
    // floating-point operations; the caller falls back to a CAS loop.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}

void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilder<> &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
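//
// For example (an illustrative sketch; the i8* cast of %p is elided), a
// 4-byte atomic load
//   %v = load atomic i32, i32* %p seq_cst, align 4
// becomes roughly
//   %v = call i32 @__atomic_load_4(i8* %p.i8, i32 5)
// where 5 is the C ABI encoding of memory_order_seq_cst produced by toCABI().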
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1: RTLibType = Libcalls[1]; break;
    case 2: RTLibType = Libcalls[2]; break;
    case 4: RTLibType = Libcalls[3]; break;
    case 8: RTLibType = Libcalls[4]; break;
    case 16: RTLibType = Libcalls[5]; break;
    default:
      // canUseSizedAtomicCall() only accepts the sizes above; the default
      // also keeps RTLibType provably initialized.
      llvm_unreachable("Unexpected size for sized atomic libcall");
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations; the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                         int ordering)
  //  bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                 void *desired, int success_order,
  //                                 int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  Value *AllocaCASExpected_i8 = nullptr;
  AllocaInst *AllocaValue = nullptr;
  Value *AllocaValue_i8 = nullptr;
  AllocaInst *AllocaResult = nullptr;
  Value *AllocaResult_i8 = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // Note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertible. For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
  Value *PtrVal =
      Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();

    AllocaCASExpected_i8 = Builder.CreateBitCast(
        AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
    Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected_i8);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      AllocaValue_i8 =
          Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
      Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue_i8);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
    AllocaResult_i8 =
        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
    Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    Args.push_back(AllocaResult_i8);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = UndefValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}