ExpandLargeFpConvert.cpp 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664
  1. //===--- ExpandLargeFpConvert.cpp - Expand large fp convert----------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass expands ‘fptoui .. to’, ‘fptosi .. to’, ‘uitofp .. to’,
  10. // ‘sitofp .. to’ instructions with a bitwidth above a threshold into
  11. // auto-generated functions. This is useful for targets like x86_64 that cannot
  12. // lower fp conversions with more than 128 bits.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/ADT/SmallVector.h"
  16. #include "llvm/ADT/StringExtras.h"
  17. #include "llvm/Analysis/GlobalsModRef.h"
  18. #include "llvm/CodeGen/Passes.h"
  19. #include "llvm/CodeGen/TargetLowering.h"
  20. #include "llvm/CodeGen/TargetPassConfig.h"
  21. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  22. #include "llvm/IR/IRBuilder.h"
  23. #include "llvm/IR/InstIterator.h"
  24. #include "llvm/IR/PassManager.h"
  25. #include "llvm/InitializePasses.h"
  26. #include "llvm/Pass.h"
  27. #include "llvm/Support/CommandLine.h"
  28. #include "llvm/Target/TargetMachine.h"
  29. using namespace llvm;
  30. static cl::opt<unsigned>
  31. ExpandFpConvertBits("expand-fp-convert-bits", cl::Hidden,
  32. cl::init(llvm::IntegerType::MAX_INT_BITS),
  33. cl::desc("fp convert instructions on integers with "
  34. "more than <N> bits are expanded."));
  35. /// Generate code to convert a fp number to integer, replacing FPToS(U)I with
  36. /// the generated code. This currently generates code similarly to compiler-rt's
  37. /// implementations.
  38. ///
  39. /// An example IR generated from compiler-rt/fixsfdi.c looks like below:
  40. /// define dso_local i64 @foo(float noundef %a) local_unnamed_addr #0 {
  41. /// entry:
  42. /// %0 = bitcast float %a to i32
  43. /// %conv.i = zext i32 %0 to i64
  44. /// %tobool.not = icmp sgt i32 %0, -1
  45. /// %conv = select i1 %tobool.not, i64 1, i64 -1
  46. /// %and = lshr i64 %conv.i, 23
  47. /// %shr = and i64 %and, 255
  48. /// %and2 = and i64 %conv.i, 8388607
  49. /// %or = or i64 %and2, 8388608
  50. /// %cmp = icmp ult i64 %shr, 127
  51. /// br i1 %cmp, label %cleanup, label %if.end
  52. ///
  53. /// if.end: ; preds = %entry
  54. /// %sub = add nuw nsw i64 %shr, 4294967169
  55. /// %conv5 = and i64 %sub, 4294967232
  56. /// %cmp6.not = icmp eq i64 %conv5, 0
  57. /// br i1 %cmp6.not, label %if.end12, label %if.then8
  58. ///
  59. /// if.then8: ; preds = %if.end
  60. /// %cond11 = select i1 %tobool.not, i64 9223372036854775807, i64 -9223372036854775808
  61. /// br label %cleanup
  62. ///
  63. /// if.end12: ; preds = %if.end
  64. /// %cmp13 = icmp ult i64 %shr, 150
  65. /// br i1 %cmp13, label %if.then15, label %if.else
  66. ///
  67. /// if.then15: ; preds = %if.end12
  68. /// %sub16 = sub nuw nsw i64 150, %shr
  69. /// %shr17 = lshr i64 %or, %sub16
  70. /// %mul = mul nsw i64 %shr17, %conv
  71. /// br label %cleanup
  72. ///
  73. /// if.else: ; preds = %if.end12
  74. /// %sub18 = add nsw i64 %shr, -150
  75. /// %shl = shl i64 %or, %sub18
  76. /// %mul19 = mul nsw i64 %shl, %conv
  77. /// br label %cleanup
  78. ///
  79. /// cleanup: ; preds = %entry, %if.else, %if.then15, %if.then8
  80. /// %retval.0 = phi i64 [ %cond11, %if.then8 ], [ %mul, %if.then15 ], [ %mul19, %if.else ], [ 0, %entry ]
  81. /// ret i64 %retval.0
  82. /// }
  83. ///
  84. /// Replace fp to integer with generated code.
  85. static void expandFPToI(Instruction *FPToI) {
  86. IRBuilder<> Builder(FPToI);
  87. auto *FloatVal = FPToI->getOperand(0);
  88. IntegerType *IntTy = cast<IntegerType>(FPToI->getType());
  89. unsigned BitWidth = FPToI->getType()->getIntegerBitWidth();
  90. unsigned FPMantissaWidth = FloatVal->getType()->getFPMantissaWidth() - 1;
  91. // FIXME: fp16's range is covered by i32. So `fptoi half` can convert
  92. // to i32 first following a sext/zext to target integer type.
  93. Value *A1 = nullptr;
  94. if (FloatVal->getType()->isHalfTy()) {
  95. if (FPToI->getOpcode() == Instruction::FPToUI) {
  96. Value *A0 = Builder.CreateFPToUI(FloatVal, Builder.getIntNTy(32));
  97. A1 = Builder.CreateZExt(A0, IntTy);
  98. } else { // FPToSI
  99. Value *A0 = Builder.CreateFPToSI(FloatVal, Builder.getIntNTy(32));
  100. A1 = Builder.CreateSExt(A0, IntTy);
  101. }
  102. FPToI->replaceAllUsesWith(A1);
  103. FPToI->dropAllReferences();
  104. FPToI->eraseFromParent();
  105. return;
  106. }
  107. // fp80 conversion is implemented by fpext to fp128 first then do the
  108. // conversion.
  109. FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  110. unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
  111. unsigned ExponentWidth = FloatWidth - FPMantissaWidth - 1;
  112. unsigned ExponentBias = (1 << (ExponentWidth - 1)) - 1;
  113. Value *ImplicitBit = Builder.CreateShl(
  114. Builder.getIntN(BitWidth, 1), Builder.getIntN(BitWidth, FPMantissaWidth));
  115. Value *SignificandMask =
  116. Builder.CreateSub(ImplicitBit, Builder.getIntN(BitWidth, 1));
  117. Value *NegOne = Builder.CreateSExt(
  118. ConstantInt::getSigned(Builder.getInt32Ty(), -1), IntTy);
  119. Value *NegInf =
  120. Builder.CreateShl(ConstantInt::getSigned(IntTy, 1),
  121. ConstantInt::getSigned(IntTy, BitWidth - 1));
  122. BasicBlock *Entry = Builder.GetInsertBlock();
  123. Function *F = Entry->getParent();
  124. Entry->setName(Twine(Entry->getName(), "fp-to-i-entry"));
  125. BasicBlock *End =
  126. Entry->splitBasicBlock(Builder.GetInsertPoint(), "fp-to-i-cleanup");
  127. BasicBlock *IfEnd =
  128. BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end", F, End);
  129. BasicBlock *IfThen5 =
  130. BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then5", F, End);
  131. BasicBlock *IfEnd9 =
  132. BasicBlock::Create(Builder.getContext(), "fp-to-i-if-end9", F, End);
  133. BasicBlock *IfThen12 =
  134. BasicBlock::Create(Builder.getContext(), "fp-to-i-if-then12", F, End);
  135. BasicBlock *IfElse =
  136. BasicBlock::Create(Builder.getContext(), "fp-to-i-if-else", F, End);
  137. Entry->getTerminator()->eraseFromParent();
  138. // entry:
  139. Builder.SetInsertPoint(Entry);
  140. Value *FloatVal0 = FloatVal;
  141. // fp80 conversion is implemented by fpext to fp128 first then do the
  142. // conversion.
  143. if (FloatVal->getType()->isX86_FP80Ty())
  144. FloatVal0 =
  145. Builder.CreateFPExt(FloatVal, Type::getFP128Ty(Builder.getContext()));
  146. Value *ARep0 =
  147. Builder.CreateBitCast(FloatVal0, Builder.getIntNTy(FloatWidth));
  148. Value *ARep = Builder.CreateZExt(ARep0, FPToI->getType());
  149. Value *PosOrNeg = Builder.CreateICmpSGT(
  150. ARep0, ConstantInt::getSigned(Builder.getIntNTy(FloatWidth), -1));
  151. Value *Sign = Builder.CreateSelect(PosOrNeg, ConstantInt::getSigned(IntTy, 1),
  152. ConstantInt::getSigned(IntTy, -1));
  153. Value *And =
  154. Builder.CreateLShr(ARep, Builder.getIntN(BitWidth, FPMantissaWidth));
  155. Value *And2 = Builder.CreateAnd(
  156. And, Builder.getIntN(BitWidth, (1 << ExponentWidth) - 1));
  157. Value *Abs = Builder.CreateAnd(ARep, SignificandMask);
  158. Value *Or = Builder.CreateOr(Abs, ImplicitBit);
  159. Value *Cmp =
  160. Builder.CreateICmpULT(And2, Builder.getIntN(BitWidth, ExponentBias));
  161. Builder.CreateCondBr(Cmp, End, IfEnd);
  162. // if.end:
  163. Builder.SetInsertPoint(IfEnd);
  164. Value *Add1 = Builder.CreateAdd(
  165. And2, ConstantInt::getSigned(IntTy, -int64_t(ExponentBias + BitWidth)));
  166. Value *Cmp3 =
  167. Builder.CreateICmpULT(Add1, ConstantInt::getSigned(IntTy, -BitWidth));
  168. Builder.CreateCondBr(Cmp3, IfThen5, IfEnd9);
  169. // if.then5:
  170. Builder.SetInsertPoint(IfThen5);
  171. Value *PosInf = Builder.CreateXor(NegOne, NegInf);
  172. Value *Cond8 = Builder.CreateSelect(PosOrNeg, PosInf, NegInf);
  173. Builder.CreateBr(End);
  174. // if.end9:
  175. Builder.SetInsertPoint(IfEnd9);
  176. Value *Cmp10 = Builder.CreateICmpULT(
  177. And2, Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth));
  178. Builder.CreateCondBr(Cmp10, IfThen12, IfElse);
  179. // if.then12:
  180. Builder.SetInsertPoint(IfThen12);
  181. Value *Sub13 = Builder.CreateSub(
  182. Builder.getIntN(BitWidth, ExponentBias + FPMantissaWidth), And2);
  183. Value *Shr14 = Builder.CreateLShr(Or, Sub13);
  184. Value *Mul = Builder.CreateMul(Shr14, Sign);
  185. Builder.CreateBr(End);
  186. // if.else:
  187. Builder.SetInsertPoint(IfElse);
  188. Value *Sub15 = Builder.CreateAdd(
  189. And2,
  190. ConstantInt::getSigned(IntTy, -(ExponentBias + FPMantissaWidth)));
  191. Value *Shl = Builder.CreateShl(Or, Sub15);
  192. Value *Mul16 = Builder.CreateMul(Shl, Sign);
  193. Builder.CreateBr(End);
  194. // cleanup:
  195. Builder.SetInsertPoint(End, End->begin());
  196. PHINode *Retval0 = Builder.CreatePHI(FPToI->getType(), 4);
  197. Retval0->addIncoming(Cond8, IfThen5);
  198. Retval0->addIncoming(Mul, IfThen12);
  199. Retval0->addIncoming(Mul16, IfElse);
  200. Retval0->addIncoming(Builder.getIntN(BitWidth, 0), Entry);
  201. FPToI->replaceAllUsesWith(Retval0);
  202. FPToI->dropAllReferences();
  203. FPToI->eraseFromParent();
  204. }
  205. /// Generate code to convert an integer to a fp number, replacing S(U)IToFP with
  206. /// the generated code. This currently generates code similarly to compiler-rt's
  207. /// implementations. This implementation has an implicit assumption that integer
  208. /// width is larger than fp.
  209. ///
  210. /// An example IR generated from compiler-rt/floatdisf.c looks like below:
  211. /// define dso_local float @__floatdisf(i64 noundef %a) local_unnamed_addr #0 {
  212. /// entry:
  213. /// %cmp = icmp eq i64 %a, 0
  214. /// br i1 %cmp, label %return, label %if.end
  215. ///
  216. /// if.end: ; preds = %entry
  217. /// %shr = ashr i64 %a, 63
  218. /// %xor = xor i64 %shr, %a
  219. /// %sub = sub nsw i64 %xor, %shr
  220. /// %0 = tail call i64 @llvm.ctlz.i64(i64 %sub, i1 true), !range !5
  221. /// %cast = trunc i64 %0 to i32
  222. /// %sub1 = sub nuw nsw i32 64, %cast
  223. /// %sub2 = xor i32 %cast, 63
  224. /// %cmp3 = icmp ult i32 %cast, 40
  225. /// br i1 %cmp3, label %if.then4, label %if.else
  226. ///
  227. /// if.then4: ; preds = %if.end
  228. /// switch i32 %sub1, label %sw.default [
  229. /// i32 25, label %sw.bb
  230. /// i32 26, label %sw.epilog
  231. /// ]
  232. ///
  233. /// sw.bb: ; preds = %if.then4
  234. /// %shl = shl i64 %sub, 1
  235. /// br label %sw.epilog
  236. ///
  237. /// sw.default: ; preds = %if.then4
  238. /// %sub5 = sub nsw i64 38, %0
  239. /// %sh_prom = and i64 %sub5, 4294967295
  240. /// %shr6 = lshr i64 %sub, %sh_prom
  241. /// %shr9 = lshr i64 274877906943, %0
  242. /// %and = and i64 %shr9, %sub
  243. /// %cmp10 = icmp ne i64 %and, 0
  244. /// %conv11 = zext i1 %cmp10 to i64
  245. /// %or = or i64 %shr6, %conv11
  246. /// br label %sw.epilog
  247. ///
  248. /// sw.epilog: ; preds = %sw.default, %if.then4, %sw.bb
  249. /// %a.addr.0 = phi i64 [ %or, %sw.default ], [ %sub, %if.then4 ], [ %shl, %sw.bb ]
  250. /// %1 = lshr i64 %a.addr.0, 2
  251. /// %2 = and i64 %1, 1
  252. /// %or16 = or i64 %2, %a.addr.0
  253. /// %inc = add nsw i64 %or16, 1
  254. /// %3 = and i64 %inc, 67108864
  255. /// %tobool.not = icmp eq i64 %3, 0
  256. /// %spec.select.v = select i1 %tobool.not, i64 2, i64 3
  257. /// %spec.select = ashr i64 %inc, %spec.select.v
  258. /// %spec.select56 = select i1 %tobool.not, i32 %sub2, i32 %sub1
  259. /// br label %if.end26
  260. ///
  261. /// if.else: ; preds = %if.end
  262. /// %sub23 = add nuw nsw i64 %0, 4294967256
  263. /// %sh_prom24 = and i64 %sub23, 4294967295
  264. /// %shl25 = shl i64 %sub, %sh_prom24
  265. /// br label %if.end26
  266. ///
  267. /// if.end26: ; preds = %sw.epilog, %if.else
  268. /// %a.addr.1 = phi i64 [ %shl25, %if.else ], [ %spec.select, %sw.epilog ]
  269. /// %e.0 = phi i32 [ %sub2, %if.else ], [ %spec.select56, %sw.epilog ]
  270. /// %conv27 = trunc i64 %shr to i32
  271. /// %and28 = and i32 %conv27, -2147483648
  272. /// %add = shl nuw nsw i32 %e.0, 23
  273. /// %shl29 = add nuw nsw i32 %add, 1065353216
  274. /// %conv31 = trunc i64 %a.addr.1 to i32
  275. /// %and32 = and i32 %conv31, 8388607
  276. /// %or30 = or i32 %and32, %and28
  277. /// %or33 = or i32 %or30, %shl29
  278. /// %4 = bitcast i32 %or33 to float
  279. /// br label %return
  280. ///
  281. /// return: ; preds = %entry, %if.end26
  282. /// %retval.0 = phi float [ %4, %if.end26 ], [ 0.000000e+00, %entry ]
  283. /// ret float %retval.0
  284. /// }
  285. ///
  286. /// Replace integer to fp with generated code.
  287. static void expandIToFP(Instruction *IToFP) {
  288. IRBuilder<> Builder(IToFP);
  289. auto *IntVal = IToFP->getOperand(0);
  290. IntegerType *IntTy = cast<IntegerType>(IntVal->getType());
  291. unsigned BitWidth = IntVal->getType()->getIntegerBitWidth();
  292. unsigned FPMantissaWidth = IToFP->getType()->getFPMantissaWidth() - 1;
  293. // fp80 conversion is implemented by conversion tp fp128 first following
  294. // a fptrunc to fp80.
  295. FPMantissaWidth = FPMantissaWidth == 63 ? 112 : FPMantissaWidth;
  296. // FIXME: As there is no related builtins added in compliler-rt,
  297. // here currently utilized the fp32 <-> fp16 lib calls to implement.
  298. FPMantissaWidth = FPMantissaWidth == 10 ? 23 : FPMantissaWidth;
  299. unsigned FloatWidth = PowerOf2Ceil(FPMantissaWidth);
  300. bool IsSigned = IToFP->getOpcode() == Instruction::SIToFP;
  301. assert(BitWidth > FloatWidth && "Unexpected conversion. expandIToFP() "
  302. "assumes integer width is larger than fp.");
  303. Value *Temp1 =
  304. Builder.CreateShl(Builder.getIntN(BitWidth, 1),
  305. Builder.getIntN(BitWidth, FPMantissaWidth + 3));
  306. BasicBlock *Entry = Builder.GetInsertBlock();
  307. Function *F = Entry->getParent();
  308. Entry->setName(Twine(Entry->getName(), "itofp-entry"));
  309. BasicBlock *End =
  310. Entry->splitBasicBlock(Builder.GetInsertPoint(), "itofp-return");
  311. BasicBlock *IfEnd =
  312. BasicBlock::Create(Builder.getContext(), "itofp-if-end", F, End);
  313. BasicBlock *IfThen4 =
  314. BasicBlock::Create(Builder.getContext(), "itofp-if-then4", F, End);
  315. BasicBlock *SwBB =
  316. BasicBlock::Create(Builder.getContext(), "itofp-sw-bb", F, End);
  317. BasicBlock *SwDefault =
  318. BasicBlock::Create(Builder.getContext(), "itofp-sw-default", F, End);
  319. BasicBlock *SwEpilog =
  320. BasicBlock::Create(Builder.getContext(), "itofp-sw-epilog", F, End);
  321. BasicBlock *IfThen20 =
  322. BasicBlock::Create(Builder.getContext(), "itofp-if-then20", F, End);
  323. BasicBlock *IfElse =
  324. BasicBlock::Create(Builder.getContext(), "itofp-if-else", F, End);
  325. BasicBlock *IfEnd26 =
  326. BasicBlock::Create(Builder.getContext(), "itofp-if-end26", F, End);
  327. Entry->getTerminator()->eraseFromParent();
  328. Function *CTLZ =
  329. Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz, IntTy);
  330. ConstantInt *True = Builder.getTrue();
  331. // entry:
  332. Builder.SetInsertPoint(Entry);
  333. Value *Cmp = Builder.CreateICmpEQ(IntVal, ConstantInt::getSigned(IntTy, 0));
  334. Builder.CreateCondBr(Cmp, End, IfEnd);
  335. // if.end:
  336. Builder.SetInsertPoint(IfEnd);
  337. Value *Shr =
  338. Builder.CreateAShr(IntVal, Builder.getIntN(BitWidth, BitWidth - 1));
  339. Value *Xor = Builder.CreateXor(Shr, IntVal);
  340. Value *Sub = Builder.CreateSub(Xor, Shr);
  341. Value *Call = Builder.CreateCall(CTLZ, {IsSigned ? Sub : IntVal, True});
  342. Value *Cast = Builder.CreateTrunc(Call, Builder.getInt32Ty());
  343. int BitWidthNew = FloatWidth == 128 ? BitWidth : 32;
  344. Value *Sub1 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth),
  345. FloatWidth == 128 ? Call : Cast);
  346. Value *Sub2 = Builder.CreateSub(Builder.getIntN(BitWidthNew, BitWidth - 1),
  347. FloatWidth == 128 ? Call : Cast);
  348. Value *Cmp3 = Builder.CreateICmpSGT(
  349. Sub2, Builder.getIntN(BitWidthNew, FPMantissaWidth + 1));
  350. Builder.CreateCondBr(Cmp3, IfThen4, IfElse);
  351. // if.then4:
  352. Builder.SetInsertPoint(IfThen4);
  353. llvm::SwitchInst *SI = Builder.CreateSwitch(Sub1, SwDefault);
  354. SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 2), SwBB);
  355. SI->addCase(Builder.getIntN(BitWidthNew, FPMantissaWidth + 3), SwEpilog);
  356. // sw.bb:
  357. Builder.SetInsertPoint(SwBB);
  358. Value *Shl =
  359. Builder.CreateShl(IsSigned ? Sub : IntVal, Builder.getIntN(BitWidth, 1));
  360. Builder.CreateBr(SwEpilog);
  361. // sw.default:
  362. Builder.SetInsertPoint(SwDefault);
  363. Value *Sub5 = Builder.CreateSub(
  364. Builder.getIntN(BitWidthNew, BitWidth - FPMantissaWidth - 3),
  365. FloatWidth == 128 ? Call : Cast);
  366. Value *ShProm = Builder.CreateZExt(Sub5, IntTy);
  367. Value *Shr6 = Builder.CreateLShr(IsSigned ? Sub : IntVal,
  368. FloatWidth == 128 ? Sub5 : ShProm);
  369. Value *Sub8 =
  370. Builder.CreateAdd(FloatWidth == 128 ? Call : Cast,
  371. Builder.getIntN(BitWidthNew, FPMantissaWidth + 3));
  372. Value *ShProm9 = Builder.CreateZExt(Sub8, IntTy);
  373. Value *Shr9 = Builder.CreateLShr(ConstantInt::getSigned(IntTy, -1),
  374. FloatWidth == 128 ? Sub8 : ShProm9);
  375. Value *And = Builder.CreateAnd(Shr9, IsSigned ? Sub : IntVal);
  376. Value *Cmp10 = Builder.CreateICmpNE(And, Builder.getIntN(BitWidth, 0));
  377. Value *Conv11 = Builder.CreateZExt(Cmp10, IntTy);
  378. Value *Or = Builder.CreateOr(Shr6, Conv11);
  379. Builder.CreateBr(SwEpilog);
  380. // sw.epilog:
  381. Builder.SetInsertPoint(SwEpilog);
  382. PHINode *AAddr0 = Builder.CreatePHI(IntTy, 3);
  383. AAddr0->addIncoming(Or, SwDefault);
  384. AAddr0->addIncoming(IsSigned ? Sub : IntVal, IfThen4);
  385. AAddr0->addIncoming(Shl, SwBB);
  386. Value *A0 = Builder.CreateTrunc(AAddr0, Builder.getInt32Ty());
  387. Value *A1 = Builder.CreateLShr(A0, Builder.getIntN(32, 2));
  388. Value *A2 = Builder.CreateAnd(A1, Builder.getIntN(32, 1));
  389. Value *Conv16 = Builder.CreateZExt(A2, IntTy);
  390. Value *Or17 = Builder.CreateOr(AAddr0, Conv16);
  391. Value *Inc = Builder.CreateAdd(Or17, Builder.getIntN(BitWidth, 1));
  392. Value *Shr18 = nullptr;
  393. if (IsSigned)
  394. Shr18 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 2));
  395. else
  396. Shr18 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 2));
  397. Value *A3 = Builder.CreateAnd(Inc, Temp1, "a3");
  398. Value *PosOrNeg = Builder.CreateICmpEQ(A3, Builder.getIntN(BitWidth, 0));
  399. Value *ExtractT60 = Builder.CreateTrunc(Shr18, Builder.getIntNTy(FloatWidth));
  400. Value *Extract63 = Builder.CreateLShr(Shr18, Builder.getIntN(BitWidth, 32));
  401. Value *ExtractT64 = nullptr;
  402. if (FloatWidth > 80)
  403. ExtractT64 = Builder.CreateTrunc(Sub2, Builder.getInt64Ty());
  404. else
  405. ExtractT64 = Builder.CreateTrunc(Extract63, Builder.getInt32Ty());
  406. Builder.CreateCondBr(PosOrNeg, IfEnd26, IfThen20);
  407. // if.then20
  408. Builder.SetInsertPoint(IfThen20);
  409. Value *Shr21 = nullptr;
  410. if (IsSigned)
  411. Shr21 = Builder.CreateAShr(Inc, Builder.getIntN(BitWidth, 3));
  412. else
  413. Shr21 = Builder.CreateLShr(Inc, Builder.getIntN(BitWidth, 3));
  414. Value *ExtractT = Builder.CreateTrunc(Shr21, Builder.getIntNTy(FloatWidth));
  415. Value *Extract = Builder.CreateLShr(Shr21, Builder.getIntN(BitWidth, 32));
  416. Value *ExtractT62 = nullptr;
  417. if (FloatWidth > 80)
  418. ExtractT62 = Builder.CreateTrunc(Sub1, Builder.getIntNTy(64));
  419. else
  420. ExtractT62 = Builder.CreateTrunc(Extract, Builder.getIntNTy(32));
  421. Builder.CreateBr(IfEnd26);
  422. // if.else:
  423. Builder.SetInsertPoint(IfElse);
  424. Value *Sub24 = Builder.CreateAdd(
  425. FloatWidth == 128 ? Call : Cast,
  426. ConstantInt::getSigned(Builder.getIntNTy(BitWidthNew),
  427. -(BitWidth - FPMantissaWidth - 1)));
  428. Value *ShProm25 = Builder.CreateZExt(Sub24, IntTy);
  429. Value *Shl26 = Builder.CreateShl(IsSigned ? Sub : IntVal,
  430. FloatWidth == 128 ? Sub24 : ShProm25);
  431. Value *ExtractT61 = Builder.CreateTrunc(Shl26, Builder.getIntNTy(FloatWidth));
  432. Value *Extract65 = Builder.CreateLShr(Shl26, Builder.getIntN(BitWidth, 32));
  433. Value *ExtractT66 = nullptr;
  434. if (FloatWidth > 80)
  435. ExtractT66 = Builder.CreateTrunc(Sub2, Builder.getIntNTy(64));
  436. else
  437. ExtractT66 = Builder.CreateTrunc(Extract65, Builder.getInt32Ty());
  438. Builder.CreateBr(IfEnd26);
  439. // if.end26:
  440. Builder.SetInsertPoint(IfEnd26);
  441. PHINode *AAddr1Off0 = Builder.CreatePHI(Builder.getIntNTy(FloatWidth), 3);
  442. AAddr1Off0->addIncoming(ExtractT, IfThen20);
  443. AAddr1Off0->addIncoming(ExtractT60, SwEpilog);
  444. AAddr1Off0->addIncoming(ExtractT61, IfElse);
  445. PHINode *AAddr1Off32 = nullptr;
  446. if (FloatWidth > 32) {
  447. AAddr1Off32 =
  448. Builder.CreatePHI(Builder.getIntNTy(FloatWidth > 80 ? 64 : 32), 3);
  449. AAddr1Off32->addIncoming(ExtractT62, IfThen20);
  450. AAddr1Off32->addIncoming(ExtractT64, SwEpilog);
  451. AAddr1Off32->addIncoming(ExtractT66, IfElse);
  452. }
  453. PHINode *E0 = nullptr;
  454. if (FloatWidth <= 80) {
  455. E0 = Builder.CreatePHI(Builder.getIntNTy(BitWidthNew), 3);
  456. E0->addIncoming(Sub1, IfThen20);
  457. E0->addIncoming(Sub2, SwEpilog);
  458. E0->addIncoming(Sub2, IfElse);
  459. }
  460. Value *And29 = nullptr;
  461. if (FloatWidth > 80) {
  462. Value *Temp2 = Builder.CreateShl(Builder.getIntN(BitWidth, 1),
  463. Builder.getIntN(BitWidth, 63));
  464. And29 = Builder.CreateAnd(Shr, Temp2, "and29");
  465. } else {
  466. Value *Conv28 = Builder.CreateTrunc(Shr, Builder.getIntNTy(32));
  467. And29 = Builder.CreateAnd(
  468. Conv28, ConstantInt::getSigned(Builder.getIntNTy(32), 0x80000000));
  469. }
  470. unsigned TempMod = FPMantissaWidth % 32;
  471. Value *And34 = nullptr;
  472. Value *Shl30 = nullptr;
  473. if (FloatWidth > 80) {
  474. TempMod += 32;
  475. Value *Add = Builder.CreateShl(AAddr1Off32, Builder.getIntN(64, TempMod));
  476. Shl30 = Builder.CreateAdd(
  477. Add,
  478. Builder.getIntN(64, ((1ull << (62ull - TempMod)) - 1ull) << TempMod));
  479. And34 = Builder.CreateZExt(Shl30, Builder.getIntNTy(128));
  480. } else {
  481. Value *Add = Builder.CreateShl(E0, Builder.getIntN(32, TempMod));
  482. Shl30 = Builder.CreateAdd(
  483. Add, Builder.getIntN(32, ((1 << (30 - TempMod)) - 1) << TempMod));
  484. And34 = Builder.CreateAnd(FloatWidth > 32 ? AAddr1Off32 : AAddr1Off0,
  485. Builder.getIntN(32, (1 << TempMod) - 1));
  486. }
  487. Value *Or35 = nullptr;
  488. if (FloatWidth > 80) {
  489. Value *And29Trunc = Builder.CreateTrunc(And29, Builder.getIntNTy(128));
  490. Value *Or31 = Builder.CreateOr(And29Trunc, And34);
  491. Value *Or34 = Builder.CreateShl(Or31, Builder.getIntN(128, 64));
  492. Value *Temp3 = Builder.CreateShl(Builder.getIntN(128, 1),
  493. Builder.getIntN(128, FPMantissaWidth));
  494. Value *Temp4 = Builder.CreateSub(Temp3, Builder.getIntN(128, 1));
  495. Value *A6 = Builder.CreateAnd(AAddr1Off0, Temp4);
  496. Or35 = Builder.CreateOr(Or34, A6);
  497. } else {
  498. Value *Or31 = Builder.CreateOr(And34, And29);
  499. Or35 = Builder.CreateOr(IsSigned ? Or31 : And34, Shl30);
  500. }
  501. Value *A4 = nullptr;
  502. if (IToFP->getType()->isDoubleTy()) {
  503. Value *ZExt1 = Builder.CreateZExt(Or35, Builder.getIntNTy(FloatWidth));
  504. Value *Shl1 = Builder.CreateShl(ZExt1, Builder.getIntN(FloatWidth, 32));
  505. Value *And1 =
  506. Builder.CreateAnd(AAddr1Off0, Builder.getIntN(FloatWidth, 0xFFFFFFFF));
  507. Value *Or1 = Builder.CreateOr(Shl1, And1);
  508. A4 = Builder.CreateBitCast(Or1, IToFP->getType());
  509. } else if (IToFP->getType()->isX86_FP80Ty()) {
  510. Value *A40 =
  511. Builder.CreateBitCast(Or35, Type::getFP128Ty(Builder.getContext()));
  512. A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
  513. } else if (IToFP->getType()->isHalfTy()) {
  514. // Deal with "half" situation. This is a workaround since we don't have
  515. // floattihf.c currently as referring.
  516. Value *A40 =
  517. Builder.CreateBitCast(Or35, Type::getFloatTy(Builder.getContext()));
  518. A4 = Builder.CreateFPTrunc(A40, IToFP->getType());
  519. } else // float type
  520. A4 = Builder.CreateBitCast(Or35, IToFP->getType());
  521. Builder.CreateBr(End);
  522. // return:
  523. Builder.SetInsertPoint(End, End->begin());
  524. PHINode *Retval0 = Builder.CreatePHI(IToFP->getType(), 2);
  525. Retval0->addIncoming(A4, IfEnd26);
  526. Retval0->addIncoming(ConstantFP::getZero(IToFP->getType(), false), Entry);
  527. IToFP->replaceAllUsesWith(Retval0);
  528. IToFP->dropAllReferences();
  529. IToFP->eraseFromParent();
  530. }
  531. static bool runImpl(Function &F, const TargetLowering &TLI) {
  532. SmallVector<Instruction *, 4> Replace;
  533. bool Modified = false;
  534. unsigned MaxLegalFpConvertBitWidth =
  535. TLI.getMaxLargeFPConvertBitWidthSupported();
  536. if (ExpandFpConvertBits != llvm::IntegerType::MAX_INT_BITS)
  537. MaxLegalFpConvertBitWidth = ExpandFpConvertBits;
  538. if (MaxLegalFpConvertBitWidth >= llvm::IntegerType::MAX_INT_BITS)
  539. return false;
  540. for (auto &I : instructions(F)) {
  541. switch (I.getOpcode()) {
  542. case Instruction::FPToUI:
  543. case Instruction::FPToSI: {
  544. // TODO: This pass doesn't handle vectors.
  545. if (I.getOperand(0)->getType()->isVectorTy())
  546. continue;
  547. auto *IntTy = dyn_cast<IntegerType>(I.getType());
  548. if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
  549. continue;
  550. Replace.push_back(&I);
  551. Modified = true;
  552. break;
  553. }
  554. case Instruction::UIToFP:
  555. case Instruction::SIToFP: {
  556. // TODO: This pass doesn't handle vectors.
  557. if (I.getOperand(0)->getType()->isVectorTy())
  558. continue;
  559. auto *IntTy = dyn_cast<IntegerType>(I.getOperand(0)->getType());
  560. if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth)
  561. continue;
  562. Replace.push_back(&I);
  563. Modified = true;
  564. break;
  565. }
  566. default:
  567. break;
  568. }
  569. }
  570. if (Replace.empty())
  571. return false;
  572. while (!Replace.empty()) {
  573. Instruction *I = Replace.pop_back_val();
  574. if (I->getOpcode() == Instruction::FPToUI ||
  575. I->getOpcode() == Instruction::FPToSI) {
  576. expandFPToI(I);
  577. } else {
  578. expandIToFP(I);
  579. }
  580. }
  581. return Modified;
  582. }
  583. namespace {
  584. class ExpandLargeFpConvertLegacyPass : public FunctionPass {
  585. public:
  586. static char ID;
  587. ExpandLargeFpConvertLegacyPass() : FunctionPass(ID) {
  588. initializeExpandLargeFpConvertLegacyPassPass(
  589. *PassRegistry::getPassRegistry());
  590. }
  591. bool runOnFunction(Function &F) override {
  592. auto *TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  593. auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
  594. return runImpl(F, *TLI);
  595. }
  596. void getAnalysisUsage(AnalysisUsage &AU) const override {
  597. AU.addRequired<TargetPassConfig>();
  598. AU.addPreserved<AAResultsWrapperPass>();
  599. AU.addPreserved<GlobalsAAWrapperPass>();
  600. }
  601. };
  602. } // namespace
  603. char ExpandLargeFpConvertLegacyPass::ID = 0;
  604. INITIALIZE_PASS_BEGIN(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
  605. "Expand large fp convert", false, false)
  606. INITIALIZE_PASS_END(ExpandLargeFpConvertLegacyPass, "expand-large-fp-convert",
  607. "Expand large fp convert", false, false)
  608. FunctionPass *llvm::createExpandLargeFpConvertPass() {
  609. return new ExpandLargeFpConvertLegacyPass();
  610. }