PPCBranchSelector.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413
  1. //===-- PPCBranchSelector.cpp - Emit long conditional branches ------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that scans a machine function to determine which
  10. // conditional branches need more than 16 bits of displacement to reach their
  11. // target basic block. It does this in two passes; a calculation of basic block
  12. // positions pass, and a branch pseudo op to machine branch opcode pass. This
  13. // pass should be run last, just before the assembly printer.
  14. //
  15. //===----------------------------------------------------------------------===//
  16. #include "MCTargetDesc/PPCPredicates.h"
  17. #include "PPC.h"
  18. #include "PPCInstrBuilder.h"
  19. #include "PPCInstrInfo.h"
  20. #include "PPCSubtarget.h"
  21. #include "llvm/ADT/Statistic.h"
  22. #include "llvm/CodeGen/MachineFunctionPass.h"
  23. #include "llvm/CodeGen/MachineRegisterInfo.h"
  24. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  25. #include "llvm/Support/MathExtras.h"
  26. #include "llvm/Target/TargetMachine.h"
  27. #include <algorithm>
  28. using namespace llvm;
  29. #define DEBUG_TYPE "ppc-branch-select"
  30. STATISTIC(NumExpanded, "Number of branches expanded to long format");
  31. STATISTIC(NumPrefixed, "Number of prefixed instructions");
  32. STATISTIC(NumPrefixedAligned,
  33. "Number of prefixed instructions that have been aligned");
  34. namespace {
  35. struct PPCBSel : public MachineFunctionPass {
  36. static char ID;
  37. PPCBSel() : MachineFunctionPass(ID) {
  38. initializePPCBSelPass(*PassRegistry::getPassRegistry());
  39. }
  40. // The sizes of the basic blocks in the function (the first
  41. // element of the pair); the second element of the pair is the amount of the
  42. // size that is due to potential padding.
  43. std::vector<std::pair<unsigned, unsigned>> BlockSizes;
  44. // The first block number which has imprecise instruction address.
  45. int FirstImpreciseBlock = -1;
  46. unsigned GetAlignmentAdjustment(MachineBasicBlock &MBB, unsigned Offset);
  47. unsigned ComputeBlockSizes(MachineFunction &Fn);
  48. void modifyAdjustment(MachineFunction &Fn);
  49. int computeBranchSize(MachineFunction &Fn,
  50. const MachineBasicBlock *Src,
  51. const MachineBasicBlock *Dest,
  52. unsigned BrOffset);
  53. bool runOnMachineFunction(MachineFunction &Fn) override;
  54. MachineFunctionProperties getRequiredProperties() const override {
  55. return MachineFunctionProperties().set(
  56. MachineFunctionProperties::Property::NoVRegs);
  57. }
  58. StringRef getPassName() const override { return "PowerPC Branch Selector"; }
  59. };
  60. char PPCBSel::ID = 0;
  61. }
  62. INITIALIZE_PASS(PPCBSel, "ppc-branch-select", "PowerPC Branch Selector",
  63. false, false)
  64. /// createPPCBranchSelectionPass - returns an instance of the Branch Selection
  65. /// Pass
  66. ///
  67. FunctionPass *llvm::createPPCBranchSelectionPass() {
  68. return new PPCBSel();
  69. }
  70. /// In order to make MBB aligned, we need to add an adjustment value to the
  71. /// original Offset.
  72. unsigned PPCBSel::GetAlignmentAdjustment(MachineBasicBlock &MBB,
  73. unsigned Offset) {
  74. const Align Alignment = MBB.getAlignment();
  75. if (Alignment == Align(1))
  76. return 0;
  77. const Align ParentAlign = MBB.getParent()->getAlignment();
  78. if (Alignment <= ParentAlign)
  79. return offsetToAlignment(Offset, Alignment);
  80. // The alignment of this MBB is larger than the function's alignment, so we
  81. // can't tell whether or not it will insert nops. Assume that it will.
  82. if (FirstImpreciseBlock < 0)
  83. FirstImpreciseBlock = MBB.getNumber();
  84. return Alignment.value() + offsetToAlignment(Offset, Alignment);
  85. }
  86. /// We need to be careful about the offset of the first block in the function
  87. /// because it might not have the function's alignment. This happens because,
  88. /// under the ELFv2 ABI, for functions which require a TOC pointer, we add a
  89. /// two-instruction sequence to the start of the function.
  90. /// Note: This needs to be synchronized with the check in
  91. /// PPCLinuxAsmPrinter::EmitFunctionBodyStart.
  92. static inline unsigned GetInitialOffset(MachineFunction &Fn) {
  93. unsigned InitialOffset = 0;
  94. if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
  95. !Fn.getRegInfo().use_empty(PPC::X2))
  96. InitialOffset = 8;
  97. return InitialOffset;
  98. }
  99. /// Measure each MBB and compute a size for the entire function.
  100. unsigned PPCBSel::ComputeBlockSizes(MachineFunction &Fn) {
  101. const PPCInstrInfo *TII =
  102. static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
  103. unsigned FuncSize = GetInitialOffset(Fn);
  104. for (MachineBasicBlock &MBB : Fn) {
  105. // The end of the previous block may have extra nops if this block has an
  106. // alignment requirement.
  107. if (MBB.getNumber() > 0) {
  108. unsigned AlignExtra = GetAlignmentAdjustment(MBB, FuncSize);
  109. auto &BS = BlockSizes[MBB.getNumber()-1];
  110. BS.first += AlignExtra;
  111. BS.second = AlignExtra;
  112. FuncSize += AlignExtra;
  113. }
  114. unsigned BlockSize = 0;
  115. unsigned UnalignedBytesRemaining = 0;
  116. for (MachineInstr &MI : MBB) {
  117. unsigned MINumBytes = TII->getInstSizeInBytes(MI);
  118. if (MI.isInlineAsm() && (FirstImpreciseBlock < 0))
  119. FirstImpreciseBlock = MBB.getNumber();
  120. if (TII->isPrefixed(MI.getOpcode())) {
  121. NumPrefixed++;
  122. // All 8 byte instructions may require alignment. Each 8 byte
  123. // instruction may be aligned by another 4 bytes.
  124. // This means that an 8 byte instruction may require 12 bytes
  125. // (8 for the instruction itself and 4 for the alignment nop).
  126. // This will happen if an 8 byte instruction can be aligned to 64 bytes
  127. // by only adding a 4 byte nop.
  128. // We don't know the alignment at this point in the code so we have to
  129. // adopt a more pessimistic approach. If an instruction may need
  130. // alignment we assume that it does need alignment and add 4 bytes to
  131. // it. As a result we may end up with more long branches than before
  132. // but we are in the safe position where if we need a long branch we
  133. // have one.
  134. // The if statement checks to make sure that two 8 byte instructions
  135. // are at least 64 bytes away from each other. It is not possible for
  136. // two instructions that both need alignment to be within 64 bytes of
  137. // each other.
  138. if (!UnalignedBytesRemaining) {
  139. BlockSize += 4;
  140. UnalignedBytesRemaining = 60;
  141. NumPrefixedAligned++;
  142. }
  143. }
  144. UnalignedBytesRemaining -= std::min(UnalignedBytesRemaining, MINumBytes);
  145. BlockSize += MINumBytes;
  146. }
  147. BlockSizes[MBB.getNumber()].first = BlockSize;
  148. FuncSize += BlockSize;
  149. }
  150. return FuncSize;
  151. }
  152. /// Modify the basic block align adjustment.
  153. void PPCBSel::modifyAdjustment(MachineFunction &Fn) {
  154. unsigned Offset = GetInitialOffset(Fn);
  155. for (MachineBasicBlock &MBB : Fn) {
  156. if (MBB.getNumber() > 0) {
  157. auto &BS = BlockSizes[MBB.getNumber()-1];
  158. BS.first -= BS.second;
  159. Offset -= BS.second;
  160. unsigned AlignExtra = GetAlignmentAdjustment(MBB, Offset);
  161. BS.first += AlignExtra;
  162. BS.second = AlignExtra;
  163. Offset += AlignExtra;
  164. }
  165. Offset += BlockSizes[MBB.getNumber()].first;
  166. }
  167. }
  168. /// Determine the offset from the branch in Src block to the Dest block.
  169. /// BrOffset is the offset of the branch instruction inside Src block.
  170. int PPCBSel::computeBranchSize(MachineFunction &Fn,
  171. const MachineBasicBlock *Src,
  172. const MachineBasicBlock *Dest,
  173. unsigned BrOffset) {
  174. int BranchSize;
  175. Align MaxAlign = Align(4);
  176. bool NeedExtraAdjustment = false;
  177. if (Dest->getNumber() <= Src->getNumber()) {
  178. // If this is a backwards branch, the delta is the offset from the
  179. // start of this block to this branch, plus the sizes of all blocks
  180. // from this block to the dest.
  181. BranchSize = BrOffset;
  182. MaxAlign = std::max(MaxAlign, Src->getAlignment());
  183. int DestBlock = Dest->getNumber();
  184. BranchSize += BlockSizes[DestBlock].first;
  185. for (unsigned i = DestBlock+1, e = Src->getNumber(); i < e; ++i) {
  186. BranchSize += BlockSizes[i].first;
  187. MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
  188. }
  189. NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
  190. (DestBlock >= FirstImpreciseBlock);
  191. } else {
  192. // Otherwise, add the size of the blocks between this block and the
  193. // dest to the number of bytes left in this block.
  194. unsigned StartBlock = Src->getNumber();
  195. BranchSize = BlockSizes[StartBlock].first - BrOffset;
  196. MaxAlign = std::max(MaxAlign, Dest->getAlignment());
  197. for (unsigned i = StartBlock+1, e = Dest->getNumber(); i != e; ++i) {
  198. BranchSize += BlockSizes[i].first;
  199. MaxAlign = std::max(MaxAlign, Fn.getBlockNumbered(i)->getAlignment());
  200. }
  201. NeedExtraAdjustment = (FirstImpreciseBlock >= 0) &&
  202. (Src->getNumber() >= FirstImpreciseBlock);
  203. }
  204. // We tend to over estimate code size due to large alignment and
  205. // inline assembly. Usually it causes larger computed branch offset.
  206. // But sometimes it may also causes smaller computed branch offset
  207. // than actual branch offset. If the offset is close to the limit of
  208. // encoding, it may cause problem at run time.
  209. // Following is a simplified example.
  210. //
  211. // actual estimated
  212. // address address
  213. // ...
  214. // bne Far 100 10c
  215. // .p2align 4
  216. // Near: 110 110
  217. // ...
  218. // Far: 8108 8108
  219. //
  220. // Actual offset: 0x8108 - 0x100 = 0x8008
  221. // Computed offset: 0x8108 - 0x10c = 0x7ffc
  222. //
  223. // This example also shows when we can get the largest gap between
  224. // estimated offset and actual offset. If there is an aligned block
  225. // ABB between branch and target, assume its alignment is <align>
  226. // bits. Now consider the accumulated function size FSIZE till the end
  227. // of previous block PBB. If the estimated FSIZE is multiple of
  228. // 2^<align>, we don't need any padding for the estimated address of
  229. // ABB. If actual FSIZE at the end of PBB is 4 bytes more than
  230. // multiple of 2^<align>, then we need (2^<align> - 4) bytes of
  231. // padding. It also means the actual branch offset is (2^<align> - 4)
  232. // larger than computed offset. Other actual FSIZE needs less padding
  233. // bytes, so causes smaller gap between actual and computed offset.
  234. //
  235. // On the other hand, if the inline asm or large alignment occurs
  236. // between the branch block and destination block, the estimated address
  237. // can be <delta> larger than actual address. If padding bytes are
  238. // needed for a later aligned block, the actual number of padding bytes
  239. // is at most <delta> more than estimated padding bytes. So the actual
  240. // aligned block address is less than or equal to the estimated aligned
  241. // block address. So the actual branch offset is less than or equal to
  242. // computed branch offset.
  243. //
  244. // The computed offset is at most ((1 << alignment) - 4) bytes smaller
  245. // than actual offset. So we add this number to the offset for safety.
  246. if (NeedExtraAdjustment)
  247. BranchSize += MaxAlign.value() - 4;
  248. return BranchSize;
  249. }
  250. bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
  251. const PPCInstrInfo *TII =
  252. static_cast<const PPCInstrInfo *>(Fn.getSubtarget().getInstrInfo());
  253. // Give the blocks of the function a dense, in-order, numbering.
  254. Fn.RenumberBlocks();
  255. BlockSizes.resize(Fn.getNumBlockIDs());
  256. FirstImpreciseBlock = -1;
  257. // Measure each MBB and compute a size for the entire function.
  258. unsigned FuncSize = ComputeBlockSizes(Fn);
  259. // If the entire function is smaller than the displacement of a branch field,
  260. // we know we don't need to shrink any branches in this function. This is a
  261. // common case.
  262. if (FuncSize < (1 << 15)) {
  263. BlockSizes.clear();
  264. return false;
  265. }
  266. // For each conditional branch, if the offset to its destination is larger
  267. // than the offset field allows, transform it into a long branch sequence
  268. // like this:
  269. // short branch:
  270. // bCC MBB
  271. // long branch:
  272. // b!CC $PC+8
  273. // b MBB
  274. //
  275. bool MadeChange = true;
  276. bool EverMadeChange = false;
  277. while (MadeChange) {
  278. // Iteratively expand branches until we reach a fixed point.
  279. MadeChange = false;
  280. for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
  281. ++MFI) {
  282. MachineBasicBlock &MBB = *MFI;
  283. unsigned MBBStartOffset = 0;
  284. for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
  285. I != E; ++I) {
  286. MachineBasicBlock *Dest = nullptr;
  287. if (I->getOpcode() == PPC::BCC && !I->getOperand(2).isImm())
  288. Dest = I->getOperand(2).getMBB();
  289. else if ((I->getOpcode() == PPC::BC || I->getOpcode() == PPC::BCn) &&
  290. !I->getOperand(1).isImm())
  291. Dest = I->getOperand(1).getMBB();
  292. else if ((I->getOpcode() == PPC::BDNZ8 || I->getOpcode() == PPC::BDNZ ||
  293. I->getOpcode() == PPC::BDZ8 || I->getOpcode() == PPC::BDZ) &&
  294. !I->getOperand(0).isImm())
  295. Dest = I->getOperand(0).getMBB();
  296. if (!Dest) {
  297. MBBStartOffset += TII->getInstSizeInBytes(*I);
  298. continue;
  299. }
  300. // Determine the offset from the current branch to the destination
  301. // block.
  302. int BranchSize = computeBranchSize(Fn, &MBB, Dest, MBBStartOffset);
  303. // If this branch is in range, ignore it.
  304. if (isInt<16>(BranchSize)) {
  305. MBBStartOffset += 4;
  306. continue;
  307. }
  308. // Otherwise, we have to expand it to a long branch.
  309. MachineInstr &OldBranch = *I;
  310. DebugLoc dl = OldBranch.getDebugLoc();
  311. if (I->getOpcode() == PPC::BCC) {
  312. // The BCC operands are:
  313. // 0. PPC branch predicate
  314. // 1. CR register
  315. // 2. Target MBB
  316. PPC::Predicate Pred = (PPC::Predicate)I->getOperand(0).getImm();
  317. Register CRReg = I->getOperand(1).getReg();
  318. // Jump over the uncond branch inst (i.e. $PC+8) on opposite condition.
  319. BuildMI(MBB, I, dl, TII->get(PPC::BCC))
  320. .addImm(PPC::InvertPredicate(Pred)).addReg(CRReg).addImm(2);
  321. } else if (I->getOpcode() == PPC::BC) {
  322. Register CRBit = I->getOperand(0).getReg();
  323. BuildMI(MBB, I, dl, TII->get(PPC::BCn)).addReg(CRBit).addImm(2);
  324. } else if (I->getOpcode() == PPC::BCn) {
  325. Register CRBit = I->getOperand(0).getReg();
  326. BuildMI(MBB, I, dl, TII->get(PPC::BC)).addReg(CRBit).addImm(2);
  327. } else if (I->getOpcode() == PPC::BDNZ) {
  328. BuildMI(MBB, I, dl, TII->get(PPC::BDZ)).addImm(2);
  329. } else if (I->getOpcode() == PPC::BDNZ8) {
  330. BuildMI(MBB, I, dl, TII->get(PPC::BDZ8)).addImm(2);
  331. } else if (I->getOpcode() == PPC::BDZ) {
  332. BuildMI(MBB, I, dl, TII->get(PPC::BDNZ)).addImm(2);
  333. } else if (I->getOpcode() == PPC::BDZ8) {
  334. BuildMI(MBB, I, dl, TII->get(PPC::BDNZ8)).addImm(2);
  335. } else {
  336. llvm_unreachable("Unhandled branch type!");
  337. }
  338. // Uncond branch to the real destination.
  339. I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
  340. // Remove the old branch from the function.
  341. OldBranch.eraseFromParent();
  342. // Remember that this instruction is 8-bytes, increase the size of the
  343. // block by 4, remember to iterate.
  344. BlockSizes[MBB.getNumber()].first += 4;
  345. MBBStartOffset += 8;
  346. ++NumExpanded;
  347. MadeChange = true;
  348. }
  349. }
  350. if (MadeChange) {
  351. // If we're going to iterate again, make sure we've updated our
  352. // padding-based contributions to the block sizes.
  353. modifyAdjustment(Fn);
  354. }
  355. EverMadeChange |= MadeChange;
  356. }
  357. BlockSizes.clear();
  358. return EverMadeChange;
  359. }