//===-- Thumb2SizeReduction.cpp - Thumb2 code size reduction pass -*- C++ -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "thumb2-reduce-size"
#define THUMB2_SIZE_REDUCE_NAME "Thumb2 instruction size reduce pass"

STATISTIC(NumNarrows, "Number of 32-bit instrs reduced to 16-bit ones");
STATISTIC(Num2Addrs,  "Number of 32-bit instrs reduced to 2addr 16-bit ones");
STATISTIC(NumLdSts,   "Number of 32-bit load / store reduced to 16-bit ones");

static cl::opt<int> ReduceLimit("t2-reduce-limit",
                                cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimit2Addr("t2-reduce-limit2",
                                     cl::init(-1), cl::Hidden);
static cl::opt<int> ReduceLimitLdSt("t2-reduce-limit3",
                                    cl::init(-1), cl::Hidden);

namespace {

/// ReduceTable - A static table with information on mapping from wide
/// opcodes to narrow ones.
struct ReduceEntry {
  uint16_t WideOpc;      // Wide opcode
  uint16_t NarrowOpc1;   // Narrow opcode to transform to
  uint16_t NarrowOpc2;   // Narrow opcode when it's two-address
  uint8_t  Imm1Limit;    // Limit of immediate field (bits)
  uint8_t  Imm2Limit;    // Limit of immediate field when it's two-address
  unsigned LowRegs1 : 1; // Only possible if low-registers are used
  unsigned LowRegs2 : 1; // Only possible if low-registers are used (2addr)
  unsigned PredCC1  : 2; // 0 - If predicated, cc is on and vice versa.
                         // 1 - No cc field.
                         // 2 - Always set CPSR.
  unsigned PredCC2  : 2;
  unsigned PartFlag : 1; // 16-bit instruction does partial flag update
  unsigned Special  : 1; // Needs to be dealt with specially
  unsigned AvoidMovs: 1; // Avoid movs with shifter operand (for Swift)
};

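// For example, the t2ADDri entry below reads: a 32-bit "add.w rd, rn, #imm"
// may narrow to tADDi3 (3-bit immediate) or, when the destination is tied to
// a source, to the two-address tADDi8 (8-bit immediate); both forms require
// low registers only.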
static const ReduceEntry ReduceTable[] = {
  // Wide,        Narrow1,      Narrow2,     imm1,imm2, lo1, lo2, P/C,PF,S,AM
  { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0,0 },
  { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1,0 },
  { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0,0 },
  { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1,0 },
  { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
  { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
  { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
  { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0,0 },
  // FIXME: Disable CMN, as CCodes are backwards from compare expectations
  //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2CMNzrr, ARM::tCMNz, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 0,1,0 },
  { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0,0 },
  // FIXME: adr.n immediate offset must be multiple of 4.
  //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
  { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0,1 },
  { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0,1 },
  { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0,1 },
  { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,0,0 },
  { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1,1,0 },
  // FIXME: Do we need the 16-bit 'S' variant?
  { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0,0 },
  { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
  { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0,0 },
  { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0,0 },
  { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
  { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0,0 },
  { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0,0 },
  { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0,0 },
  { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0,0 },
  { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2SXTB, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
  { ARM::t2SXTH, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
  { ARM::t2TEQrr, ARM::tEOR, 0, 0, 0, 1, 0, 2,0, 0,1,0 },
  { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0,0 },
  { ARM::t2UXTB, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,1,0 },
  { ARM::t2UXTH, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,1,0 },

  // FIXME: Clean this up after splitting each Thumb load / store opcode
  // into multiple ones.
  { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2LDR_POST,ARM::tLDMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1,0 },
  { ARM::t2STR_POST,ARM::tSTMIA_UPD,0, 0, 0, 1, 0, 0,0, 0,1,0 },

  { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1,0 },
  // ARM::t2STMIA (with no basereg writeback) has no Thumb1 equivalent.
  // tSTMIA_UPD is a change in semantics which can only be used if the base
  // register is killed. This difference is correctly handled elsewhere.
  { ARM::t2STMIA, ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1,0 },
  { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1,0 }
};

class Thumb2SizeReduce : public MachineFunctionPass {
public:
  static char ID;

  const Thumb2InstrInfo *TII;
  const ARMSubtarget *STI;

  Thumb2SizeReduce(std::function<bool(const Function &)> Ftor = nullptr);

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return THUMB2_SIZE_REDUCE_NAME;
  }

private:
  /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable.
  DenseMap<unsigned, unsigned> ReduceOpcodeMap;

  bool canAddPseudoFlagDep(MachineInstr *Use, bool IsSelfLoop);

  bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                       bool is2Addr, ARMCC::CondCodes Pred,
                       bool LiveCPSR, bool &HasCC, bool &CCDead);

  bool ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                       const ReduceEntry &Entry);

  bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR, bool IsSelfLoop);

  /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address
  /// instruction.
  bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                     const ReduceEntry &Entry, bool LiveCPSR,
                     bool IsSelfLoop);

  /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit
  /// non-two-address instruction.
  bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                      const ReduceEntry &Entry, bool LiveCPSR,
                      bool IsSelfLoop);

  /// ReduceMI - Attempt to reduce MI, return true on success.
  bool ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI, bool LiveCPSR,
                bool IsSelfLoop, bool SkipPrologueEpilogue);

  /// ReduceMBB - Reduce width of instructions in the specified basic block.
  bool ReduceMBB(MachineBasicBlock &MBB, bool SkipPrologueEpilogue);

  bool OptimizeSize;
  bool MinimizeSize;

  // Last instruction to define CPSR in the current block.
  MachineInstr *CPSRDef;
  // Was CPSR last defined by a high latency instruction?
  // When CPSRDef is null, this refers to CPSR defs in predecessors.
  bool HighLatencyCPSR;

  struct MBBInfo {
    // The flags leaving this block have high latency.
    bool HighLatencyCPSR = false;
    // Has this block been visited yet?
    bool Visited = false;

    MBBInfo() = default;
  };

  SmallVector<MBBInfo, 8> BlockInfo;

  std::function<bool(const Function &)> PredicateFtor;
};

char Thumb2SizeReduce::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
                false)

Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
    : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
  OptimizeSize = MinimizeSize = false;
  for (unsigned i = 0, e = std::size(ReduceTable); i != e; ++i) {
    unsigned FromOpc = ReduceTable[i].WideOpc;
    if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
  }
}

static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
  return is_contained(MCID.implicit_defs(), ARM::CPSR);
}

// Check for a likely high-latency flag def.
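// (FMSTAT copies the floating-point status flags into CPSR, which tends to
// resolve late; tMUL's flag result is likewise slow on some implementations.)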
static bool isHighLatencyCPSR(MachineInstr *Def) {
  switch(Def->getOpcode()) {
  case ARM::FMSTAT:
  case ARM::tMUL:
    return true;
  }
  return false;
}

/// canAddPseudoFlagDep - On A9 (and other out-of-order) implementations,
/// the 's' forms of 16-bit instructions partially update CPSR. Abort the
/// transformation to avoid adding a false dependency on the last CPSR-setting
/// instruction, which hurts the out-of-order execution engine's ability to do
/// register renaming magic.
/// This function checks if there is a read-after-write dependency between the
/// last instruction that defines the CPSR and the current instruction. If
/// there is, then there is no harm done since the instruction cannot be
/// retired before the CPSR-setting instruction anyway.
/// Note, we are not doing full dependency analysis here for the sake of
/// compile time. We're not looking for cases like:
/// r0 = muls ...
/// r1 = add.w r0, ...
/// ...
///    = mul.w r1
/// In this case it would have been ok to narrow the mul.w to muls since there
/// is an indirect RAW dependency between the muls and the mul.w.
bool
Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Use, bool FirstInSelfLoop) {
  // Disable the check for -Oz (aka OptimizeForSizeHarder).
  if (MinimizeSize || !STI->avoidCPSRPartialUpdate())
    return false;

  if (!CPSRDef)
    // If this BB loops back to itself, conservatively avoid narrowing the
    // first instruction that does partial flag update.
    return HighLatencyCPSR || FirstInSelfLoop;

  SmallSet<unsigned, 2> Defs;
  for (const MachineOperand &MO : CPSRDef->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    Defs.insert(Reg);
  }

  for (const MachineOperand &MO : Use->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (Defs.count(Reg))
      return false;
  }

  // If the current CPSR has high latency, try to avoid the false dependency.
  if (HighLatencyCPSR)
    return true;

  // tMOVi8 usually doesn't start long dependency chains, and there are a lot
  // of them, so always shrink them when CPSR doesn't have high latency.
  if (Use->getOpcode() == ARM::t2MOVi ||
      Use->getOpcode() == ARM::t2MOVi16)
    return false;

  // No read-after-write dependency. The narrowing will add a false dependency.
  return true;
}

bool
Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry,
                                  bool is2Addr, ARMCC::CondCodes Pred,
                                  bool LiveCPSR, bool &HasCC, bool &CCDead) {
  if ((is2Addr  && Entry.PredCC2 == 0) ||
      (!is2Addr && Entry.PredCC1 == 0)) {
    if (Pred == ARMCC::AL) {
      // Not predicated, must set CPSR.
      if (!HasCC) {
        // Original instruction was not setting CPSR, but CPSR is not
        // currently live anyway. It's ok to set it. The CPSR def is
        // dead though.
        if (!LiveCPSR) {
          HasCC = true;
          CCDead = true;
          return true;
        }
        return false;
      }
    } else {
      // Predicated, must not set CPSR.
      if (HasCC)
        return false;
    }
  } else if ((is2Addr  && Entry.PredCC2 == 2) ||
             (!is2Addr && Entry.PredCC1 == 2)) {
    /// Old opcode has an optional def of CPSR.
    if (HasCC)
      return true;
    // If old opcode does not implicitly define CPSR, then it's not ok since
    // these new opcodes' CPSR def is not meant to be thrown away. e.g. CMP.
    if (!HasImplicitCPSRDef(MI->getDesc()))
      return false;
    HasCC = true;
  } else {
    // 16-bit instruction does not set CPSR.
    if (HasCC)
      return false;
  }

  return true;
}

static bool VerifyLowRegs(MachineInstr *MI) {
  unsigned Opc = MI->getOpcode();
  bool isPCOk = (Opc == ARM::t2LDMIA_RET || Opc == ARM::t2LDMIA_UPD);
  bool isLROk = (Opc == ARM::t2STMDB_UPD);
  bool isSPOk = isPCOk || isLROk;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    Register Reg = MO.getReg();
    if (Reg == 0 || Reg == ARM::CPSR)
      continue;
    if (isPCOk && Reg == ARM::PC)
      continue;
    if (isLROk && Reg == ARM::LR)
      continue;
    if (Reg == ARM::SP) {
      if (isSPOk)
        continue;
      if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12))
        // Special case for these ldr / str with sp as base register.
        continue;
    }
    if (!isARMLowRegister(Reg))
      return false;
  }
  return true;
}

bool
Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
                                  const ReduceEntry &Entry) {
  if (ReduceLimitLdSt != -1 && ((int)NumLdSts >= ReduceLimitLdSt))
    return false;

  unsigned Scale = 1;
  bool HasImmOffset = false;
  bool HasShift = false;
  bool HasOffReg = true;
  bool isLdStMul = false;
  unsigned Opc = Entry.NarrowOpc1;
  unsigned OpNum = 3; // First 'rest' of operands.
  uint8_t ImmLimit = Entry.Imm1Limit;

  switch (Entry.WideOpc) {
  default:
    llvm_unreachable("Unexpected Thumb2 load / store opcode!");
  case ARM::t2LDRi12:
  case ARM::t2STRi12:
    if (MI->getOperand(1).getReg() == ARM::SP) {
      Opc = Entry.NarrowOpc2;
      ImmLimit = Entry.Imm2Limit;
    }
    Scale = 4;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRBi12:
  case ARM::t2STRBi12:
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRHi12:
  case ARM::t2STRHi12:
    Scale = 2;
    HasImmOffset = true;
    HasOffReg = false;
    break;
  case ARM::t2LDRs:
  case ARM::t2LDRBs:
  case ARM::t2LDRHs:
  case ARM::t2LDRSBs:
  case ARM::t2LDRSHs:
  case ARM::t2STRs:
  case ARM::t2STRBs:
  case ARM::t2STRHs:
    HasShift = true;
    OpNum = 4;
    break;
  case ARM::t2LDR_POST:
  case ARM::t2STR_POST: {
    if (!MinimizeSize)
      return false;

    if (!MI->hasOneMemOperand() ||
        (*MI->memoperands_begin())->getAlign() < Align(4))
      return false;

    // We're creating a completely different type of load/store - LDM from LDR.
    // For this reason we can't reuse the logic at the end of this function; we
    // have to implement the MI building here.
    bool IsStore = Entry.WideOpc == ARM::t2STR_POST;
    Register Rt = MI->getOperand(IsStore ? 1 : 0).getReg();
    Register Rn = MI->getOperand(IsStore ? 0 : 1).getReg();
    unsigned Offset = MI->getOperand(3).getImm();
    unsigned PredImm = MI->getOperand(4).getImm();
    Register PredReg = MI->getOperand(5).getReg();
    assert(isARMLowRegister(Rt));
    assert(isARMLowRegister(Rn));

    if (Offset != 4)
      return false;
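    // At this point the rewrite is equivalent: e.g. "ldr r0, [r1], #4" can
    // become "ldm r1!, {r0}", since a post-increment of exactly one word
    // matches LDM's base-register writeback.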
    // Add the 16-bit load / store instruction.
    DebugLoc dl = MI->getDebugLoc();
    auto MIB = BuildMI(MBB, MI, dl, TII->get(Entry.NarrowOpc1))
                   .addReg(Rn, RegState::Define)
                   .addReg(Rn)
                   .addImm(PredImm)
                   .addReg(PredReg)
                   .addReg(Rt, IsStore ? 0 : RegState::Define);

    // Transfer memoperands.
    MIB.setMemRefs(MI->memoperands());

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    // Kill the old instruction.
    MI->eraseFromBundle();
    ++NumLdSts;
    return true;
  }
  case ARM::t2LDMIA: {
    Register BaseReg = MI->getOperand(0).getReg();
    assert(isARMLowRegister(BaseReg));

    // For the non-writeback version (this one), the base register must be
    // one of the registers being loaded.
    bool isOK = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3)) {
      if (MO.getReg() == BaseReg) {
        isOK = true;
        break;
      }
    }

    if (!isOK)
      return false;

    OpNum = 0;
    isLdStMul = true;
    break;
  }
  case ARM::t2STMIA: {
    // t2STMIA is reduced to tSTMIA_UPD which has writeback. We can only do
    // this if the base register is killed, as then it doesn't matter what its
    // value is after the instruction.
    if (!MI->getOperand(0).isKill())
      return false;

    // If the base register is in the register list and isn't the lowest
    // numbered register (i.e. it's in operand 4 onwards) then with writeback
    // the stored value is unknown, so we can't convert to tSTMIA_UPD.
    Register BaseReg = MI->getOperand(0).getReg();
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 4))
      if (MO.getReg() == BaseReg)
        return false;

    break;
  }
  case ARM::t2LDMIA_RET: {
    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg != ARM::SP)
      return false;
    Opc = Entry.NarrowOpc2; // tPOP_RET
    OpNum = 2;
    isLdStMul = true;
    break;
  }
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD: {
    OpNum = 0;

    Register BaseReg = MI->getOperand(1).getReg();
    if (BaseReg == ARM::SP &&
        (Entry.WideOpc == ARM::t2LDMIA_UPD ||
         Entry.WideOpc == ARM::t2STMDB_UPD)) {
      Opc = Entry.NarrowOpc2; // tPOP or tPUSH
      OpNum = 2;
    } else if (!isARMLowRegister(BaseReg) ||
               (Entry.WideOpc != ARM::t2LDMIA_UPD &&
                Entry.WideOpc != ARM::t2STMIA_UPD)) {
      return false;
    }

    isLdStMul = true;
    break;
  }
  }

  unsigned OffsetReg = 0;
  bool OffsetKill = false;
  bool OffsetInternal = false;
  if (HasShift) {
    OffsetReg = MI->getOperand(2).getReg();
    OffsetKill = MI->getOperand(2).isKill();
    OffsetInternal = MI->getOperand(2).isInternalRead();

    if (MI->getOperand(3).getImm())
      // Thumb1 addressing mode doesn't support shift.
      return false;
  }

  unsigned OffsetImm = 0;
  if (HasImmOffset) {
    OffsetImm = MI->getOperand(2).getImm();
    unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale;

    if ((OffsetImm & (Scale - 1)) || OffsetImm > MaxOffset)
      // Make sure the immediate field fits.
      return false;
  }

  // Add the 16-bit load / store instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, TII->get(Opc));

  // tSTMIA_UPD takes a defining register operand. We've already checked that
  // the register is killed, so mark it as dead here.
  if (Entry.WideOpc == ARM::t2STMIA)
    MIB.addReg(MI->getOperand(0).getReg(), RegState::Define | RegState::Dead);

  if (!isLdStMul) {
    MIB.add(MI->getOperand(0));
    MIB.add(MI->getOperand(1));

    if (HasImmOffset)
      MIB.addImm(OffsetImm / Scale);

    assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!");

    if (HasOffReg)
      MIB.addReg(OffsetReg, getKillRegState(OffsetKill) |
                            getInternalReadRegState(OffsetInternal));
  }

  // Transfer the rest of operands.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), OpNum))
    MIB.add(MO);

  // Transfer memoperands.
  MIB.setMemRefs(MI->memoperands());

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumLdSts;
  return true;
}

bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  unsigned Opc = MI->getOpcode();
  if (Opc == ARM::t2ADDri) {
    // If the source register is SP, try to reduce to tADDrSPi, otherwise
    // it's a normal reduce.
    if (MI->getOperand(1).getReg() != ARM::SP) {
      if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
        return true;
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    }
    // Try to reduce to tADDrSPi.
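    // e.g. "add.w r0, sp, #16" can become the 16-bit "add r0, sp, #16"
    // (tADDrSPi); note the encoded immediate is scaled by four below.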
    unsigned Imm = MI->getOperand(2).getImm();
    // The immediate must be in range, the destination register must be a low
    // reg, the predicate must be "always" and the condition flags must not
    // be being set.
    if (Imm & 3 || Imm > 1020)
      return false;
    if (!isARMLowRegister(MI->getOperand(0).getReg()))
      return false;
    if (MI->getOperand(3).getImm() != ARMCC::AL)
      return false;
    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.hasOptionalDef() &&
        MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
      return false;

    MachineInstrBuilder MIB =
        BuildMI(MBB, MI, MI->getDebugLoc(),
                TII->get(ARM::tADDrSPi))
            .add(MI->getOperand(0))
            .add(MI->getOperand(1))
            .addImm(Imm / 4) // The tADDrSPi has an implied scale by four.
            .add(predOps(ARMCC::AL));

    // Transfer MI flags.
    MIB.setMIFlags(MI->getFlags());

    LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                      << " to 16-bit: " << *MIB);

    MBB.erase_instr(MI);
    ++NumNarrows;
    return true;
  }

  if (Entry.LowRegs1 && !VerifyLowRegs(MI))
    return false;

  if (MI->mayLoadOrStore())
    return ReduceLoadStore(MBB, MI, Entry);

  switch (Opc) {
  default: break;
  case ARM::t2ADDSri:
  case ARM::t2ADDSrr: {
    Register PredReg;
    if (getInstrPredicate(*MI, PredReg) == ARMCC::AL) {
      switch (Opc) {
      default: break;
      case ARM::t2ADDSri:
        if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
          return true;
        [[fallthrough]];
      case ARM::t2ADDSrr:
        return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
      }
    }
    break;
  }
  case ARM::t2RSBri:
  case ARM::t2RSBSri:
  case ARM::t2SXTB:
  case ARM::t2SXTH:
  case ARM::t2UXTB:
  case ARM::t2UXTH:
    if (MI->getOperand(2).getImm() == 0)
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2MOVi16:
    // Can convert only 'pure' immediate operands, not immediates obtained as
    // globals' addresses.
    if (MI->getOperand(1).isImm())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
    break;
  case ARM::t2CMPrr: {
    // Try to reduce to the lo-reg only version first. Why there are two
    // versions of the instruction is a mystery.
    // It would be nice to just have two entries in the main table that
    // are prioritized, but the table assumes a unique entry for each
    // source insn opcode. So for now, we hack a local entry record to use.
    static const ReduceEntry NarrowEntry =
      { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1,0 };
    if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, IsSelfLoop))
      return true;
    return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  case ARM::t2TEQrr: {
    Register PredReg;
    // Can only convert to eors if we're not in an IT block.
    if (getInstrPredicate(*MI, PredReg) != ARMCC::AL)
      break;
    // TODO if Operand 0 is not killed but Operand 1 is, then we could write
    // to Op1 instead.
    if (MI->getOperand(0).isKill())
      return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
  }
  }
  return false;
}

bool
Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
                                const ReduceEntry &Entry,
                                bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  Register Reg0 = MI->getOperand(0).getReg();
  Register Reg1 = MI->getOperand(1).getReg();
  // t2MUL is "special". The tied source operand is second, not first.
  if (MI->getOpcode() == ARM::t2MUL) {
    Register Reg2 = MI->getOperand(2).getReg();
    // Early exit if the regs aren't all low regs.
    if (!isARMLowRegister(Reg0) || !isARMLowRegister(Reg1)
        || !isARMLowRegister(Reg2))
      return false;
    if (Reg0 != Reg2) {
      // If the other operand also isn't the same as the destination, we
      // can't reduce.
      if (Reg1 != Reg0)
        return false;
      // Try to commute the operands to make it a 2-address instruction.
      MachineInstr *CommutedMI = TII->commuteInstruction(*MI);
      if (!CommutedMI)
        return false;
    }
  } else if (Reg0 != Reg1) {
    // Try to commute the operands to make it a 2-address instruction.
    unsigned CommOpIdx1 = 1;
    unsigned CommOpIdx2 = TargetInstrInfo::CommuteAnyOperandIndex;
    if (!TII->findCommutedOpIndices(*MI, CommOpIdx1, CommOpIdx2) ||
        MI->getOperand(CommOpIdx2).getReg() != Reg0)
      return false;
    MachineInstr *CommutedMI =
        TII->commuteInstruction(*MI, false, CommOpIdx1, CommOpIdx2);
    if (!CommutedMI)
      return false;
  }
  if (Entry.LowRegs2 && !isARMLowRegister(Reg0))
    return false;
  if (Entry.Imm2Limit) {
    unsigned Imm = MI->getOperand(2).getImm();
    unsigned Limit = (1 << Entry.Imm2Limit) - 1;
    if (Imm > Limit)
      return false;
  } else {
    Register Reg2 = MI->getOperand(2).getReg();
    if (Entry.LowRegs2 && !isARMLowRegister(Reg2))
      return false;
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions that have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
  MIB.add(MI->getOperand(0));
  if (NewMCID.hasOptionalDef())
    MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if (SkipPred && MCID.operands()[i].isPredicate())
      continue;
    MIB.add(MI->getOperand(i));
  }

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++Num2Addrs;
  return true;
}

bool
Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
                                 const ReduceEntry &Entry,
                                 bool LiveCPSR, bool IsSelfLoop) {
  if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit))
    return false;

  if (!OptimizeSize && Entry.AvoidMovs && STI->avoidMOVsShifterOperand())
    // Don't issue movs with shifter operand for some CPUs unless we
    // are optimizing for size.
    return false;

  unsigned Limit = ~0U;
  if (Entry.Imm1Limit)
    Limit = (1 << Entry.Imm1Limit) - 1;

  const MCInstrDesc &MCID = MI->getDesc();
  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
    if (MCID.operands()[i].isPredicate())
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg()) {
      Register Reg = MO.getReg();
      if (!Reg || Reg == ARM::CPSR)
        continue;
      if (Entry.LowRegs1 && !isARMLowRegister(Reg))
        return false;
    } else if (MO.isImm() && !MCID.operands()[i].isPredicate()) {
      if (((unsigned)MO.getImm()) > Limit)
        return false;
    }
  }

  // Check if it's possible / necessary to transfer the predicate.
  const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  bool SkipPred = false;
  if (Pred != ARMCC::AL) {
    if (!NewMCID.isPredicable())
      // Can't transfer predicate, fail.
      return false;
  } else {
    SkipPred = !NewMCID.isPredicable();
  }

  bool HasCC = false;
  bool CCDead = false;
  if (MCID.hasOptionalDef()) {
    unsigned NumOps = MCID.getNumOperands();
    HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
    if (HasCC && MI->getOperand(NumOps-1).isDead())
      CCDead = true;
  }
  if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead))
    return false;

  // Avoid adding a false dependency on partial flag update by some 16-bit
  // instructions that have the 's' bit set.
  if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
      canAddPseudoFlagDep(MI, IsSelfLoop))
    return false;

  // Add the 16-bit instruction.
  DebugLoc dl = MI->getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);

  // TEQ is special in that it doesn't define a register but we're converting
  // it into an EOR which does. So add the first operand as a def and then
  // again as a use.
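  // e.g. "teq r0, r1" becomes "eors r0, r1"; the EOR result itself is
  // unwanted, so the new def of r0 is marked dead and only the flags live on.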
  if (MCID.getOpcode() == ARM::t2TEQrr) {
    MIB.add(MI->getOperand(0));
    MIB->getOperand(0).setIsKill(false);
    MIB->getOperand(0).setIsDef(true);
    MIB->getOperand(0).setIsDead(true);

    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
    MIB.add(MI->getOperand(0));
  } else {
    MIB.add(MI->getOperand(0));
    if (NewMCID.hasOptionalDef())
      MIB.add(HasCC ? t1CondCodeOp(CCDead) : condCodeOp());
  }

  // Transfer the rest of operands.
  unsigned NumOps = MCID.getNumOperands();
  for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
    if (i < NumOps && MCID.operands()[i].isOptionalDef())
      continue;
    if ((MCID.getOpcode() == ARM::t2RSBSri ||
         MCID.getOpcode() == ARM::t2RSBri ||
         MCID.getOpcode() == ARM::t2SXTB ||
         MCID.getOpcode() == ARM::t2SXTH ||
         MCID.getOpcode() == ARM::t2UXTB ||
         MCID.getOpcode() == ARM::t2UXTH) && i == 2)
      // Skip the zero immediate operand, it's now implicit.
      continue;
    bool isPred = (i < NumOps && MCID.operands()[i].isPredicate());
    if (SkipPred && isPred)
      continue;
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
      // Skip implicit def of CPSR. Either it's modeled as an optional
      // def now or it's already an implicit def on the new instruction.
      continue;
    MIB.add(MO);
  }
  if (!MCID.isPredicable() && NewMCID.isPredicable())
    MIB.add(predOps(ARMCC::AL));

  // Transfer MI flags.
  MIB.setMIFlags(MI->getFlags());

  LLVM_DEBUG(dbgs() << "Converted 32-bit: " << *MI
                    << " to 16-bit: " << *MIB);

  MBB.erase_instr(MI);
  ++NumNarrows;
  return true;
}

static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) {
  bool HasDef = false;
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;

    DefCPSR = true;
    if (!MO.isDead())
      HasDef = true;
  }

  return HasDef || LiveCPSR;
}

static bool UpdateCPSRUse(MachineInstr &MI, bool LiveCPSR) {
  for (const MachineOperand &MO : MI.operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isDef())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    assert(LiveCPSR && "CPSR liveness tracking is wrong!");
    if (MO.isKill()) {
      LiveCPSR = false;
      break;
    }
  }

  return LiveCPSR;
}

bool Thumb2SizeReduce::ReduceMI(MachineBasicBlock &MBB, MachineInstr *MI,
                                bool LiveCPSR, bool IsSelfLoop,
                                bool SkipPrologueEpilogue) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, unsigned>::iterator OPI = ReduceOpcodeMap.find(Opcode);
  if (OPI == ReduceOpcodeMap.end())
    return false;
  if (SkipPrologueEpilogue && (MI->getFlag(MachineInstr::FrameSetup) ||
                               MI->getFlag(MachineInstr::FrameDestroy)))
    return false;
  const ReduceEntry &Entry = ReduceTable[OPI->second];

  // Don't attempt normal reductions on "special" cases for now.
  if (Entry.Special)
    return ReduceSpecial(MBB, MI, Entry, LiveCPSR, IsSelfLoop);

  // Try to transform to a 16-bit two-address instruction.
  if (Entry.NarrowOpc2 &&
      ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  // Try to transform to a 16-bit non-two-address instruction.
  if (Entry.NarrowOpc1 &&
      ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
    return true;

  return false;
}

bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB,
                                 bool SkipPrologueEpilogue) {
  bool Modified = false;

  // Yes, CPSR could be livein.
  bool LiveCPSR = MBB.isLiveIn(ARM::CPSR);
  MachineInstr *BundleMI = nullptr;

  CPSRDef = nullptr;
  HighLatencyCPSR = false;

  // Check predecessors for the latest CPSRDef.
  for (auto *Pred : MBB.predecessors()) {
    const MBBInfo &PInfo = BlockInfo[Pred->getNumber()];
    if (!PInfo.Visited) {
      // Since blocks are visited in RPO, this must be a back-edge.
      continue;
    }
    if (PInfo.HighLatencyCPSR) {
      HighLatencyCPSR = true;
      break;
    }
  }

  // If this BB loops back to itself, conservatively avoid narrowing the
  // first instruction that does partial flag update.
  bool IsSelfLoop = MBB.isSuccessor(&MBB);
  MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),E = MBB.instr_end();
  MachineBasicBlock::instr_iterator NextMII;
  for (; MII != E; MII = NextMII) {
    NextMII = std::next(MII);

    MachineInstr *MI = &*MII;
    if (MI->isBundle()) {
      BundleMI = MI;
      continue;
    }
    if (MI->isDebugInstr())
      continue;

    LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);

    // Does NextMII belong to the same bundle as MI?
    bool NextInSameBundle = NextMII != E && NextMII->isBundledWithPred();

    if (ReduceMI(MBB, MI, LiveCPSR, IsSelfLoop, SkipPrologueEpilogue)) {
      Modified = true;
      MachineBasicBlock::instr_iterator I = std::prev(NextMII);
      MI = &*I;
      // Removing and reinserting the first instruction in a bundle will break
      // up the bundle. Fix the bundling if it was broken.
      if (NextInSameBundle && !NextMII->isBundledWithPred())
        NextMII->bundleWithPred();
    }

    if (BundleMI && !NextInSameBundle && MI->isInsideBundle()) {
      // FIXME: Since post-ra scheduler operates on bundles, the CPSR kill
      // marker is only on the BUNDLE instruction. Process the BUNDLE
      // instruction as we finish with the bundled instruction to work around
      // the inconsistency.
      if (BundleMI->killsRegister(ARM::CPSR))
        LiveCPSR = false;
      MachineOperand *MO = BundleMI->findRegisterDefOperand(ARM::CPSR);
      if (MO && !MO->isDead())
        LiveCPSR = true;
      MO = BundleMI->findRegisterUseOperand(ARM::CPSR);
      if (MO && !MO->isKill())
        LiveCPSR = true;
    }

    bool DefCPSR = false;
    LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR);
    if (MI->isCall()) {
      // Calls don't really set CPSR.
      CPSRDef = nullptr;
      HighLatencyCPSR = false;
      IsSelfLoop = false;
    } else if (DefCPSR) {
      // This is the last CPSR defining instruction.
      CPSRDef = MI;
      HighLatencyCPSR = isHighLatencyCPSR(CPSRDef);
      IsSelfLoop = false;
    }
  }

  MBBInfo &Info = BlockInfo[MBB.getNumber()];
  Info.HighLatencyCPSR = HighLatencyCPSR;
  Info.Visited = true;
  return Modified;
}

bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) {
  if (PredicateFtor && !PredicateFtor(MF.getFunction()))
    return false;

  STI = &MF.getSubtarget<ARMSubtarget>();
  if (STI->isThumb1Only() || STI->prefers32BitThumb())
    return false;

  TII = static_cast<const Thumb2InstrInfo *>(STI->getInstrInfo());

  // Optimizing / minimizing size? Minimizing size implies optimizing for size.
  OptimizeSize = MF.getFunction().hasOptSize();
  MinimizeSize = STI->hasMinSize();

  BlockInfo.clear();
  BlockInfo.resize(MF.getNumBlockIDs());

  // Visit blocks in reverse post-order so LastCPSRDef is known for all
  // predecessors.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  bool Modified = false;
  bool NeedsWinCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
                     MF.getFunction().needsUnwindTableEntry();
  for (MachineBasicBlock *MBB : RPOT)
    Modified |= ReduceMBB(*MBB, /*SkipPrologueEpilogue=*/NeedsWinCFI);
  return Modified;
}

/// createThumb2SizeReductionPass - Returns an instance of the Thumb2 size
/// reduction pass.
FunctionPass *llvm::createThumb2SizeReductionPass(
    std::function<bool(const Function &)> Ftor) {
  return new Thumb2SizeReduce(std::move(Ftor));
}