ARMLoadStoreOptimizer.cpp 104 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990
  1. //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file This file contains a pass that performs load / store related peephole
  10. /// optimizations. This pass should be run after register allocation.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "ARM.h"
  14. #include "ARMBaseInstrInfo.h"
  15. #include "ARMBaseRegisterInfo.h"
  16. #include "ARMISelLowering.h"
  17. #include "ARMMachineFunctionInfo.h"
  18. #include "ARMSubtarget.h"
  19. #include "MCTargetDesc/ARMAddressingModes.h"
  20. #include "MCTargetDesc/ARMBaseInfo.h"
  21. #include "Utils/ARMBaseInfo.h"
  22. #include "llvm/ADT/ArrayRef.h"
  23. #include "llvm/ADT/DenseMap.h"
  24. #include "llvm/ADT/DenseSet.h"
  25. #include "llvm/ADT/STLExtras.h"
  26. #include "llvm/ADT/SmallPtrSet.h"
  27. #include "llvm/ADT/SmallSet.h"
  28. #include "llvm/ADT/SmallVector.h"
  29. #include "llvm/ADT/Statistic.h"
  30. #include "llvm/ADT/iterator_range.h"
  31. #include "llvm/Analysis/AliasAnalysis.h"
  32. #include "llvm/CodeGen/LivePhysRegs.h"
  33. #include "llvm/CodeGen/MachineBasicBlock.h"
  34. #include "llvm/CodeGen/MachineDominators.h"
  35. #include "llvm/CodeGen/MachineFunction.h"
  36. #include "llvm/CodeGen/MachineFunctionPass.h"
  37. #include "llvm/CodeGen/MachineInstr.h"
  38. #include "llvm/CodeGen/MachineInstrBuilder.h"
  39. #include "llvm/CodeGen/MachineMemOperand.h"
  40. #include "llvm/CodeGen/MachineOperand.h"
  41. #include "llvm/CodeGen/MachineRegisterInfo.h"
  42. #include "llvm/CodeGen/RegisterClassInfo.h"
  43. #include "llvm/CodeGen/TargetFrameLowering.h"
  44. #include "llvm/CodeGen/TargetInstrInfo.h"
  45. #include "llvm/CodeGen/TargetLowering.h"
  46. #include "llvm/CodeGen/TargetRegisterInfo.h"
  47. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  48. #include "llvm/IR/DataLayout.h"
  49. #include "llvm/IR/DebugLoc.h"
  50. #include "llvm/IR/DerivedTypes.h"
  51. #include "llvm/IR/Function.h"
  52. #include "llvm/IR/Type.h"
  53. #include "llvm/InitializePasses.h"
  54. #include "llvm/MC/MCInstrDesc.h"
  55. #include "llvm/Pass.h"
  56. #include "llvm/Support/Allocator.h"
  57. #include "llvm/Support/CommandLine.h"
  58. #include "llvm/Support/Debug.h"
  59. #include "llvm/Support/ErrorHandling.h"
  60. #include "llvm/Support/raw_ostream.h"
  61. #include <algorithm>
  62. #include <cassert>
  63. #include <cstddef>
  64. #include <cstdlib>
  65. #include <iterator>
  66. #include <limits>
  67. #include <utility>
  using namespace llvm;

  // Identifier string for this file; the same string is passed to
  // INITIALIZE_PASS as the pass argument.
  #define DEBUG_TYPE "arm-ldst-opt"

  // Counters kept by this pass. Each STATISTIC pairs a counter variable with
  // the human-readable description given as its second argument.
  STATISTIC(NumLDMGened , "Number of ldm instructions generated");
  STATISTIC(NumSTMGened , "Number of stm instructions generated");
  STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
  STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
  STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
  STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
  STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
  STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
  STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
  STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
  STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
  /// This switch disables formation of double/multi instructions that could
  /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
  /// disabled. This can be used to create libraries that are robust even when
  /// users provoke undefined behaviour by supplying misaligned pointers.
  /// The option is hidden and defaults to false.
  /// \see mayCombineMisaligned()
  static cl::opt<bool>
  AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
      cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

  // Human-readable pass name; returned by ARMLoadStoreOpt::getPassName() and
  // passed to INITIALIZE_PASS.
  #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
  90. namespace {
  91. /// Post- register allocation pass the combine load / store instructions to
  92. /// form ldm / stm instructions.
  93. struct ARMLoadStoreOpt : public MachineFunctionPass {
  94. static char ID;
  95. const MachineFunction *MF;
  96. const TargetInstrInfo *TII;
  97. const TargetRegisterInfo *TRI;
  98. const ARMSubtarget *STI;
  99. const TargetLowering *TL;
  100. ARMFunctionInfo *AFI;
  101. LivePhysRegs LiveRegs;
  102. RegisterClassInfo RegClassInfo;
  103. MachineBasicBlock::const_iterator LiveRegPos;
  104. bool LiveRegsValid;
  105. bool RegClassInfoValid;
  106. bool isThumb1, isThumb2;
  107. ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
  108. bool runOnMachineFunction(MachineFunction &Fn) override;
  109. MachineFunctionProperties getRequiredProperties() const override {
  110. return MachineFunctionProperties().set(
  111. MachineFunctionProperties::Property::NoVRegs);
  112. }
  113. StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
  114. private:
  115. /// A set of load/store MachineInstrs with same base register sorted by
  116. /// offset.
  117. struct MemOpQueueEntry {
  118. MachineInstr *MI;
  119. int Offset; ///< Load/Store offset.
  120. unsigned Position; ///< Position as counted from end of basic block.
  121. MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
  122. : MI(&MI), Offset(Offset), Position(Position) {}
  123. };
  124. using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
  125. /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  126. /// merged into a LDM/STM.
  127. struct MergeCandidate {
  128. /// List of instructions ordered by load/store offset.
  129. SmallVector<MachineInstr*, 4> Instrs;
  130. /// Index in Instrs of the instruction being latest in the schedule.
  131. unsigned LatestMIIdx;
  132. /// Index in Instrs of the instruction being earliest in the schedule.
  133. unsigned EarliestMIIdx;
  134. /// Index into the basic block where the merged instruction will be
  135. /// inserted. (See MemOpQueueEntry.Position)
  136. unsigned InsertPos;
  137. /// Whether the instructions can be merged into a ldm/stm instruction.
  138. bool CanMergeToLSMulti;
  139. /// Whether the instructions can be merged into a ldrd/strd instruction.
  140. bool CanMergeToLSDouble;
  141. };
  142. SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  143. SmallVector<const MergeCandidate*,4> Candidates;
  144. SmallVector<MachineInstr*,4> MergeBaseCandidates;
  145. void moveLiveRegsBefore(const MachineBasicBlock &MBB,
  146. MachineBasicBlock::const_iterator Before);
  147. unsigned findFreeReg(const TargetRegisterClass &RegClass);
  148. void UpdateBaseRegUses(MachineBasicBlock &MBB,
  149. MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
  150. unsigned Base, unsigned WordOffset,
  151. ARMCC::CondCodes Pred, unsigned PredReg);
  152. MachineInstr *CreateLoadStoreMulti(
  153. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  154. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  155. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  156. ArrayRef<std::pair<unsigned, bool>> Regs,
  157. ArrayRef<MachineInstr*> Instrs);
  158. MachineInstr *CreateLoadStoreDouble(
  159. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  160. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  161. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  162. ArrayRef<std::pair<unsigned, bool>> Regs,
  163. ArrayRef<MachineInstr*> Instrs) const;
  164. void FormCandidates(const MemOpQueue &MemOps);
  165. MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  166. bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
  167. MachineBasicBlock::iterator &MBBI);
  168. bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  169. bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  170. bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  171. bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  172. bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  173. bool CombineMovBx(MachineBasicBlock &MBB);
  174. };
  175. } // end anonymous namespace
  // Out-of-class definition of the static pass ID member, whose address-like
  // identity is handed to MachineFunctionPass by the constructor.
  char ARMLoadStoreOpt::ID = 0;

  // Register the pass under the argument string "arm-ldst-opt" with the
  // descriptive name ARM_LOAD_STORE_OPT_NAME.
  INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
                  false)
  179. static bool definesCPSR(const MachineInstr &MI) {
  180. for (const auto &MO : MI.operands()) {
  181. if (!MO.isReg())
  182. continue;
  183. if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
  184. // If the instruction has live CPSR def, then it's not safe to fold it
  185. // into load / store.
  186. return true;
  187. }
  188. return false;
  189. }
  190. static int getMemoryOpOffset(const MachineInstr &MI) {
  191. unsigned Opcode = MI.getOpcode();
  192. bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  193. unsigned NumOperands = MI.getDesc().getNumOperands();
  194. unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
  195. if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
  196. Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
  197. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
  198. Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  199. return OffField;
  200. // Thumb1 immediate offsets are scaled by 4
  201. if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
  202. Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
  203. return OffField * 4;
  204. int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
  205. : ARM_AM::getAM5Offset(OffField) * 4;
  206. ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
  207. : ARM_AM::getAM5Op(OffField);
  208. if (Op == ARM_AM::sub)
  209. return -Offset;
  210. return Offset;
  211. }
  212. static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  213. return MI.getOperand(1);
  214. }
  215. static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  216. return MI.getOperand(0);
  217. }
  218. static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  219. switch (Opcode) {
  220. default: llvm_unreachable("Unhandled opcode!");
  221. case ARM::LDRi12:
  222. ++NumLDMGened;
  223. switch (Mode) {
  224. default: llvm_unreachable("Unhandled submode!");
  225. case ARM_AM::ia: return ARM::LDMIA;
  226. case ARM_AM::da: return ARM::LDMDA;
  227. case ARM_AM::db: return ARM::LDMDB;
  228. case ARM_AM::ib: return ARM::LDMIB;
  229. }
  230. case ARM::STRi12:
  231. ++NumSTMGened;
  232. switch (Mode) {
  233. default: llvm_unreachable("Unhandled submode!");
  234. case ARM_AM::ia: return ARM::STMIA;
  235. case ARM_AM::da: return ARM::STMDA;
  236. case ARM_AM::db: return ARM::STMDB;
  237. case ARM_AM::ib: return ARM::STMIB;
  238. }
  239. case ARM::tLDRi:
  240. case ARM::tLDRspi:
  241. // tLDMIA is writeback-only - unless the base register is in the input
  242. // reglist.
  243. ++NumLDMGened;
  244. switch (Mode) {
  245. default: llvm_unreachable("Unhandled submode!");
  246. case ARM_AM::ia: return ARM::tLDMIA;
  247. }
  248. case ARM::tSTRi:
  249. case ARM::tSTRspi:
  250. // There is no non-writeback tSTMIA either.
  251. ++NumSTMGened;
  252. switch (Mode) {
  253. default: llvm_unreachable("Unhandled submode!");
  254. case ARM_AM::ia: return ARM::tSTMIA_UPD;
  255. }
  256. case ARM::t2LDRi8:
  257. case ARM::t2LDRi12:
  258. ++NumLDMGened;
  259. switch (Mode) {
  260. default: llvm_unreachable("Unhandled submode!");
  261. case ARM_AM::ia: return ARM::t2LDMIA;
  262. case ARM_AM::db: return ARM::t2LDMDB;
  263. }
  264. case ARM::t2STRi8:
  265. case ARM::t2STRi12:
  266. ++NumSTMGened;
  267. switch (Mode) {
  268. default: llvm_unreachable("Unhandled submode!");
  269. case ARM_AM::ia: return ARM::t2STMIA;
  270. case ARM_AM::db: return ARM::t2STMDB;
  271. }
  272. case ARM::VLDRS:
  273. ++NumVLDMGened;
  274. switch (Mode) {
  275. default: llvm_unreachable("Unhandled submode!");
  276. case ARM_AM::ia: return ARM::VLDMSIA;
  277. case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
  278. }
  279. case ARM::VSTRS:
  280. ++NumVSTMGened;
  281. switch (Mode) {
  282. default: llvm_unreachable("Unhandled submode!");
  283. case ARM_AM::ia: return ARM::VSTMSIA;
  284. case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
  285. }
  286. case ARM::VLDRD:
  287. ++NumVLDMGened;
  288. switch (Mode) {
  289. default: llvm_unreachable("Unhandled submode!");
  290. case ARM_AM::ia: return ARM::VLDMDIA;
  291. case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
  292. }
  293. case ARM::VSTRD:
  294. ++NumVSTMGened;
  295. switch (Mode) {
  296. default: llvm_unreachable("Unhandled submode!");
  297. case ARM_AM::ia: return ARM::VSTMDIA;
  298. case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
  299. }
  300. }
  301. }
  302. static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  303. switch (Opcode) {
  304. default: llvm_unreachable("Unhandled opcode!");
  305. case ARM::LDMIA_RET:
  306. case ARM::LDMIA:
  307. case ARM::LDMIA_UPD:
  308. case ARM::STMIA:
  309. case ARM::STMIA_UPD:
  310. case ARM::tLDMIA:
  311. case ARM::tLDMIA_UPD:
  312. case ARM::tSTMIA_UPD:
  313. case ARM::t2LDMIA_RET:
  314. case ARM::t2LDMIA:
  315. case ARM::t2LDMIA_UPD:
  316. case ARM::t2STMIA:
  317. case ARM::t2STMIA_UPD:
  318. case ARM::VLDMSIA:
  319. case ARM::VLDMSIA_UPD:
  320. case ARM::VSTMSIA:
  321. case ARM::VSTMSIA_UPD:
  322. case ARM::VLDMDIA:
  323. case ARM::VLDMDIA_UPD:
  324. case ARM::VSTMDIA:
  325. case ARM::VSTMDIA_UPD:
  326. return ARM_AM::ia;
  327. case ARM::LDMDA:
  328. case ARM::LDMDA_UPD:
  329. case ARM::STMDA:
  330. case ARM::STMDA_UPD:
  331. return ARM_AM::da;
  332. case ARM::LDMDB:
  333. case ARM::LDMDB_UPD:
  334. case ARM::STMDB:
  335. case ARM::STMDB_UPD:
  336. case ARM::t2LDMDB:
  337. case ARM::t2LDMDB_UPD:
  338. case ARM::t2STMDB:
  339. case ARM::t2STMDB_UPD:
  340. case ARM::VLDMSDB_UPD:
  341. case ARM::VSTMSDB_UPD:
  342. case ARM::VLDMDDB_UPD:
  343. case ARM::VSTMDDB_UPD:
  344. return ARM_AM::db;
  345. case ARM::LDMIB:
  346. case ARM::LDMIB_UPD:
  347. case ARM::STMIB:
  348. case ARM::STMIB_UPD:
  349. return ARM_AM::ib;
  350. }
  351. }
  352. static bool isT1i32Load(unsigned Opc) {
  353. return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
  354. }
  355. static bool isT2i32Load(unsigned Opc) {
  356. return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
  357. }
  358. static bool isi32Load(unsigned Opc) {
  359. return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
  360. }
  361. static bool isT1i32Store(unsigned Opc) {
  362. return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
  363. }
  364. static bool isT2i32Store(unsigned Opc) {
  365. return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
  366. }
  367. static bool isi32Store(unsigned Opc) {
  368. return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
  369. }
  370. static bool isLoadSingle(unsigned Opc) {
  371. return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
  372. }
  373. static unsigned getImmScale(unsigned Opc) {
  374. switch (Opc) {
  375. default: llvm_unreachable("Unhandled opcode!");
  376. case ARM::tLDRi:
  377. case ARM::tSTRi:
  378. case ARM::tLDRspi:
  379. case ARM::tSTRspi:
  380. return 1;
  381. case ARM::tLDRHi:
  382. case ARM::tSTRHi:
  383. return 2;
  384. case ARM::tLDRBi:
  385. case ARM::tSTRBi:
  386. return 4;
  387. }
  388. }
  389. static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  390. switch (MI->getOpcode()) {
  391. default: return 0;
  392. case ARM::LDRi12:
  393. case ARM::STRi12:
  394. case ARM::tLDRi:
  395. case ARM::tSTRi:
  396. case ARM::tLDRspi:
  397. case ARM::tSTRspi:
  398. case ARM::t2LDRi8:
  399. case ARM::t2LDRi12:
  400. case ARM::t2STRi8:
  401. case ARM::t2STRi12:
  402. case ARM::VLDRS:
  403. case ARM::VSTRS:
  404. return 4;
  405. case ARM::VLDRD:
  406. case ARM::VSTRD:
  407. return 8;
  408. case ARM::LDMIA:
  409. case ARM::LDMDA:
  410. case ARM::LDMDB:
  411. case ARM::LDMIB:
  412. case ARM::STMIA:
  413. case ARM::STMDA:
  414. case ARM::STMDB:
  415. case ARM::STMIB:
  416. case ARM::tLDMIA:
  417. case ARM::tLDMIA_UPD:
  418. case ARM::tSTMIA_UPD:
  419. case ARM::t2LDMIA:
  420. case ARM::t2LDMDB:
  421. case ARM::t2STMIA:
  422. case ARM::t2STMDB:
  423. case ARM::VLDMSIA:
  424. case ARM::VSTMSIA:
  425. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  426. case ARM::VLDMDIA:
  427. case ARM::VSTMDIA:
  428. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  429. }
  430. }
  /// Update future uses of the base register with the offset introduced
  /// due to writeback. This function only works on Thumb1.
  ///
  /// Starting at \p MBBI, instructions in \p MBB are visited in order. Users of
  /// \p Base whose immediate can absorb the adjustment are rewritten in place;
  /// at the first instruction that cannot be fixed up, a tSUBi8 that subtracts
  /// WordOffset * 4 from \p Base is inserted in front of it and the walk stops.
  /// The walk also stops once \p Base is killed or redefined.
  ///
  /// \param MBB        Block being rewritten.
  /// \param MBBI       First instruction to examine.
  /// \param DL         Debug location attached to any inserted SUB.
  /// \param Base       Base register whose value was changed by writeback.
  /// \param WordOffset Size of the writeback adjustment, in 4-byte words.
  /// \param Pred       Predicate condition added to any inserted SUB.
  /// \param PredReg    Predicate register added to any inserted SUB.
  void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Base,
                                          unsigned WordOffset,
                                          ARMCC::CondCodes Pred,
                                          unsigned PredReg) {
    assert(isThumb1 && "Can only update base register uses for Thumb1!");
    // Start updating any instructions with immediate offsets. Insert a SUB before
    // the first non-updateable instruction (if any).
    for (; MBBI != MBB.end(); ++MBBI) {
      bool InsertSub = false;
      unsigned Opc = MBBI->getOpcode();

      if (MBBI->readsRegister(Base)) {
        int Offset;
        bool IsLoad =
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
        bool IsStore =
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

        if (IsLoad || IsStore) {
          // Loads and stores with immediate offsets can be updated, but only if
          // the new offset isn't negative.
          // The MachineOperand containing the offset immediate is the last one
          // before predicates.
          MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
          // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
          Offset = MO.getImm() - WordOffset * getImmScale(Opc);

          // If storing the base register, it needs to be reset first.
          Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

          if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
            MO.setImm(Offset);
          else
            InsertSub = true;
        } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                   !definesCPSR(*MBBI)) {
          // SUBS/ADDS using this register, with a dead def of the CPSR.
          // Merge it with the update; if the merged offset is too large,
          // insert a new sub instead.
          MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
          // A following SUB grows by the writeback amount, a following ADD
          // shrinks by it.
          Offset = (Opc == ARM::tSUBi8) ?
            MO.getImm() + WordOffset * 4 :
            MO.getImm() - WordOffset * 4 ;
          if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
            // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
            // Offset == 0.
            MO.setImm(Offset);
            // The base register has now been reset, so exit early.
            return;
          } else {
            InsertSub = true;
          }
        } else {
          // Can't update the instruction.
          InsertSub = true;
        }
      } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
        // Since SUBS sets the condition flags, we can't place the base reset
        // after an instruction that has a live CPSR def.
        // The base register might also contain an argument for a function call.
        InsertSub = true;
      }

      if (InsertSub) {
        // An instruction above couldn't be updated, so insert a sub.
        BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
            .add(t1CondCodeOp(true))
            .addReg(Base)
            .addImm(WordOffset * 4)
            .addImm(Pred)
            .addReg(PredReg);
        return;
      }

      if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
        // Register got killed. Stop updating.
        return;
    }

    // End of block was reached.
    if (MBB.succ_size() > 0) {
      // FIXME: Because of a bug, live registers are sometimes missing from
      // the successor blocks' live-in sets. This means we can't trust that
      // information and *always* have to reset at the end of a block.
      // See PR21029.
      // NOTE(review): the loop above is only left by a return or when MBBI has
      // reached MBB.end(), so this condition looks like it can never be true
      // here and the SUB is always inserted at the end of the block - confirm.
      if (MBBI != MBB.end()) --MBBI;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
          .add(t1CondCodeOp(true))
          .addReg(Base)
          .addImm(WordOffset * 4)
          .addImm(Pred)
          .addReg(PredReg);
    }
  }
  524. /// Return the first register of class \p RegClass that is not in \p Regs.
  525. unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  526. if (!RegClassInfoValid) {
  527. RegClassInfo.runOnMachineFunction(*MF);
  528. RegClassInfoValid = true;
  529. }
  530. for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
  531. if (!LiveRegs.contains(Reg))
  532. return Reg;
  533. return 0;
  534. }
  535. /// Compute live registers just before instruction \p Before (in normal schedule
  536. /// direction). Computes backwards so multiple queries in the same block must
  537. /// come in reverse order.
  538. void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
  539. MachineBasicBlock::const_iterator Before) {
  540. // Initialize if we never queried in this block.
  541. if (!LiveRegsValid) {
  542. LiveRegs.init(*TRI);
  543. LiveRegs.addLiveOuts(MBB);
  544. LiveRegPos = MBB.end();
  545. LiveRegsValid = true;
  546. }
  547. // Move backward just before the "Before" position.
  548. while (LiveRegPos != Before) {
  549. --LiveRegPos;
  550. LiveRegs.stepBackward(*LiveRegPos);
  551. }
  552. }
  553. static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
  554. unsigned Reg) {
  555. for (const std::pair<unsigned, bool> &R : Regs)
  556. if (R.first == Reg)
  557. return true;
  558. return false;
  559. }
  560. /// Create and insert a LDM or STM with Base as base register and registers in
  561. /// Regs as the register operands that would be loaded / stored. Returns the
  562. /// newly built instruction, or nullptr if no merged instruction was created.
  ///
  /// The new instruction is inserted before \p InsertBefore. \p Offset is the
  /// starting offset relative to \p Base: some values are covered directly by an
  /// addressing submode (IB / DA / DB), any other non-zero value requires a new
  /// base register to be materialized with an ADD/SUB first. \p Regs holds
  /// (register, kill flag) pairs and must contain at least two entries.
  /// \p Instrs is only used to copy memory operands onto the new instruction.
  563. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
  564. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  565. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  566. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  567. ArrayRef<std::pair<unsigned, bool>> Regs,
  568. ArrayRef<MachineInstr*> Instrs) {
  569. unsigned NumRegs = Regs.size();
  570. assert(NumRegs > 1);
  571. // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  572. // Compute liveness information for that register to make the decision.
  573. bool SafeToClobberCPSR = !isThumb1 ||
  574. (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
  575. MachineBasicBlock::LQR_Dead);
  576. bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  577. // Exception: If the base register is in the input reglist, Thumb1 LDM is
  578. // non-writeback.
  579. // It's also not possible to merge an STR of the base register in Thumb1.
  580. if (isThumb1 && ContainsReg(Regs, Base)) {
  581. assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
  582. if (Opcode == ARM::tLDRi)
  583. Writeback = false;
  584. else if (Opcode == ARM::tSTRi)
  585. return nullptr;
  586. }
  // Select the addressing submode from the starting offset. The default (IA)
  // is kept when none of the special offsets below applies.
  587. ARM_AM::AMSubMode Mode = ARM_AM::ia;
  588. // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  589. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  590. bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  591. if (Offset == 4 && haveIBAndDA) {
  592. Mode = ARM_AM::ib;
  593. } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  594. Mode = ARM_AM::da;
  595. } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
  596. // VLDM/VSTM do not support DB mode without also updating the base reg.
  597. Mode = ARM_AM::db;
  598. } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
  599. // Check if this is a supported opcode before inserting instructions to
  600. // calculate a new base register.
  601. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
  602. // If starting offset isn't zero, insert a MI to materialize a new base.
  603. // But only do so if it is cost effective, i.e. merging more than two
  604. // loads / stores.
  605. if (NumRegs <= 2)
  606. return nullptr;
  607. // On Thumb1, it's not worth materializing a new base register without
  608. // clobbering the CPSR (i.e. not using ADDS/SUBS).
  609. if (!SafeToClobberCPSR)
  610. return nullptr;
  611. unsigned NewBase;
  612. if (isi32Load(Opcode)) {
  613. // If it is a load, then just use one of the destination registers
  614. // as the new base. Will no longer be writeback in Thumb1.
  615. NewBase = Regs[NumRegs-1].first;
  616. Writeback = false;
  617. } else {
  618. // Find a free register that we can use as scratch register.
  619. moveLiveRegsBefore(MBB, InsertBefore);
  620. // The merged instruction does not exist yet but will use several Regs if
  621. // it is a Store.
  622. if (!isLoadSingle(Opcode))
  623. for (const std::pair<unsigned, bool> &R : Regs)
  624. LiveRegs.addReg(R.first);
  625. NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
  626. if (NewBase == 0)
  627. return nullptr;
  628. }
  // Pick the ADD opcode used to compute NewBase = Base + Offset for the
  // current instruction set; it is replaced by a SUB form below when the
  // offset is negative.
  629. int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
  630. : ARM::t2ADDri)
  631. : (isThumb1 && Base == ARM::SP)
  632. ? ARM::tADDrSPi
  633. : (isThumb1 && Offset < 8)
  634. ? ARM::tADDi3
  635. : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
  636. if (Offset < 0) {
  637. // FIXME: There are no Thumb1 load/store instructions with negative
  638. // offsets. So the Base != ARM::SP might be unnecessary.
  639. Offset = -Offset;
  640. BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
  641. : ARM::t2SUBri)
  642. : (isThumb1 && Offset < 8 && Base != ARM::SP)
  643. ? ARM::tSUBi3
  644. : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
  645. }
  646. if (!TL->isLegalAddImmediate(Offset))
  647. // FIXME: Try add with register operand?
  648. return nullptr; // Probably not worth it then.
  649. // We can only append a kill flag to the add/sub input if the value is not
  650. // used in the register list of the stm as well.
  651. bool KillOldBase = BaseKill &&
  652. (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
  653. if (isThumb1) {
  654. // Thumb1: depending on immediate size, use either
  655. // ADDS NewBase, Base, #imm3
  656. // or
  657. // MOV NewBase, Base
  658. // ADDS NewBase, #imm8.
  659. if (Base != NewBase &&
  660. (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
  661. // Need to insert a MOV to the new base first.
  662. if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
  663. !STI->hasV6Ops()) {
  664. // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
  665. if (Pred != ARMCC::AL)
  666. return nullptr;
  667. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
  668. .addReg(Base, getKillRegState(KillOldBase));
  669. } else
  670. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
  671. .addReg(Base, getKillRegState(KillOldBase))
  672. .add(predOps(Pred, PredReg));
  673. // The following ADDS/SUBS becomes an update.
  674. Base = NewBase;
  675. KillOldBase = true;
  676. }
  677. if (BaseOpc == ARM::tADDrSPi) {
  678. assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
  679. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  680. .addReg(Base, getKillRegState(KillOldBase))
  681. .addImm(Offset / 4)
  682. .add(predOps(Pred, PredReg));
  683. } else
  684. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  685. .add(t1CondCodeOp(true))
  686. .addReg(Base, getKillRegState(KillOldBase))
  687. .addImm(Offset)
  688. .add(predOps(Pred, PredReg));
  689. } else {
  690. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  691. .addReg(Base, getKillRegState(KillOldBase))
  692. .addImm(Offset)
  693. .add(predOps(Pred, PredReg))
  694. .add(condCodeOp());
  695. }
  696. Base = NewBase;
  697. BaseKill = true; // New base is always killed straight away.
  698. }
  // Remember whether the register list is defined (load) or used (store)
  // before Opcode is overwritten with the multiple-register opcode.
  699. bool isDef = isLoadSingle(Opcode);
  700. // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  701. // base register writeback.
  702. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  703. if (!Opcode)
  704. return nullptr;
  705. // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  706. // - There is no writeback (LDM of base register),
  707. // - the base register is killed by the merged instruction,
  708. // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  709. // to reset the base register.
  710. // Otherwise, don't merge.
  711. // It's safe to return here since the code to materialize a new base register
  712. // above is also conditional on SafeToClobberCPSR.
  713. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
  714. return nullptr;
  715. MachineInstrBuilder MIB;
  716. if (Writeback) {
  717. assert(isThumb1 && "expected Writeback only inThumb1");
  718. if (Opcode == ARM::tLDMIA) {
  719. assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
  720. // Update tLDMIA with writeback if necessary.
  721. Opcode = ARM::tLDMIA_UPD;
  722. }
  723. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  724. // Thumb1: we might need to set base writeback when building the MI.
  725. MIB.addReg(Base, getDefRegState(true))
  726. .addReg(Base, getKillRegState(BaseKill));
  727. // The base isn't dead after a merged instruction with writeback.
  728. // Insert a sub instruction after the newly formed instruction to reset.
  729. if (!BaseKill)
  730. UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  731. } else {
  732. // No writeback, simply build the MachineInstr.
  733. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  734. MIB.addReg(Base, getKillRegState(BaseKill));
  735. }
  // Append the predicate operands, then the register list. Each register is
  // marked as a def when merging loads and carries its recorded kill flag.
  736. MIB.addImm(Pred).addReg(PredReg);
  737. for (const std::pair<unsigned, bool> &R : Regs)
  738. MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
  739. MIB.cloneMergedMemRefs(Instrs);
  740. return MIB.getInstr();
  741. }
  742. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
  743. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  744. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  745. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  746. ArrayRef<std::pair<unsigned, bool>> Regs,
  747. ArrayRef<MachineInstr*> Instrs) const {
  748. bool IsLoad = isi32Load(Opcode);
  749. assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  750. unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  751. assert(Regs.size() == 2);
  752. MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
  753. TII->get(LoadStoreOpcode));
  754. if (IsLoad) {
  755. MIB.addReg(Regs[0].first, RegState::Define)
  756. .addReg(Regs[1].first, RegState::Define);
  757. } else {
  758. MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
  759. .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  760. }
  761. MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  762. MIB.cloneMergedMemRefs(Instrs);
  763. return MIB.getInstr();
  764. }
  765. /// Call MergeOps and update MemOps and merges accordingly on success.
  766. MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  767. const MachineInstr *First = Cand.Instrs.front();
  768. unsigned Opcode = First->getOpcode();
  769. bool IsLoad = isLoadSingle(Opcode);
  770. SmallVector<std::pair<unsigned, bool>, 8> Regs;
  771. SmallVector<unsigned, 4> ImpDefs;
  772. DenseSet<unsigned> KilledRegs;
  773. DenseSet<unsigned> UsedRegs;
  774. // Determine list of registers and list of implicit super-register defs.
  775. for (const MachineInstr *MI : Cand.Instrs) {
  776. const MachineOperand &MO = getLoadStoreRegOp(*MI);
  777. Register Reg = MO.getReg();
  778. bool IsKill = MO.isKill();
  779. if (IsKill)
  780. KilledRegs.insert(Reg);
  781. Regs.push_back(std::make_pair(Reg, IsKill));
  782. UsedRegs.insert(Reg);
  783. if (IsLoad) {
  784. // Collect any implicit defs of super-registers, after merging we can't
  785. // be sure anymore that we properly preserved these live ranges and must
  786. // removed these implicit operands.
  787. for (const MachineOperand &MO : MI->implicit_operands()) {
  788. if (!MO.isReg() || !MO.isDef() || MO.isDead())
  789. continue;
  790. assert(MO.isImplicit());
  791. Register DefReg = MO.getReg();
  792. if (is_contained(ImpDefs, DefReg))
  793. continue;
  794. // We can ignore cases where the super-reg is read and written.
  795. if (MI->readsRegister(DefReg))
  796. continue;
  797. ImpDefs.push_back(DefReg);
  798. }
  799. }
  800. }
  801. // Attempt the merge.
  802. using iterator = MachineBasicBlock::iterator;
  803. MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  804. iterator InsertBefore = std::next(iterator(LatestMI));
  805. MachineBasicBlock &MBB = *LatestMI->getParent();
  806. unsigned Offset = getMemoryOpOffset(*First);
  807. Register Base = getLoadStoreBaseOp(*First).getReg();
  808. bool BaseKill = LatestMI->killsRegister(Base);
  809. Register PredReg;
  810. ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  811. DebugLoc DL = First->getDebugLoc();
  812. MachineInstr *Merged = nullptr;
  813. if (Cand.CanMergeToLSDouble)
  814. Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
  815. Opcode, Pred, PredReg, DL, Regs,
  816. Cand.Instrs);
  817. if (!Merged && Cand.CanMergeToLSMulti)
  818. Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
  819. Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
  820. if (!Merged)
  821. return nullptr;
  822. // Determine earliest instruction that will get removed. We then keep an
  823. // iterator just above it so the following erases don't invalidated it.
  824. iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  825. bool EarliestAtBegin = false;
  826. if (EarliestI == MBB.begin()) {
  827. EarliestAtBegin = true;
  828. } else {
  829. EarliestI = std::prev(EarliestI);
  830. }
  831. // Remove instructions which have been merged.
  832. for (MachineInstr *MI : Cand.Instrs)
  833. MBB.erase(MI);
  834. // Determine range between the earliest removed instruction and the new one.
  835. if (EarliestAtBegin)
  836. EarliestI = MBB.begin();
  837. else
  838. EarliestI = std::next(EarliestI);
  839. auto FixupRange = make_range(EarliestI, iterator(Merged));
  840. if (isLoadSingle(Opcode)) {
  841. // If the previous loads defined a super-reg, then we have to mark earlier
  842. // operands undef; Replicate the super-reg def on the merged instruction.
  843. for (MachineInstr &MI : FixupRange) {
  844. for (unsigned &ImpDefReg : ImpDefs) {
  845. for (MachineOperand &MO : MI.implicit_operands()) {
  846. if (!MO.isReg() || MO.getReg() != ImpDefReg)
  847. continue;
  848. if (MO.readsReg())
  849. MO.setIsUndef();
  850. else if (MO.isDef())
  851. ImpDefReg = 0;
  852. }
  853. }
  854. }
  855. MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
  856. for (unsigned ImpDef : ImpDefs)
  857. MIB.addReg(ImpDef, RegState::ImplicitDefine);
  858. } else {
  859. // Remove kill flags: We are possibly storing the values later now.
  860. assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
  861. for (MachineInstr &MI : FixupRange) {
  862. for (MachineOperand &MO : MI.uses()) {
  863. if (!MO.isReg() || !MO.isKill())
  864. continue;
  865. if (UsedRegs.count(MO.getReg()))
  866. MO.setIsKill(false);
  867. }
  868. }
  869. assert(ImpDefs.empty());
  870. }
  871. return Merged;
  872. }
  /// Return true if \p Offset can be encoded as the immediate of
  /// t2LDRDi8/t2STRDi8: its magnitude must be a multiple of 4 and less than
  /// 1024.
  static bool isValidLSDoubleOffset(int Offset) {
    // Take the magnitude in unsigned arithmetic: negating (or calling abs on)
    // the most negative int would overflow, which is undefined behaviour.
    const unsigned Value = Offset < 0 ? 0u - static_cast<unsigned>(Offset)
                                      : static_cast<unsigned>(Offset);
    // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
    // multiplied by 4.
    return (Value % 4) == 0 && Value < 1024;
  }
  879. /// Return true for loads/stores that can be combined to a double/multi
  880. /// operation without increasing the requirements for alignment.
  881. static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
  882. const MachineInstr &MI) {
  883. // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  884. // difference.
  885. unsigned Opcode = MI.getOpcode();
  886. if (!isi32Load(Opcode) && !isi32Store(Opcode))
  887. return true;
  888. // Stack pointer alignment is out of the programmers control so we can trust
  889. // SP-relative loads/stores.
  890. if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
  891. STI.getFrameLowering()->getTransientStackAlign() >= Align(4))
  892. return true;
  893. return false;
  894. }
  895. /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
  ///
  /// \p MemOps is split into consecutive runs ("chains") and one MergeCandidate
  /// per chain is appended to Candidates. A chain is extended while the next
  /// entry's offset is exactly Size bytes past the previous one and the entry
  /// can still take part in a load/store multiple or double. Chains of length
  /// one are still recorded, with both merge flags cleared.
  /// \p MemOps must not be empty: its first entry is read unconditionally.
  896. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  897. const MachineInstr *FirstMI = MemOps[0].MI;
  898. unsigned Opcode = FirstMI->getOpcode();
  899. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  900. unsigned Size = getLSMultipleTransferSize(FirstMI);
  901. unsigned SIndex = 0;
  902. unsigned EIndex = MemOps.size();
  903. do {
  904. // Look at the first instruction.
  905. const MachineInstr *MI = MemOps[SIndex].MI;
  906. int Offset = MemOps[SIndex].Offset;
  907. const MachineOperand &PMO = getLoadStoreRegOp(*MI);
  908. Register PReg = PMO.getReg();
  // An undef register operand is treated as having the largest possible
  // encoding value.
  909. unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
  910. : TRI->getEncodingValue(PReg);
  911. unsigned Latest = SIndex;
  912. unsigned Earliest = SIndex;
  913. unsigned Count = 1;
  914. bool CanMergeToLSDouble =
  915. STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
  916. // ARM errata 602117: LDRD with base in list may result in incorrect base
  917. // register when interrupted or faulted.
  918. if (STI->isCortexM3() && isi32Load(Opcode) &&
  919. PReg == getLoadStoreBaseOp(*MI).getReg())
  920. CanMergeToLSDouble = false;
  921. bool CanMergeToLSMulti = true;
  922. // On Swift, do not form a vldm/vstm starting with an odd register number,
  923. // as that needs more uops than single vldrs.
  924. if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
  925. CanMergeToLSMulti = false;
  926. // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
  927. // deprecated; LDM to PC is fine but cannot happen here.
  928. if (PReg == ARM::SP || PReg == ARM::PC)
  929. CanMergeToLSMulti = CanMergeToLSDouble = false;
  930. // Should we be conservative?
  931. if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
  932. CanMergeToLSMulti = CanMergeToLSDouble = false;
  933. // vldm / vstm limit are 32 for S variants, 16 for D variants.
  // Only the D-register opcodes get an explicit cap here; every other opcode
  // is left without a limit on the chain length.
  934. unsigned Limit;
  935. switch (Opcode) {
  936. default:
  937. Limit = UINT_MAX;
  938. break;
  939. case ARM::VLDRD:
  940. case ARM::VSTRD:
  941. Limit = 16;
  942. break;
  943. }
  944. // Merge following instructions where possible.
  945. for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
  946. int NewOffset = MemOps[I].Offset;
  947. if (NewOffset != Offset + (int)Size)
  948. break;
  949. const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
  950. Register Reg = MO.getReg();
  951. if (Reg == ARM::SP || Reg == ARM::PC)
  952. break;
  953. if (Count == Limit)
  954. break;
  955. // See if the current load/store may be part of a multi load/store.
  956. unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
  957. : TRI->getEncodingValue(Reg);
  958. bool PartOfLSMulti = CanMergeToLSMulti;
  959. if (PartOfLSMulti) {
  960. // Register numbers must be in ascending order.
  961. if (RegNum <= PRegNum)
  962. PartOfLSMulti = false;
  963. // For VFP / NEON load/store multiples, the registers must be
  964. // consecutive and within the limit on the number of registers per
  965. // instruction.
  966. else if (!isNotVFP && RegNum != PRegNum+1)
  967. PartOfLSMulti = false;
  968. }
  969. // See if the current load/store may be part of a double load/store.
  // Count is the number of ops already in the chain, so only the second op
  // of a chain can join a double.
  970. bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
  971. if (!PartOfLSMulti && !PartOfLSDouble)
  972. break;
  973. CanMergeToLSMulti &= PartOfLSMulti;
  974. CanMergeToLSDouble &= PartOfLSDouble;
  975. // Track MemOp with latest and earliest position (Positions are
  976. // counted in reverse).
  977. unsigned Position = MemOps[I].Position;
  978. if (Position < MemOps[Latest].Position)
  979. Latest = I;
  980. else if (Position > MemOps[Earliest].Position)
  981. Earliest = I;
  982. // Prepare for next MemOp.
  983. Offset += Size;
  984. PRegNum = RegNum;
  985. }
  986. // Form a candidate from the Ops collected so far.
  987. MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
  988. for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
  989. Candidate->Instrs.push_back(MemOps[C].MI);
  // The latest/earliest indices are stored relative to the start of the chain.
  990. Candidate->LatestMIIdx = Latest - SIndex;
  991. Candidate->EarliestMIIdx = Earliest - SIndex;
  992. Candidate->InsertPos = MemOps[Latest].Position;
  // A chain of a single instruction has nothing to merge with.
  993. if (Count == 1)
  994. CanMergeToLSMulti = CanMergeToLSDouble = false;
  995. Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
  996. Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
  997. Candidates.push_back(Candidate);
  998. // Continue after the chain.
  999. SIndex += Count;
  1000. } while (SIndex < EIndex);
  1001. }
  1002. static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
  1003. ARM_AM::AMSubMode Mode) {
  1004. switch (Opc) {
  1005. default: llvm_unreachable("Unhandled opcode!");
  1006. case ARM::LDMIA:
  1007. case ARM::LDMDA:
  1008. case ARM::LDMDB:
  1009. case ARM::LDMIB:
  1010. switch (Mode) {
  1011. default: llvm_unreachable("Unhandled submode!");
  1012. case ARM_AM::ia: return ARM::LDMIA_UPD;
  1013. case ARM_AM::ib: return ARM::LDMIB_UPD;
  1014. case ARM_AM::da: return ARM::LDMDA_UPD;
  1015. case ARM_AM::db: return ARM::LDMDB_UPD;
  1016. }
  1017. case ARM::STMIA:
  1018. case ARM::STMDA:
  1019. case ARM::STMDB:
  1020. case ARM::STMIB:
  1021. switch (Mode) {
  1022. default: llvm_unreachable("Unhandled submode!");
  1023. case ARM_AM::ia: return ARM::STMIA_UPD;
  1024. case ARM_AM::ib: return ARM::STMIB_UPD;
  1025. case ARM_AM::da: return ARM::STMDA_UPD;
  1026. case ARM_AM::db: return ARM::STMDB_UPD;
  1027. }
  1028. case ARM::t2LDMIA:
  1029. case ARM::t2LDMDB:
  1030. switch (Mode) {
  1031. default: llvm_unreachable("Unhandled submode!");
  1032. case ARM_AM::ia: return ARM::t2LDMIA_UPD;
  1033. case ARM_AM::db: return ARM::t2LDMDB_UPD;
  1034. }
  1035. case ARM::t2STMIA:
  1036. case ARM::t2STMDB:
  1037. switch (Mode) {
  1038. default: llvm_unreachable("Unhandled submode!");
  1039. case ARM_AM::ia: return ARM::t2STMIA_UPD;
  1040. case ARM_AM::db: return ARM::t2STMDB_UPD;
  1041. }
  1042. case ARM::VLDMSIA:
  1043. switch (Mode) {
  1044. default: llvm_unreachable("Unhandled submode!");
  1045. case ARM_AM::ia: return ARM::VLDMSIA_UPD;
  1046. case ARM_AM::db: return ARM::VLDMSDB_UPD;
  1047. }
  1048. case ARM::VLDMDIA:
  1049. switch (Mode) {
  1050. default: llvm_unreachable("Unhandled submode!");
  1051. case ARM_AM::ia: return ARM::VLDMDIA_UPD;
  1052. case ARM_AM::db: return ARM::VLDMDDB_UPD;
  1053. }
  1054. case ARM::VSTMSIA:
  1055. switch (Mode) {
  1056. default: llvm_unreachable("Unhandled submode!");
  1057. case ARM_AM::ia: return ARM::VSTMSIA_UPD;
  1058. case ARM_AM::db: return ARM::VSTMSDB_UPD;
  1059. }
  1060. case ARM::VSTMDIA:
  1061. switch (Mode) {
  1062. default: llvm_unreachable("Unhandled submode!");
  1063. case ARM_AM::ia: return ARM::VSTMDIA_UPD;
  1064. case ARM_AM::db: return ARM::VSTMDDB_UPD;
  1065. }
  1066. }
  1067. }
  1068. /// Check if the given instruction increments or decrements a register and
  1069. /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
  1070. /// generated by the instruction are possibly read as well.
  1071. static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg,
  1072. ARMCC::CondCodes Pred, Register PredReg) {
  1073. bool CheckCPSRDef;
  1074. int Scale;
  1075. switch (MI.getOpcode()) {
  1076. case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
  1077. case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
  1078. case ARM::t2SUBri:
  1079. case ARM::t2SUBspImm:
  1080. case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
  1081. case ARM::t2ADDri:
  1082. case ARM::t2ADDspImm:
  1083. case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
  1084. case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  1085. case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  1086. default: return 0;
  1087. }
  1088. Register MIPredReg;
  1089. if (MI.getOperand(0).getReg() != Reg ||
  1090. MI.getOperand(1).getReg() != Reg ||
  1091. getInstrPredicate(MI, MIPredReg) != Pred ||
  1092. MIPredReg != PredReg)
  1093. return 0;
  1094. if (CheckCPSRDef && definesCPSR(MI))
  1095. return 0;
  1096. return MI.getOperand(2).getImm() * Scale;
  1097. }
  1098. /// Searches for an increment or decrement of \p Reg before \p MBBI.
  1099. static MachineBasicBlock::iterator
  1100. findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg,
  1101. ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
  1102. Offset = 0;
  1103. MachineBasicBlock &MBB = *MBBI->getParent();
  1104. MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  1105. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1106. if (MBBI == BeginMBBI)
  1107. return EndMBBI;
  1108. // Skip debug values.
  1109. MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  1110. while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
  1111. --PrevMBBI;
  1112. Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  1113. return Offset == 0 ? EndMBBI : PrevMBBI;
  1114. }
  1115. /// Searches for a increment or decrement of \p Reg after \p MBBI.
  1116. static MachineBasicBlock::iterator
  1117. findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
  1118. ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
  1119. Offset = 0;
  1120. MachineBasicBlock &MBB = *MBBI->getParent();
  1121. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1122. MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  1123. // Skip debug values.
  1124. while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
  1125. ++NextMBBI;
  1126. if (NextMBBI == EndMBBI)
  1127. return EndMBBI;
  1128. Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  1129. return Offset == 0 ? EndMBBI : NextMBBI;
  1130. }
  1131. /// Fold proceeding/trailing inc/dec of base register into the
  1132. /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
  1133. ///
  1134. /// stmia rn, <ra, rb, rc>
  1135. /// rn := rn + 4 * 3;
  1136. /// =>
  1137. /// stmia rn!, <ra, rb, rc>
  1138. ///
  1139. /// rn := rn - 4 * 3;
  1140. /// ldmia rn, <ra, rb, rc>
  1141. /// =>
  1142. /// ldmdb rn!, <ra, rb, rc>
  1143. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  1144. // Thumb1 is already using updating loads/stores.
  1145. if (isThumb1) return false;
  1146. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
  1147. const MachineOperand &BaseOP = MI->getOperand(0);
  1148. Register Base = BaseOP.getReg();
  1149. bool BaseKill = BaseOP.isKill();
  1150. Register PredReg;
  1151. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1152. unsigned Opcode = MI->getOpcode();
  1153. DebugLoc DL = MI->getDebugLoc();
  1154. // Can't use an updating ld/st if the base register is also a dest
  1155. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  1156. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1157. if (MI->getOperand(i).getReg() == Base)
  1158. return false;
  1159. int Bytes = getLSMultipleTransferSize(MI);
  1160. MachineBasicBlock &MBB = *MI->getParent();
  1161. MachineBasicBlock::iterator MBBI(MI);
  1162. int Offset;
  1163. MachineBasicBlock::iterator MergeInstr
  1164. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1165. ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  1166. if (Mode == ARM_AM::ia && Offset == -Bytes) {
  1167. Mode = ARM_AM::db;
  1168. } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
  1169. Mode = ARM_AM::da;
  1170. } else {
  1171. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1172. if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
  1173. ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
  1174. // We couldn't find an inc/dec to merge. But if the base is dead, we
  1175. // can still change to a writeback form as that will save us 2 bytes
  1176. // of code size. It can create WAW hazards though, so only do it if
  1177. // we're minimizing code size.
  1178. if (!STI->hasMinSize() || !BaseKill)
  1179. return false;
  1180. bool HighRegsUsed = false;
  1181. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1182. if (MI->getOperand(i).getReg() >= ARM::R8) {
  1183. HighRegsUsed = true;
  1184. break;
  1185. }
  1186. if (!HighRegsUsed)
  1187. MergeInstr = MBB.end();
  1188. else
  1189. return false;
  1190. }
  1191. }
  1192. if (MergeInstr != MBB.end()) {
  1193. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1194. MBB.erase(MergeInstr);
  1195. }
  1196. unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  1197. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1198. .addReg(Base, getDefRegState(true)) // WB base register
  1199. .addReg(Base, getKillRegState(BaseKill))
  1200. .addImm(Pred).addReg(PredReg);
  1201. // Transfer the rest of operands.
  1202. for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
  1203. MIB.add(MI->getOperand(OpNum));
  1204. // Transfer memoperands.
  1205. MIB.setMemRefs(MI->memoperands());
  1206. LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
  1207. MBB.erase(MBBI);
  1208. return true;
  1209. }
  1210. static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
  1211. ARM_AM::AddrOpc Mode) {
  1212. switch (Opc) {
  1213. case ARM::LDRi12:
  1214. return ARM::LDR_PRE_IMM;
  1215. case ARM::STRi12:
  1216. return ARM::STR_PRE_IMM;
  1217. case ARM::VLDRS:
  1218. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1219. case ARM::VLDRD:
  1220. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1221. case ARM::VSTRS:
  1222. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1223. case ARM::VSTRD:
  1224. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1225. case ARM::t2LDRi8:
  1226. case ARM::t2LDRi12:
  1227. return ARM::t2LDR_PRE;
  1228. case ARM::t2STRi8:
  1229. case ARM::t2STRi12:
  1230. return ARM::t2STR_PRE;
  1231. default: llvm_unreachable("Unhandled opcode!");
  1232. }
  1233. }
  1234. static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
  1235. ARM_AM::AddrOpc Mode) {
  1236. switch (Opc) {
  1237. case ARM::LDRi12:
  1238. return ARM::LDR_POST_IMM;
  1239. case ARM::STRi12:
  1240. return ARM::STR_POST_IMM;
  1241. case ARM::VLDRS:
  1242. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1243. case ARM::VLDRD:
  1244. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1245. case ARM::VSTRS:
  1246. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1247. case ARM::VSTRD:
  1248. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1249. case ARM::t2LDRi8:
  1250. case ARM::t2LDRi12:
  1251. return ARM::t2LDR_POST;
  1252. case ARM::t2LDRBi8:
  1253. case ARM::t2LDRBi12:
  1254. return ARM::t2LDRB_POST;
  1255. case ARM::t2LDRSBi8:
  1256. case ARM::t2LDRSBi12:
  1257. return ARM::t2LDRSB_POST;
  1258. case ARM::t2LDRHi8:
  1259. case ARM::t2LDRHi12:
  1260. return ARM::t2LDRH_POST;
  1261. case ARM::t2LDRSHi8:
  1262. case ARM::t2LDRSHi12:
  1263. return ARM::t2LDRSH_POST;
  1264. case ARM::t2STRi8:
  1265. case ARM::t2STRi12:
  1266. return ARM::t2STR_POST;
  1267. case ARM::t2STRBi8:
  1268. case ARM::t2STRBi12:
  1269. return ARM::t2STRB_POST;
  1270. case ARM::t2STRHi8:
  1271. case ARM::t2STRHi12:
  1272. return ARM::t2STRH_POST;
  1273. case ARM::MVE_VLDRBS16:
  1274. return ARM::MVE_VLDRBS16_post;
  1275. case ARM::MVE_VLDRBS32:
  1276. return ARM::MVE_VLDRBS32_post;
  1277. case ARM::MVE_VLDRBU16:
  1278. return ARM::MVE_VLDRBU16_post;
  1279. case ARM::MVE_VLDRBU32:
  1280. return ARM::MVE_VLDRBU32_post;
  1281. case ARM::MVE_VLDRHS32:
  1282. return ARM::MVE_VLDRHS32_post;
  1283. case ARM::MVE_VLDRHU32:
  1284. return ARM::MVE_VLDRHU32_post;
  1285. case ARM::MVE_VLDRBU8:
  1286. return ARM::MVE_VLDRBU8_post;
  1287. case ARM::MVE_VLDRHU16:
  1288. return ARM::MVE_VLDRHU16_post;
  1289. case ARM::MVE_VLDRWU32:
  1290. return ARM::MVE_VLDRWU32_post;
  1291. case ARM::MVE_VSTRB16:
  1292. return ARM::MVE_VSTRB16_post;
  1293. case ARM::MVE_VSTRB32:
  1294. return ARM::MVE_VSTRB32_post;
  1295. case ARM::MVE_VSTRH32:
  1296. return ARM::MVE_VSTRH32_post;
  1297. case ARM::MVE_VSTRBU8:
  1298. return ARM::MVE_VSTRBU8_post;
  1299. case ARM::MVE_VSTRHU16:
  1300. return ARM::MVE_VSTRHU16_post;
  1301. case ARM::MVE_VSTRWU32:
  1302. return ARM::MVE_VSTRWU32_post;
  1303. default: llvm_unreachable("Unhandled opcode!");
  1304. }
  1305. }
  1306. /// Fold proceeding/trailing inc/dec of base register into the
  1307. /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
  1308. bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  1309. // Thumb1 doesn't have updating LDR/STR.
  1310. // FIXME: Use LDM/STM with single register instead.
  1311. if (isThumb1) return false;
  1312. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
  1313. Register Base = getLoadStoreBaseOp(*MI).getReg();
  1314. bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  1315. unsigned Opcode = MI->getOpcode();
  1316. DebugLoc DL = MI->getDebugLoc();
  1317. bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
  1318. Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  1319. bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  1320. if (isi32Load(Opcode) || isi32Store(Opcode))
  1321. if (MI->getOperand(2).getImm() != 0)
  1322. return false;
  1323. if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  1324. return false;
  1325. // Can't do the merge if the destination register is the same as the would-be
  1326. // writeback register.
  1327. if (MI->getOperand(0).getReg() == Base)
  1328. return false;
  1329. Register PredReg;
  1330. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1331. int Bytes = getLSMultipleTransferSize(MI);
  1332. MachineBasicBlock &MBB = *MI->getParent();
  1333. MachineBasicBlock::iterator MBBI(MI);
  1334. int Offset;
  1335. MachineBasicBlock::iterator MergeInstr
  1336. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1337. unsigned NewOpc;
  1338. if (!isAM5 && Offset == Bytes) {
  1339. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1340. } else if (Offset == -Bytes) {
  1341. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1342. } else {
  1343. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1344. if (Offset == Bytes) {
  1345. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1346. } else if (!isAM5 && Offset == -Bytes) {
  1347. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1348. } else
  1349. return false;
  1350. }
  1351. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1352. MBB.erase(MergeInstr);
  1353. ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
  1354. bool isLd = isLoadSingle(Opcode);
  1355. if (isAM5) {
  1356. // VLDM[SD]_UPD, VSTM[SD]_UPD
  1357. // (There are no base-updating versions of VLDR/VSTR instructions, but the
  1358. // updating load/store-multiple instructions can be used with only one
  1359. // register.)
  1360. MachineOperand &MO = MI->getOperand(0);
  1361. auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1362. .addReg(Base, getDefRegState(true)) // WB base register
  1363. .addReg(Base, getKillRegState(isLd ? BaseKill : false))
  1364. .addImm(Pred)
  1365. .addReg(PredReg)
  1366. .addReg(MO.getReg(), (isLd ? getDefRegState(true)
  1367. : getKillRegState(MO.isKill())))
  1368. .cloneMemRefs(*MI);
  1369. (void)MIB;
  1370. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1371. } else if (isLd) {
  1372. if (isAM2) {
  1373. // LDR_PRE, LDR_POST
  1374. if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
  1375. auto MIB =
  1376. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1377. .addReg(Base, RegState::Define)
  1378. .addReg(Base)
  1379. .addImm(Offset)
  1380. .addImm(Pred)
  1381. .addReg(PredReg)
  1382. .cloneMemRefs(*MI);
  1383. (void)MIB;
  1384. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1385. } else {
  1386. int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  1387. auto MIB =
  1388. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1389. .addReg(Base, RegState::Define)
  1390. .addReg(Base)
  1391. .addReg(0)
  1392. .addImm(Imm)
  1393. .add(predOps(Pred, PredReg))
  1394. .cloneMemRefs(*MI);
  1395. (void)MIB;
  1396. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1397. }
  1398. } else {
  1399. // t2LDR_PRE, t2LDR_POST
  1400. auto MIB =
  1401. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1402. .addReg(Base, RegState::Define)
  1403. .addReg(Base)
  1404. .addImm(Offset)
  1405. .add(predOps(Pred, PredReg))
  1406. .cloneMemRefs(*MI);
  1407. (void)MIB;
  1408. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1409. }
  1410. } else {
  1411. MachineOperand &MO = MI->getOperand(0);
  1412. // FIXME: post-indexed stores use am2offset_imm, which still encodes
  1413. // the vestigal zero-reg offset register. When that's fixed, this clause
  1414. // can be removed entirely.
  1415. if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
  1416. int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  1417. // STR_PRE, STR_POST
  1418. auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1419. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1420. .addReg(Base)
  1421. .addReg(0)
  1422. .addImm(Imm)
  1423. .add(predOps(Pred, PredReg))
  1424. .cloneMemRefs(*MI);
  1425. (void)MIB;
  1426. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1427. } else {
  1428. // t2STR_PRE, t2STR_POST
  1429. auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1430. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1431. .addReg(Base)
  1432. .addImm(Offset)
  1433. .add(predOps(Pred, PredReg))
  1434. .cloneMemRefs(*MI);
  1435. (void)MIB;
  1436. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1437. }
  1438. }
  1439. MBB.erase(MBBI);
  1440. return true;
  1441. }
  1442. bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  1443. unsigned Opcode = MI.getOpcode();
  1444. assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
  1445. "Must have t2STRDi8 or t2LDRDi8");
  1446. if (MI.getOperand(3).getImm() != 0)
  1447. return false;
  1448. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
  1449. // Behaviour for writeback is undefined if base register is the same as one
  1450. // of the others.
  1451. const MachineOperand &BaseOp = MI.getOperand(2);
  1452. Register Base = BaseOp.getReg();
  1453. const MachineOperand &Reg0Op = MI.getOperand(0);
  1454. const MachineOperand &Reg1Op = MI.getOperand(1);
  1455. if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
  1456. return false;
  1457. Register PredReg;
  1458. ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  1459. MachineBasicBlock::iterator MBBI(MI);
  1460. MachineBasicBlock &MBB = *MI.getParent();
  1461. int Offset;
  1462. MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
  1463. PredReg, Offset);
  1464. unsigned NewOpc;
  1465. if (Offset == 8 || Offset == -8) {
  1466. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  1467. } else {
  1468. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1469. if (Offset == 8 || Offset == -8) {
  1470. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
  1471. } else
  1472. return false;
  1473. }
  1474. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1475. MBB.erase(MergeInstr);
  1476. DebugLoc DL = MI.getDebugLoc();
  1477. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  1478. if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
  1479. MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  1480. } else {
  1481. assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
  1482. MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  1483. }
  1484. MIB.addReg(BaseOp.getReg(), RegState::Kill)
  1485. .addImm(Offset).addImm(Pred).addReg(PredReg);
  1486. assert(TII->get(Opcode).getNumOperands() == 6 &&
  1487. TII->get(NewOpc).getNumOperands() == 7 &&
  1488. "Unexpected number of operands in Opcode specification.");
  1489. // Transfer implicit operands.
  1490. for (const MachineOperand &MO : MI.implicit_operands())
  1491. MIB.add(MO);
  1492. MIB.cloneMemRefs(MI);
  1493. LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
  1494. MBB.erase(MBBI);
  1495. return true;
  1496. }
  1497. /// Returns true if instruction is a memory operation that this pass is capable
  1498. /// of operating on.
  1499. static bool isMemoryOp(const MachineInstr &MI) {
  1500. unsigned Opcode = MI.getOpcode();
  1501. switch (Opcode) {
  1502. case ARM::VLDRS:
  1503. case ARM::VSTRS:
  1504. case ARM::VLDRD:
  1505. case ARM::VSTRD:
  1506. case ARM::LDRi12:
  1507. case ARM::STRi12:
  1508. case ARM::tLDRi:
  1509. case ARM::tSTRi:
  1510. case ARM::tLDRspi:
  1511. case ARM::tSTRspi:
  1512. case ARM::t2LDRi8:
  1513. case ARM::t2LDRi12:
  1514. case ARM::t2STRi8:
  1515. case ARM::t2STRi12:
  1516. break;
  1517. default:
  1518. return false;
  1519. }
  1520. if (!MI.getOperand(1).isReg())
  1521. return false;
  1522. // When no memory operands are present, conservatively assume unaligned,
  1523. // volatile, unfoldable.
  1524. if (!MI.hasOneMemOperand())
  1525. return false;
  1526. const MachineMemOperand &MMO = **MI.memoperands_begin();
  1527. // Don't touch volatile memory accesses - we may be changing their order.
  1528. // TODO: We could allow unordered and monotonic atomics here, but we need to
  1529. // make sure the resulting ldm/stm is correctly marked as atomic.
  1530. if (MMO.isVolatile() || MMO.isAtomic())
  1531. return false;
  1532. // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  1533. // not.
  1534. if (MMO.getAlign() < Align(4))
  1535. return false;
  1536. // str <undef> could probably be eliminated entirely, but for now we just want
  1537. // to avoid making a mess of it.
  1538. // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  1539. if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
  1540. return false;
  1541. // Likewise don't mess with references to undefined addresses.
  1542. if (MI.getOperand(1).isUndef())
  1543. return false;
  1544. return true;
  1545. }
  1546. static void InsertLDR_STR(MachineBasicBlock &MBB,
  1547. MachineBasicBlock::iterator &MBBI, int Offset,
  1548. bool isDef, unsigned NewOpc, unsigned Reg,
  1549. bool RegDeadKill, bool RegUndef, unsigned BaseReg,
  1550. bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
  1551. unsigned PredReg, const TargetInstrInfo *TII,
  1552. MachineInstr *MI) {
  1553. if (isDef) {
  1554. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1555. TII->get(NewOpc))
  1556. .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
  1557. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1558. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1559. // FIXME: This is overly conservative; the new instruction accesses 4
  1560. // bytes, not 8.
  1561. MIB.cloneMemRefs(*MI);
  1562. } else {
  1563. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1564. TII->get(NewOpc))
  1565. .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
  1566. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1567. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1568. // FIXME: This is overly conservative; the new instruction accesses 4
  1569. // bytes, not 8.
  1570. MIB.cloneMemRefs(*MI);
  1571. }
  1572. }
  1573. bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  1574. MachineBasicBlock::iterator &MBBI) {
  1575. MachineInstr *MI = &*MBBI;
  1576. unsigned Opcode = MI->getOpcode();
  1577. // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
  1578. // if we see this opcode.
  1579. if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
  1580. return false;
  1581. const MachineOperand &BaseOp = MI->getOperand(2);
  1582. Register BaseReg = BaseOp.getReg();
  1583. Register EvenReg = MI->getOperand(0).getReg();
  1584. Register OddReg = MI->getOperand(1).getReg();
  1585. unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  1586. unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
  1587. // ARM errata 602117: LDRD with base in list may result in incorrect base
  1588. // register when interrupted or faulted.
  1589. bool Errata602117 = EvenReg == BaseReg &&
  1590. (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  1591. // ARM LDRD/STRD needs consecutive registers.
  1592. bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
  1593. (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
  1594. if (!Errata602117 && !NonConsecutiveRegs)
  1595. return false;
  1596. bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  1597. bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  1598. bool EvenDeadKill = isLd ?
  1599. MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  1600. bool EvenUndef = MI->getOperand(0).isUndef();
  1601. bool OddDeadKill = isLd ?
  1602. MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  1603. bool OddUndef = MI->getOperand(1).isUndef();
  1604. bool BaseKill = BaseOp.isKill();
  1605. bool BaseUndef = BaseOp.isUndef();
  1606. assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
  1607. "register offset not handled below");
  1608. int OffImm = getMemoryOpOffset(*MI);
  1609. Register PredReg;
  1610. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1611. if (OddRegNum > EvenRegNum && OffImm == 0) {
  1612. // Ascending register numbers and no offset. It's safe to change it to a
  1613. // ldm or stm.
  1614. unsigned NewOpc = (isLd)
  1615. ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
  1616. : (isT2 ? ARM::t2STMIA : ARM::STMIA);
  1617. if (isLd) {
  1618. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1619. .addReg(BaseReg, getKillRegState(BaseKill))
  1620. .addImm(Pred).addReg(PredReg)
  1621. .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
  1622. .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
  1623. .cloneMemRefs(*MI);
  1624. ++NumLDRD2LDM;
  1625. } else {
  1626. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1627. .addReg(BaseReg, getKillRegState(BaseKill))
  1628. .addImm(Pred).addReg(PredReg)
  1629. .addReg(EvenReg,
  1630. getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
  1631. .addReg(OddReg,
  1632. getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
  1633. .cloneMemRefs(*MI);
  1634. ++NumSTRD2STM;
  1635. }
  1636. } else {
  1637. // Split into two instructions.
  1638. unsigned NewOpc = (isLd)
  1639. ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1640. : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1641. // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
  1642. // so adjust and use t2LDRi12 here for that.
  1643. unsigned NewOpc2 = (isLd)
  1644. ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1645. : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1646. // If this is a load, make sure the first load does not clobber the base
  1647. // register before the second load reads it.
  1648. if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
  1649. assert(!TRI->regsOverlap(OddReg, BaseReg));
  1650. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1651. false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
  1652. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1653. false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1654. MI);
  1655. } else {
  1656. if (OddReg == EvenReg && EvenDeadKill) {
  1657. // If the two source operands are the same, the kill marker is
  1658. // probably on the first one. e.g.
  1659. // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
  1660. EvenDeadKill = false;
  1661. OddDeadKill = true;
  1662. }
  1663. // Never kill the base register in the first instruction.
  1664. if (EvenReg == BaseReg)
  1665. EvenDeadKill = false;
  1666. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1667. EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
  1668. MI);
  1669. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1670. OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1671. MI);
  1672. }
  1673. if (isLd)
  1674. ++NumLDRD2LDR;
  1675. else
  1676. ++NumSTRD2STR;
  1677. }
  1678. MBBI = MBB.erase(MBBI);
  1679. return true;
  1680. }
  1681. /// An optimization pass to turn multiple LDR / STR ops of the same base and
  1682. /// incrementing offset into LDM / STM ops.
  1683. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  1684. MemOpQueue MemOps;
  1685. unsigned CurrBase = 0;
  1686. unsigned CurrOpc = ~0u;
  1687. ARMCC::CondCodes CurrPred = ARMCC::AL;
  1688. unsigned Position = 0;
  1689. assert(Candidates.size() == 0);
  1690. assert(MergeBaseCandidates.size() == 0);
  1691. LiveRegsValid = false;
  1692. for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
  1693. I = MBBI) {
  1694. // The instruction in front of the iterator is the one we look at.
  1695. MBBI = std::prev(I);
  1696. if (FixInvalidRegPairOp(MBB, MBBI))
  1697. continue;
  1698. ++Position;
  1699. if (isMemoryOp(*MBBI)) {
  1700. unsigned Opcode = MBBI->getOpcode();
  1701. const MachineOperand &MO = MBBI->getOperand(0);
  1702. Register Reg = MO.getReg();
  1703. Register Base = getLoadStoreBaseOp(*MBBI).getReg();
  1704. Register PredReg;
  1705. ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
  1706. int Offset = getMemoryOpOffset(*MBBI);
  1707. if (CurrBase == 0) {
  1708. // Start of a new chain.
  1709. CurrBase = Base;
  1710. CurrOpc = Opcode;
  1711. CurrPred = Pred;
  1712. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1713. continue;
  1714. }
  1715. // Note: No need to match PredReg in the next if.
  1716. if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
  1717. // Watch out for:
  1718. // r4 := ldr [r0, #8]
  1719. // r4 := ldr [r0, #4]
  1720. // or
  1721. // r0 := ldr [r0]
  1722. // If a load overrides the base register or a register loaded by
  1723. // another load in our chain, we cannot take this instruction.
  1724. bool Overlap = false;
  1725. if (isLoadSingle(Opcode)) {
  1726. Overlap = (Base == Reg);
  1727. if (!Overlap) {
  1728. for (const MemOpQueueEntry &E : MemOps) {
  1729. if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
  1730. Overlap = true;
  1731. break;
  1732. }
  1733. }
  1734. }
  1735. }
  1736. if (!Overlap) {
  1737. // Check offset and sort memory operation into the current chain.
  1738. if (Offset > MemOps.back().Offset) {
  1739. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1740. continue;
  1741. } else {
  1742. MemOpQueue::iterator MI, ME;
  1743. for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
  1744. if (Offset < MI->Offset) {
  1745. // Found a place to insert.
  1746. break;
  1747. }
  1748. if (Offset == MI->Offset) {
  1749. // Collision, abort.
  1750. MI = ME;
  1751. break;
  1752. }
  1753. }
  1754. if (MI != MemOps.end()) {
  1755. MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
  1756. continue;
  1757. }
  1758. }
  1759. }
  1760. }
  1761. // Don't advance the iterator; The op will start a new chain next.
  1762. MBBI = I;
  1763. --Position;
  1764. // Fallthrough to look into existing chain.
  1765. } else if (MBBI->isDebugInstr()) {
  1766. continue;
  1767. } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
  1768. MBBI->getOpcode() == ARM::t2STRDi8) {
  1769. // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
  1770. // remember them because we may still be able to merge add/sub into them.
  1771. MergeBaseCandidates.push_back(&*MBBI);
  1772. }
  1773. // If we are here then the chain is broken; Extract candidates for a merge.
  1774. if (MemOps.size() > 0) {
  1775. FormCandidates(MemOps);
  1776. // Reset for the next chain.
  1777. CurrBase = 0;
  1778. CurrOpc = ~0u;
  1779. CurrPred = ARMCC::AL;
  1780. MemOps.clear();
  1781. }
  1782. }
  1783. if (MemOps.size() > 0)
  1784. FormCandidates(MemOps);
  1785. // Sort candidates so they get processed from end to begin of the basic
  1786. // block later; This is necessary for liveness calculation.
  1787. auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
  1788. return M0->InsertPos < M1->InsertPos;
  1789. };
  1790. llvm::sort(Candidates, LessThan);
  1791. // Go through list of candidates and merge.
  1792. bool Changed = false;
  1793. for (const MergeCandidate *Candidate : Candidates) {
  1794. if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
  1795. MachineInstr *Merged = MergeOpsUpdate(*Candidate);
  1796. // Merge preceding/trailing base inc/dec into the merged op.
  1797. if (Merged) {
  1798. Changed = true;
  1799. unsigned Opcode = Merged->getOpcode();
  1800. if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
  1801. MergeBaseUpdateLSDouble(*Merged);
  1802. else
  1803. MergeBaseUpdateLSMultiple(Merged);
  1804. } else {
  1805. for (MachineInstr *MI : Candidate->Instrs) {
  1806. if (MergeBaseUpdateLoadStore(MI))
  1807. Changed = true;
  1808. }
  1809. }
  1810. } else {
  1811. assert(Candidate->Instrs.size() == 1);
  1812. if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
  1813. Changed = true;
  1814. }
  1815. }
  1816. Candidates.clear();
  1817. // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  1818. for (MachineInstr *MI : MergeBaseCandidates)
  1819. MergeBaseUpdateLSDouble(*MI);
  1820. MergeBaseCandidates.clear();
  1821. return Changed;
  1822. }
  1823. /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
  1824. /// into the preceding stack restore so it directly restore the value of LR
  1825. /// into pc.
  1826. /// ldmfd sp!, {..., lr}
  1827. /// bx lr
  1828. /// or
  1829. /// ldmfd sp!, {..., lr}
  1830. /// mov pc, lr
  1831. /// =>
  1832. /// ldmfd sp!, {..., pc}
  1833. bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  1834. // Thumb1 LDM doesn't allow high registers.
  1835. if (isThumb1) return false;
  1836. if (MBB.empty()) return false;
  1837. MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  1838. if (MBBI != MBB.begin() && MBBI != MBB.end() &&
  1839. (MBBI->getOpcode() == ARM::BX_RET ||
  1840. MBBI->getOpcode() == ARM::tBX_RET ||
  1841. MBBI->getOpcode() == ARM::MOVPCLR)) {
  1842. MachineBasicBlock::iterator PrevI = std::prev(MBBI);
  1843. // Ignore any debug instructions.
  1844. while (PrevI->isDebugInstr() && PrevI != MBB.begin())
  1845. --PrevI;
  1846. MachineInstr &PrevMI = *PrevI;
  1847. unsigned Opcode = PrevMI.getOpcode();
  1848. if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
  1849. Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
  1850. Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
  1851. MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
  1852. if (MO.getReg() != ARM::LR)
  1853. return false;
  1854. unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
  1855. assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
  1856. Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
  1857. PrevMI.setDesc(TII->get(NewOpc));
  1858. MO.setReg(ARM::PC);
  1859. PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
  1860. MBB.erase(MBBI);
  1861. // We now restore LR into PC so it is not live-out of the return block
  1862. // anymore: Clear the CSI Restored bit.
  1863. MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
  1864. // CSI should be fixed after PrologEpilog Insertion
  1865. assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
  1866. for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
  1867. if (Info.getReg() == ARM::LR) {
  1868. Info.setRestored(false);
  1869. break;
  1870. }
  1871. }
  1872. return true;
  1873. }
  1874. }
  1875. return false;
  1876. }
  1877. bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  1878. MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  1879. if (MBBI == MBB.begin() || MBBI == MBB.end() ||
  1880. MBBI->getOpcode() != ARM::tBX_RET)
  1881. return false;
  1882. MachineBasicBlock::iterator Prev = MBBI;
  1883. --Prev;
  1884. if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
  1885. return false;
  1886. for (auto Use : Prev->uses())
  1887. if (Use.isKill()) {
  1888. assert(STI->hasV4TOps());
  1889. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
  1890. .addReg(Use.getReg(), RegState::Kill)
  1891. .add(predOps(ARMCC::AL))
  1892. .copyImplicitOps(*MBBI);
  1893. MBB.erase(MBBI);
  1894. MBB.erase(Prev);
  1895. return true;
  1896. }
  1897. llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
  1898. }
  1899. bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1900. if (skipFunction(Fn.getFunction()))
  1901. return false;
  1902. MF = &Fn;
  1903. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1904. TL = STI->getTargetLowering();
  1905. AFI = Fn.getInfo<ARMFunctionInfo>();
  1906. TII = STI->getInstrInfo();
  1907. TRI = STI->getRegisterInfo();
  1908. RegClassInfoValid = false;
  1909. isThumb2 = AFI->isThumb2Function();
  1910. isThumb1 = AFI->isThumbFunction() && !isThumb2;
  1911. bool Modified = false;
  1912. for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
  1913. ++MFI) {
  1914. MachineBasicBlock &MBB = *MFI;
  1915. Modified |= LoadStoreMultipleOpti(MBB);
  1916. if (STI->hasV5TOps())
  1917. Modified |= MergeReturnIntoLDM(MBB);
  1918. if (isThumb1)
  1919. Modified |= CombineMovBx(MBB);
  1920. }
  1921. Allocator.DestroyAll();
  1922. return Modified;
  1923. }
  1924. #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  1925. "ARM pre- register allocation load / store optimization pass"
  1926. namespace {
  1927. /// Pre- register allocation pass that move load / stores from consecutive
  1928. /// locations close to make it more likely they will be combined later.
  1929. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  1930. static char ID;
  1931. AliasAnalysis *AA;
  1932. const DataLayout *TD;
  1933. const TargetInstrInfo *TII;
  1934. const TargetRegisterInfo *TRI;
  1935. const ARMSubtarget *STI;
  1936. MachineRegisterInfo *MRI;
  1937. MachineDominatorTree *DT;
  1938. MachineFunction *MF;
  1939. ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
  1940. bool runOnMachineFunction(MachineFunction &Fn) override;
  1941. StringRef getPassName() const override {
  1942. return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  1943. }
  1944. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1945. AU.addRequired<AAResultsWrapperPass>();
  1946. AU.addRequired<MachineDominatorTree>();
  1947. AU.addPreserved<MachineDominatorTree>();
  1948. MachineFunctionPass::getAnalysisUsage(AU);
  1949. }
  1950. private:
  1951. bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
  1952. unsigned &NewOpc, Register &EvenReg, Register &OddReg,
  1953. Register &BaseReg, int &Offset, Register &PredReg,
  1954. ARMCC::CondCodes &Pred, bool &isT2);
  1955. bool RescheduleOps(MachineBasicBlock *MBB,
  1956. SmallVectorImpl<MachineInstr *> &Ops,
  1957. unsigned Base, bool isLd,
  1958. DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  1959. bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  1960. bool DistributeIncrements();
  1961. bool DistributeIncrements(Register Base);
  1962. };
  1963. } // end anonymous namespace
  1964. char ARMPreAllocLoadStoreOpt::ID = 0;
  1965. INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
  1966. ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  1967. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  1968. INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
  1969. ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  1970. // Limit the number of instructions to be rescheduled.
  1971. // FIXME: tune this limit, and/or come up with some better heuristics.
  1972. static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
  1973. cl::init(8), cl::Hidden);
  1974. bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1975. if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
  1976. return false;
  1977. TD = &Fn.getDataLayout();
  1978. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1979. TII = STI->getInstrInfo();
  1980. TRI = STI->getRegisterInfo();
  1981. MRI = &Fn.getRegInfo();
  1982. DT = &getAnalysis<MachineDominatorTree>();
  1983. MF = &Fn;
  1984. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  1985. bool Modified = DistributeIncrements();
  1986. for (MachineBasicBlock &MFI : Fn)
  1987. Modified |= RescheduleLoadStoreInstrs(&MFI);
  1988. return Modified;
  1989. }
  1990. static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
  1991. MachineBasicBlock::iterator I,
  1992. MachineBasicBlock::iterator E,
  1993. SmallPtrSetImpl<MachineInstr*> &MemOps,
  1994. SmallSet<unsigned, 4> &MemRegs,
  1995. const TargetRegisterInfo *TRI,
  1996. AliasAnalysis *AA) {
  1997. // Are there stores / loads / calls between them?
  1998. SmallSet<unsigned, 4> AddedRegPressure;
  1999. while (++I != E) {
  2000. if (I->isDebugInstr() || MemOps.count(&*I))
  2001. continue;
  2002. if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
  2003. return false;
  2004. if (I->mayStore() || (!isLd && I->mayLoad()))
  2005. for (MachineInstr *MemOp : MemOps)
  2006. if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
  2007. return false;
  2008. for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
  2009. MachineOperand &MO = I->getOperand(j);
  2010. if (!MO.isReg())
  2011. continue;
  2012. Register Reg = MO.getReg();
  2013. if (MO.isDef() && TRI->regsOverlap(Reg, Base))
  2014. return false;
  2015. if (Reg != Base && !MemRegs.count(Reg))
  2016. AddedRegPressure.insert(Reg);
  2017. }
  2018. }
  2019. // Estimate register pressure increase due to the transformation.
  2020. if (MemRegs.size() <= 4)
  2021. // Ok if we are moving small number of instructions.
  2022. return true;
  2023. return AddedRegPressure.size() <= MemRegs.size() * 2;
  2024. }
  2025. bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
  2026. MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
  2027. Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
  2028. Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
  2029. // Make sure we're allowed to generate LDRD/STRD.
  2030. if (!STI->hasV5TEOps())
  2031. return false;
  2032. // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  2033. unsigned Scale = 1;
  2034. unsigned Opcode = Op0->getOpcode();
  2035. if (Opcode == ARM::LDRi12) {
  2036. NewOpc = ARM::LDRD;
  2037. } else if (Opcode == ARM::STRi12) {
  2038. NewOpc = ARM::STRD;
  2039. } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
  2040. NewOpc = ARM::t2LDRDi8;
  2041. Scale = 4;
  2042. isT2 = true;
  2043. } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
  2044. NewOpc = ARM::t2STRDi8;
  2045. Scale = 4;
  2046. isT2 = true;
  2047. } else {
  2048. return false;
  2049. }
  2050. // Make sure the base address satisfies i64 ld / st alignment requirement.
  2051. // At the moment, we ignore the memoryoperand's value.
  2052. // If we want to use AliasAnalysis, we should check it accordingly.
  2053. if (!Op0->hasOneMemOperand() ||
  2054. (*Op0->memoperands_begin())->isVolatile() ||
  2055. (*Op0->memoperands_begin())->isAtomic())
  2056. return false;
  2057. Align Alignment = (*Op0->memoperands_begin())->getAlign();
  2058. const Function &Func = MF->getFunction();
  2059. Align ReqAlign =
  2060. STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext()))
  2061. : Align(8); // Pre-v6 need 8-byte align
  2062. if (Alignment < ReqAlign)
  2063. return false;
  2064. // Then make sure the immediate offset fits.
  2065. int OffImm = getMemoryOpOffset(*Op0);
  2066. if (isT2) {
  2067. int Limit = (1 << 8) * Scale;
  2068. if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
  2069. return false;
  2070. Offset = OffImm;
  2071. } else {
  2072. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  2073. if (OffImm < 0) {
  2074. AddSub = ARM_AM::sub;
  2075. OffImm = - OffImm;
  2076. }
  2077. int Limit = (1 << 8) * Scale;
  2078. if (OffImm >= Limit || (OffImm & (Scale-1)))
  2079. return false;
  2080. Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  2081. }
  2082. FirstReg = Op0->getOperand(0).getReg();
  2083. SecondReg = Op1->getOperand(0).getReg();
  2084. if (FirstReg == SecondReg)
  2085. return false;
  2086. BaseReg = Op0->getOperand(1).getReg();
  2087. Pred = getInstrPredicate(*Op0, PredReg);
  2088. dl = Op0->getDebugLoc();
  2089. return true;
  2090. }
  2091. bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
  2092. SmallVectorImpl<MachineInstr *> &Ops,
  2093. unsigned Base, bool isLd,
  2094. DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  2095. bool RetVal = false;
  2096. // Sort by offset (in reverse order).
  2097. llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
  2098. int LOffset = getMemoryOpOffset(*LHS);
  2099. int ROffset = getMemoryOpOffset(*RHS);
  2100. assert(LHS == RHS || LOffset != ROffset);
  2101. return LOffset > ROffset;
  2102. });
  2103. // The loads / stores of the same base are in order. Scan them from first to
  2104. // last and check for the following:
  2105. // 1. Any def of base.
  2106. // 2. Any gaps.
  2107. while (Ops.size() > 1) {
  2108. unsigned FirstLoc = ~0U;
  2109. unsigned LastLoc = 0;
  2110. MachineInstr *FirstOp = nullptr;
  2111. MachineInstr *LastOp = nullptr;
  2112. int LastOffset = 0;
  2113. unsigned LastOpcode = 0;
  2114. unsigned LastBytes = 0;
  2115. unsigned NumMove = 0;
  2116. for (int i = Ops.size() - 1; i >= 0; --i) {
  2117. // Make sure each operation has the same kind.
  2118. MachineInstr *Op = Ops[i];
  2119. unsigned LSMOpcode
  2120. = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
  2121. if (LastOpcode && LSMOpcode != LastOpcode)
  2122. break;
  2123. // Check that we have a continuous set of offsets.
  2124. int Offset = getMemoryOpOffset(*Op);
  2125. unsigned Bytes = getLSMultipleTransferSize(Op);
  2126. if (LastBytes) {
  2127. if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
  2128. break;
  2129. }
  2130. // Don't try to reschedule too many instructions.
  2131. if (NumMove == InstReorderLimit)
  2132. break;
  2133. // Found a mergable instruction; save information about it.
  2134. ++NumMove;
  2135. LastOffset = Offset;
  2136. LastBytes = Bytes;
  2137. LastOpcode = LSMOpcode;
  2138. unsigned Loc = MI2LocMap[Op];
  2139. if (Loc <= FirstLoc) {
  2140. FirstLoc = Loc;
  2141. FirstOp = Op;
  2142. }
  2143. if (Loc >= LastLoc) {
  2144. LastLoc = Loc;
  2145. LastOp = Op;
  2146. }
  2147. }
  2148. if (NumMove <= 1)
  2149. Ops.pop_back();
  2150. else {
  2151. SmallPtrSet<MachineInstr*, 4> MemOps;
  2152. SmallSet<unsigned, 4> MemRegs;
  2153. for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
  2154. MemOps.insert(Ops[i]);
  2155. MemRegs.insert(Ops[i]->getOperand(0).getReg());
  2156. }
  2157. // Be conservative, if the instructions are too far apart, don't
  2158. // move them. We want to limit the increase of register pressure.
  2159. bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
  2160. if (DoMove)
  2161. DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
  2162. MemOps, MemRegs, TRI, AA);
  2163. if (!DoMove) {
  2164. for (unsigned i = 0; i != NumMove; ++i)
  2165. Ops.pop_back();
  2166. } else {
  2167. // This is the new location for the loads / stores.
  2168. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
  2169. while (InsertPos != MBB->end() &&
  2170. (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
  2171. ++InsertPos;
  2172. // If we are moving a pair of loads / stores, see if it makes sense
  2173. // to try to allocate a pair of registers that can form register pairs.
  2174. MachineInstr *Op0 = Ops.back();
  2175. MachineInstr *Op1 = Ops[Ops.size()-2];
  2176. Register FirstReg, SecondReg;
  2177. Register BaseReg, PredReg;
  2178. ARMCC::CondCodes Pred = ARMCC::AL;
  2179. bool isT2 = false;
  2180. unsigned NewOpc = 0;
  2181. int Offset = 0;
  2182. DebugLoc dl;
  2183. if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
  2184. FirstReg, SecondReg, BaseReg,
  2185. Offset, PredReg, Pred, isT2)) {
  2186. Ops.pop_back();
  2187. Ops.pop_back();
  2188. const MCInstrDesc &MCID = TII->get(NewOpc);
  2189. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2190. MRI->constrainRegClass(FirstReg, TRC);
  2191. MRI->constrainRegClass(SecondReg, TRC);
  2192. // Form the pair instruction.
  2193. if (isLd) {
  2194. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2195. .addReg(FirstReg, RegState::Define)
  2196. .addReg(SecondReg, RegState::Define)
  2197. .addReg(BaseReg);
  2198. // FIXME: We're converting from LDRi12 to an insn that still
  2199. // uses addrmode2, so we need an explicit offset reg. It should
  2200. // always by reg0 since we're transforming LDRi12s.
  2201. if (!isT2)
  2202. MIB.addReg(0);
  2203. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2204. MIB.cloneMergedMemRefs({Op0, Op1});
  2205. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2206. ++NumLDRDFormed;
  2207. } else {
  2208. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2209. .addReg(FirstReg)
  2210. .addReg(SecondReg)
  2211. .addReg(BaseReg);
  2212. // FIXME: We're converting from LDRi12 to an insn that still
  2213. // uses addrmode2, so we need an explicit offset reg. It should
  2214. // always by reg0 since we're transforming STRi12s.
  2215. if (!isT2)
  2216. MIB.addReg(0);
  2217. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2218. MIB.cloneMergedMemRefs({Op0, Op1});
  2219. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2220. ++NumSTRDFormed;
  2221. }
  2222. MBB->erase(Op0);
  2223. MBB->erase(Op1);
  2224. if (!isT2) {
  2225. // Add register allocation hints to form register pairs.
  2226. MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
  2227. MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
  2228. }
  2229. } else {
  2230. for (unsigned i = 0; i != NumMove; ++i) {
  2231. MachineInstr *Op = Ops.back();
  2232. Ops.pop_back();
  2233. MBB->splice(InsertPos, MBB, Op);
  2234. }
  2235. }
  2236. NumLdStMoved += NumMove;
  2237. RetVal = true;
  2238. }
  2239. }
  2240. }
  2241. return RetVal;
  2242. }
  2243. bool
  2244. ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  2245. bool RetVal = false;
  2246. DenseMap<MachineInstr*, unsigned> MI2LocMap;
  2247. using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
  2248. using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
  2249. using BaseVec = SmallVector<unsigned, 4>;
  2250. Base2InstMap Base2LdsMap;
  2251. Base2InstMap Base2StsMap;
  2252. BaseVec LdBases;
  2253. BaseVec StBases;
  2254. unsigned Loc = 0;
  2255. MachineBasicBlock::iterator MBBI = MBB->begin();
  2256. MachineBasicBlock::iterator E = MBB->end();
  2257. while (MBBI != E) {
  2258. for (; MBBI != E; ++MBBI) {
  2259. MachineInstr &MI = *MBBI;
  2260. if (MI.isCall() || MI.isTerminator()) {
  2261. // Stop at barriers.
  2262. ++MBBI;
  2263. break;
  2264. }
  2265. if (!MI.isDebugInstr())
  2266. MI2LocMap[&MI] = ++Loc;
  2267. if (!isMemoryOp(MI))
  2268. continue;
  2269. Register PredReg;
  2270. if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
  2271. continue;
  2272. int Opc = MI.getOpcode();
  2273. bool isLd = isLoadSingle(Opc);
  2274. Register Base = MI.getOperand(1).getReg();
  2275. int Offset = getMemoryOpOffset(MI);
  2276. bool StopHere = false;
  2277. auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
  2278. MapIt BI = Base2Ops.find(Base);
  2279. if (BI == Base2Ops.end()) {
  2280. Base2Ops[Base].push_back(&MI);
  2281. Bases.push_back(Base);
  2282. return;
  2283. }
  2284. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  2285. if (Offset == getMemoryOpOffset(*BI->second[i])) {
  2286. StopHere = true;
  2287. break;
  2288. }
  2289. }
  2290. if (!StopHere)
  2291. BI->second.push_back(&MI);
  2292. };
  2293. if (isLd)
  2294. FindBases(Base2LdsMap, LdBases);
  2295. else
  2296. FindBases(Base2StsMap, StBases);
  2297. if (StopHere) {
  2298. // Found a duplicate (a base+offset combination that's seen earlier).
  2299. // Backtrack.
  2300. --Loc;
  2301. break;
  2302. }
  2303. }
  2304. // Re-schedule loads.
  2305. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
  2306. unsigned Base = LdBases[i];
  2307. SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
  2308. if (Lds.size() > 1)
  2309. RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
  2310. }
  2311. // Re-schedule stores.
  2312. for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
  2313. unsigned Base = StBases[i];
  2314. SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
  2315. if (Sts.size() > 1)
  2316. RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
  2317. }
  2318. if (MBBI != E) {
  2319. Base2LdsMap.clear();
  2320. Base2StsMap.clear();
  2321. LdBases.clear();
  2322. StBases.clear();
  2323. }
  2324. }
  2325. return RetVal;
  2326. }
  2327. // Get the Base register operand index from the memory access MachineInst if we
  2328. // should attempt to distribute postinc on it. Return -1 if not of a valid
  2329. // instruction type. If it returns an index, it is assumed that instruction is a
  2330. // r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
  2331. static int getBaseOperandIndex(MachineInstr &MI) {
  2332. switch (MI.getOpcode()) {
  2333. case ARM::MVE_VLDRBS16:
  2334. case ARM::MVE_VLDRBS32:
  2335. case ARM::MVE_VLDRBU16:
  2336. case ARM::MVE_VLDRBU32:
  2337. case ARM::MVE_VLDRHS32:
  2338. case ARM::MVE_VLDRHU32:
  2339. case ARM::MVE_VLDRBU8:
  2340. case ARM::MVE_VLDRHU16:
  2341. case ARM::MVE_VLDRWU32:
  2342. case ARM::MVE_VSTRB16:
  2343. case ARM::MVE_VSTRB32:
  2344. case ARM::MVE_VSTRH32:
  2345. case ARM::MVE_VSTRBU8:
  2346. case ARM::MVE_VSTRHU16:
  2347. case ARM::MVE_VSTRWU32:
  2348. case ARM::t2LDRHi8:
  2349. case ARM::t2LDRHi12:
  2350. case ARM::t2LDRSHi8:
  2351. case ARM::t2LDRSHi12:
  2352. case ARM::t2LDRBi8:
  2353. case ARM::t2LDRBi12:
  2354. case ARM::t2LDRSBi8:
  2355. case ARM::t2LDRSBi12:
  2356. case ARM::t2STRBi8:
  2357. case ARM::t2STRBi12:
  2358. case ARM::t2STRHi8:
  2359. case ARM::t2STRHi12:
  2360. return 1;
  2361. case ARM::MVE_VLDRBS16_post:
  2362. case ARM::MVE_VLDRBS32_post:
  2363. case ARM::MVE_VLDRBU16_post:
  2364. case ARM::MVE_VLDRBU32_post:
  2365. case ARM::MVE_VLDRHS32_post:
  2366. case ARM::MVE_VLDRHU32_post:
  2367. case ARM::MVE_VLDRBU8_post:
  2368. case ARM::MVE_VLDRHU16_post:
  2369. case ARM::MVE_VLDRWU32_post:
  2370. case ARM::MVE_VSTRB16_post:
  2371. case ARM::MVE_VSTRB32_post:
  2372. case ARM::MVE_VSTRH32_post:
  2373. case ARM::MVE_VSTRBU8_post:
  2374. case ARM::MVE_VSTRHU16_post:
  2375. case ARM::MVE_VSTRWU32_post:
  2376. case ARM::MVE_VLDRBS16_pre:
  2377. case ARM::MVE_VLDRBS32_pre:
  2378. case ARM::MVE_VLDRBU16_pre:
  2379. case ARM::MVE_VLDRBU32_pre:
  2380. case ARM::MVE_VLDRHS32_pre:
  2381. case ARM::MVE_VLDRHU32_pre:
  2382. case ARM::MVE_VLDRBU8_pre:
  2383. case ARM::MVE_VLDRHU16_pre:
  2384. case ARM::MVE_VLDRWU32_pre:
  2385. case ARM::MVE_VSTRB16_pre:
  2386. case ARM::MVE_VSTRB32_pre:
  2387. case ARM::MVE_VSTRH32_pre:
  2388. case ARM::MVE_VSTRBU8_pre:
  2389. case ARM::MVE_VSTRHU16_pre:
  2390. case ARM::MVE_VSTRWU32_pre:
  2391. return 2;
  2392. }
  2393. return -1;
  2394. }
  2395. static bool isPostIndex(MachineInstr &MI) {
  2396. switch (MI.getOpcode()) {
  2397. case ARM::MVE_VLDRBS16_post:
  2398. case ARM::MVE_VLDRBS32_post:
  2399. case ARM::MVE_VLDRBU16_post:
  2400. case ARM::MVE_VLDRBU32_post:
  2401. case ARM::MVE_VLDRHS32_post:
  2402. case ARM::MVE_VLDRHU32_post:
  2403. case ARM::MVE_VLDRBU8_post:
  2404. case ARM::MVE_VLDRHU16_post:
  2405. case ARM::MVE_VLDRWU32_post:
  2406. case ARM::MVE_VSTRB16_post:
  2407. case ARM::MVE_VSTRB32_post:
  2408. case ARM::MVE_VSTRH32_post:
  2409. case ARM::MVE_VSTRBU8_post:
  2410. case ARM::MVE_VSTRHU16_post:
  2411. case ARM::MVE_VSTRWU32_post:
  2412. return true;
  2413. }
  2414. return false;
  2415. }
  2416. static bool isPreIndex(MachineInstr &MI) {
  2417. switch (MI.getOpcode()) {
  2418. case ARM::MVE_VLDRBS16_pre:
  2419. case ARM::MVE_VLDRBS32_pre:
  2420. case ARM::MVE_VLDRBU16_pre:
  2421. case ARM::MVE_VLDRBU32_pre:
  2422. case ARM::MVE_VLDRHS32_pre:
  2423. case ARM::MVE_VLDRHU32_pre:
  2424. case ARM::MVE_VLDRBU8_pre:
  2425. case ARM::MVE_VLDRHU16_pre:
  2426. case ARM::MVE_VLDRWU32_pre:
  2427. case ARM::MVE_VSTRB16_pre:
  2428. case ARM::MVE_VSTRB32_pre:
  2429. case ARM::MVE_VSTRH32_pre:
  2430. case ARM::MVE_VSTRBU8_pre:
  2431. case ARM::MVE_VSTRHU16_pre:
  2432. case ARM::MVE_VSTRWU32_pre:
  2433. return true;
  2434. }
  2435. return false;
  2436. }
  2437. // Given a memory access Opcode, check that the give Imm would be a valid Offset
  2438. // for this instruction (same as isLegalAddressImm), Or if the instruction
  2439. // could be easily converted to one where that was valid. For example converting
  2440. // t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
  2441. // AdjustBaseAndOffset below.
  2442. static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
  2443. const TargetInstrInfo *TII,
  2444. int &CodesizeEstimate) {
  2445. if (isLegalAddressImm(Opcode, Imm, TII))
  2446. return true;
  2447. // We can convert AddrModeT2_i12 to AddrModeT2_i8.
  2448. const MCInstrDesc &Desc = TII->get(Opcode);
  2449. unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  2450. switch (AddrMode) {
  2451. case ARMII::AddrModeT2_i12:
  2452. CodesizeEstimate += 1;
  2453. return std::abs(Imm) < (((1 << 8) * 1) - 1);
  2454. }
  2455. return false;
  2456. }
  2457. // Given an MI adjust its address BaseReg to use NewBaseReg and address offset
  2458. // by -Offset. This can either happen in-place or be a replacement as MI is
  2459. // converted to another instruction type.
  2460. static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
  2461. int Offset, const TargetInstrInfo *TII) {
  2462. unsigned BaseOp = getBaseOperandIndex(*MI);
  2463. MI->getOperand(BaseOp).setReg(NewBaseReg);
  2464. int OldOffset = MI->getOperand(BaseOp + 1).getImm();
  2465. if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
  2466. MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
  2467. else {
  2468. unsigned ConvOpcode;
  2469. switch (MI->getOpcode()) {
  2470. case ARM::t2LDRHi12:
  2471. ConvOpcode = ARM::t2LDRHi8;
  2472. break;
  2473. case ARM::t2LDRSHi12:
  2474. ConvOpcode = ARM::t2LDRSHi8;
  2475. break;
  2476. case ARM::t2LDRBi12:
  2477. ConvOpcode = ARM::t2LDRBi8;
  2478. break;
  2479. case ARM::t2LDRSBi12:
  2480. ConvOpcode = ARM::t2LDRSBi8;
  2481. break;
  2482. case ARM::t2STRHi12:
  2483. ConvOpcode = ARM::t2STRHi8;
  2484. break;
  2485. case ARM::t2STRBi12:
  2486. ConvOpcode = ARM::t2STRBi8;
  2487. break;
  2488. default:
  2489. llvm_unreachable("Unhandled convertable opcode");
  2490. }
  2491. assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
  2492. "Illegal Address Immediate after convert!");
  2493. const MCInstrDesc &MCID = TII->get(ConvOpcode);
  2494. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2495. .add(MI->getOperand(0))
  2496. .add(MI->getOperand(1))
  2497. .addImm(OldOffset - Offset)
  2498. .add(MI->getOperand(3))
  2499. .add(MI->getOperand(4))
  2500. .cloneMemRefs(*MI);
  2501. MI->eraseFromParent();
  2502. }
  2503. }
  2504. static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
  2505. Register NewReg,
  2506. const TargetInstrInfo *TII,
  2507. const TargetRegisterInfo *TRI) {
  2508. MachineFunction *MF = MI->getMF();
  2509. MachineRegisterInfo &MRI = MF->getRegInfo();
  2510. unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
  2511. MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
  2512. const MCInstrDesc &MCID = TII->get(NewOpcode);
  2513. // Constrain the def register class
  2514. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2515. MRI.constrainRegClass(NewReg, TRC);
  2516. // And do the same for the base operand
  2517. TRC = TII->getRegClass(MCID, 2, TRI, *MF);
  2518. MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
  2519. unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
  2520. switch (AddrMode) {
  2521. case ARMII::AddrModeT2_i7:
  2522. case ARMII::AddrModeT2_i7s2:
  2523. case ARMII::AddrModeT2_i7s4:
  2524. // Any MVE load/store
  2525. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2526. .addReg(NewReg, RegState::Define)
  2527. .add(MI->getOperand(0))
  2528. .add(MI->getOperand(1))
  2529. .addImm(Offset)
  2530. .add(MI->getOperand(3))
  2531. .add(MI->getOperand(4))
  2532. .cloneMemRefs(*MI);
  2533. case ARMII::AddrModeT2_i8:
  2534. if (MI->mayLoad()) {
  2535. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2536. .add(MI->getOperand(0))
  2537. .addReg(NewReg, RegState::Define)
  2538. .add(MI->getOperand(1))
  2539. .addImm(Offset)
  2540. .add(MI->getOperand(3))
  2541. .add(MI->getOperand(4))
  2542. .cloneMemRefs(*MI);
  2543. } else {
  2544. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2545. .addReg(NewReg, RegState::Define)
  2546. .add(MI->getOperand(0))
  2547. .add(MI->getOperand(1))
  2548. .addImm(Offset)
  2549. .add(MI->getOperand(3))
  2550. .add(MI->getOperand(4))
  2551. .cloneMemRefs(*MI);
  2552. }
  2553. default:
  2554. llvm_unreachable("Unhandled createPostIncLoadStore");
  2555. }
  2556. }
  2557. // Given a Base Register, optimise the load/store uses to attempt to create more
  2558. // post-inc accesses and less register moves. We do this by taking zero offset
  2559. // loads/stores with an add, and convert them to a postinc load/store of the
  2560. // same type. Any subsequent accesses will be adjusted to use and account for
  2561. // the post-inc value.
  2562. // For example:
  2563. // LDR #0 LDR_POSTINC #16
  2564. // LDR #4 LDR #-12
  2565. // LDR #8 LDR #-8
  2566. // LDR #12 LDR #-4
  2567. // ADD #16
  2568. //
  2569. // At the same time if we do not find an increment but do find an existing
  2570. // pre/post inc instruction, we can still adjust the offsets of subsequent
  2571. // instructions to save the register move that would otherwise be needed for the
  2572. // in-place increment.
  2573. bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
  2574. // We are looking for:
  2575. // One zero offset load/store that can become postinc
  2576. MachineInstr *BaseAccess = nullptr;
  2577. MachineInstr *PrePostInc = nullptr;
  2578. // An increment that can be folded in
  2579. MachineInstr *Increment = nullptr;
  2580. // Other accesses after BaseAccess that will need to be updated to use the
  2581. // postinc value.
  2582. SmallPtrSet<MachineInstr *, 8> OtherAccesses;
  2583. for (auto &Use : MRI->use_nodbg_instructions(Base)) {
  2584. if (!Increment && getAddSubImmediate(Use) != 0) {
  2585. Increment = &Use;
  2586. continue;
  2587. }
  2588. int BaseOp = getBaseOperandIndex(Use);
  2589. if (BaseOp == -1)
  2590. return false;
  2591. if (!Use.getOperand(BaseOp).isReg() ||
  2592. Use.getOperand(BaseOp).getReg() != Base)
  2593. return false;
  2594. if (isPreIndex(Use) || isPostIndex(Use))
  2595. PrePostInc = &Use;
  2596. else if (Use.getOperand(BaseOp + 1).getImm() == 0)
  2597. BaseAccess = &Use;
  2598. else
  2599. OtherAccesses.insert(&Use);
  2600. }
  2601. int IncrementOffset;
  2602. Register NewBaseReg;
  2603. if (BaseAccess && Increment) {
  2604. if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
  2605. return false;
  2606. Register PredReg;
  2607. if (Increment->definesRegister(ARM::CPSR) ||
  2608. getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
  2609. return false;
  2610. LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
  2611. << Base.virtRegIndex() << "\n");
  2612. // Make sure that Increment has no uses before BaseAccess.
  2613. for (MachineInstr &Use :
  2614. MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
  2615. if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
  2616. LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
  2617. return false;
  2618. }
  2619. }
  2620. // Make sure that Increment can be folded into Base
  2621. IncrementOffset = getAddSubImmediate(*Increment);
  2622. unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
  2623. BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
  2624. if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
  2625. LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
  2626. return false;
  2627. }
  2628. }
  2629. else if (PrePostInc) {
  2630. // If we already have a pre/post index load/store then set BaseAccess,
  2631. // IncrementOffset and NewBaseReg to the values it already produces,
  2632. // allowing us to update and subsequent uses of BaseOp reg with the
  2633. // incremented value.
  2634. if (Increment)
  2635. return false;
  2636. LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
  2637. << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
  2638. int BaseOp = getBaseOperandIndex(*PrePostInc);
  2639. IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
  2640. BaseAccess = PrePostInc;
  2641. NewBaseReg = PrePostInc->getOperand(0).getReg();
  2642. }
  2643. else
  2644. return false;
  2645. // And make sure that the negative value of increment can be added to all
  2646. // other offsets after the BaseAccess. We rely on either
  2647. // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
  2648. // to keep things simple.
  2649. // This also adds a simple codesize metric, to detect if an instruction (like
  2650. // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
  2651. // cannot because it is converted to something else (t2LDRBi8). We start this
  2652. // at -1 for the gain from removing the increment.
  2653. SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
  2654. int CodesizeEstimate = -1;
  2655. for (auto *Use : OtherAccesses) {
  2656. if (DT->dominates(BaseAccess, Use)) {
  2657. SuccessorAccesses.insert(Use);
  2658. unsigned BaseOp = getBaseOperandIndex(*Use);
  2659. if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
  2660. Use->getOperand(BaseOp + 1).getImm() -
  2661. IncrementOffset,
  2662. TII, CodesizeEstimate)) {
  2663. LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
  2664. return false;
  2665. }
  2666. } else if (!DT->dominates(Use, BaseAccess)) {
  2667. LLVM_DEBUG(
  2668. dbgs() << " Unknown dominance relation between Base and Use\n");
  2669. return false;
  2670. }
  2671. }
  2672. if (STI->hasMinSize() && CodesizeEstimate > 0) {
  2673. LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
  2674. return false;
  2675. }
  2676. if (!PrePostInc) {
  2677. // Replace BaseAccess with a post inc
  2678. LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
  2679. LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
  2680. NewBaseReg = Increment->getOperand(0).getReg();
  2681. MachineInstr *BaseAccessPost =
  2682. createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
  2683. BaseAccess->eraseFromParent();
  2684. Increment->eraseFromParent();
  2685. (void)BaseAccessPost;
  2686. LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
  2687. }
  2688. for (auto *Use : SuccessorAccesses) {
  2689. LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
  2690. AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII);
  2691. LLVM_DEBUG(dbgs() << " To : "; Use->dump());
  2692. }
  2693. // Remove the kill flag from all uses of NewBaseReg, in case any old uses
  2694. // remain.
  2695. for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
  2696. Op.setIsKill(false);
  2697. return true;
  2698. }
  2699. bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
  2700. bool Changed = false;
  2701. SmallSetVector<Register, 4> Visited;
  2702. for (auto &MBB : *MF) {
  2703. for (auto &MI : MBB) {
  2704. int BaseOp = getBaseOperandIndex(MI);
  2705. if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
  2706. continue;
  2707. Register Base = MI.getOperand(BaseOp).getReg();
  2708. if (!Base.isVirtual() || Visited.count(Base))
  2709. continue;
  2710. Visited.insert(Base);
  2711. }
  2712. }
  2713. for (auto Base : Visited)
  2714. Changed |= DistributeIncrements(Base);
  2715. return Changed;
  2716. }
  2717. /// Returns an instance of the load / store optimization pass.
  2718. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  2719. if (PreAlloc)
  2720. return new ARMPreAllocLoadStoreOpt();
  2721. return new ARMLoadStoreOpt();
  2722. }