ARMLoadStoreOptimizer.cpp 105 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023
  1. //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file This file contains a pass that performs load / store related peephole
  10. /// optimizations. This pass should be run after register allocation.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "ARM.h"
  14. #include "ARMBaseInstrInfo.h"
  15. #include "ARMBaseRegisterInfo.h"
  16. #include "ARMISelLowering.h"
  17. #include "ARMMachineFunctionInfo.h"
  18. #include "ARMSubtarget.h"
  19. #include "MCTargetDesc/ARMAddressingModes.h"
  20. #include "MCTargetDesc/ARMBaseInfo.h"
  21. #include "Utils/ARMBaseInfo.h"
  22. #include "llvm/ADT/ArrayRef.h"
  23. #include "llvm/ADT/DenseMap.h"
  24. #include "llvm/ADT/DenseSet.h"
  25. #include "llvm/ADT/STLExtras.h"
  26. #include "llvm/ADT/SetVector.h"
  27. #include "llvm/ADT/SmallPtrSet.h"
  28. #include "llvm/ADT/SmallSet.h"
  29. #include "llvm/ADT/SmallVector.h"
  30. #include "llvm/ADT/Statistic.h"
  31. #include "llvm/ADT/iterator_range.h"
  32. #include "llvm/Analysis/AliasAnalysis.h"
  33. #include "llvm/CodeGen/LivePhysRegs.h"
  34. #include "llvm/CodeGen/MachineBasicBlock.h"
  35. #include "llvm/CodeGen/MachineDominators.h"
  36. #include "llvm/CodeGen/MachineFrameInfo.h"
  37. #include "llvm/CodeGen/MachineFunction.h"
  38. #include "llvm/CodeGen/MachineFunctionPass.h"
  39. #include "llvm/CodeGen/MachineInstr.h"
  40. #include "llvm/CodeGen/MachineInstrBuilder.h"
  41. #include "llvm/CodeGen/MachineMemOperand.h"
  42. #include "llvm/CodeGen/MachineOperand.h"
  43. #include "llvm/CodeGen/MachineRegisterInfo.h"
  44. #include "llvm/CodeGen/RegisterClassInfo.h"
  45. #include "llvm/CodeGen/TargetFrameLowering.h"
  46. #include "llvm/CodeGen/TargetInstrInfo.h"
  47. #include "llvm/CodeGen/TargetLowering.h"
  48. #include "llvm/CodeGen/TargetRegisterInfo.h"
  49. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  50. #include "llvm/IR/DataLayout.h"
  51. #include "llvm/IR/DebugLoc.h"
  52. #include "llvm/IR/DerivedTypes.h"
  53. #include "llvm/IR/Function.h"
  54. #include "llvm/IR/Type.h"
  55. #include "llvm/InitializePasses.h"
  56. #include "llvm/MC/MCInstrDesc.h"
  57. #include "llvm/Pass.h"
  58. #include "llvm/Support/Allocator.h"
  59. #include "llvm/Support/CommandLine.h"
  60. #include "llvm/Support/Debug.h"
  61. #include "llvm/Support/ErrorHandling.h"
  62. #include "llvm/Support/raw_ostream.h"
  63. #include <algorithm>
  64. #include <cassert>
  65. #include <cstddef>
  66. #include <cstdlib>
  67. #include <iterator>
  68. #include <limits>
  69. #include <utility>
  70. using namespace llvm;
// Tag under which this file's debug output and statistics are grouped.
#define DEBUG_TYPE "arm-ldst-opt"

// Counters reported by this pass. The ldm / stm / vldm / vstm "generated"
// counters are incremented in getLoadStoreMultipleOpcode(), i.e. each time a
// multiple-transfer opcode is looked up for a merge.
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// The option is hidden and defaults to false.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

// Human-readable pass name, shared by getPassName() and INITIALIZE_PASS.
#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
  92. namespace {
  93. /// Post- register allocation pass the combine load / store instructions to
  94. /// form ldm / stm instructions.
  95. struct ARMLoadStoreOpt : public MachineFunctionPass {
  96. static char ID;
  97. const MachineFunction *MF;
  98. const TargetInstrInfo *TII;
  99. const TargetRegisterInfo *TRI;
  100. const ARMSubtarget *STI;
  101. const TargetLowering *TL;
  102. ARMFunctionInfo *AFI;
  103. LivePhysRegs LiveRegs;
  104. RegisterClassInfo RegClassInfo;
  105. MachineBasicBlock::const_iterator LiveRegPos;
  106. bool LiveRegsValid;
  107. bool RegClassInfoValid;
  108. bool isThumb1, isThumb2;
  109. ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
  110. bool runOnMachineFunction(MachineFunction &Fn) override;
  111. MachineFunctionProperties getRequiredProperties() const override {
  112. return MachineFunctionProperties().set(
  113. MachineFunctionProperties::Property::NoVRegs);
  114. }
  115. StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
  116. private:
  117. /// A set of load/store MachineInstrs with same base register sorted by
  118. /// offset.
  119. struct MemOpQueueEntry {
  120. MachineInstr *MI;
  121. int Offset; ///< Load/Store offset.
  122. unsigned Position; ///< Position as counted from end of basic block.
  123. MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
  124. : MI(&MI), Offset(Offset), Position(Position) {}
  125. };
  126. using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
  127. /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  128. /// merged into a LDM/STM.
  129. struct MergeCandidate {
  130. /// List of instructions ordered by load/store offset.
  131. SmallVector<MachineInstr*, 4> Instrs;
  132. /// Index in Instrs of the instruction being latest in the schedule.
  133. unsigned LatestMIIdx;
  134. /// Index in Instrs of the instruction being earliest in the schedule.
  135. unsigned EarliestMIIdx;
  136. /// Index into the basic block where the merged instruction will be
  137. /// inserted. (See MemOpQueueEntry.Position)
  138. unsigned InsertPos;
  139. /// Whether the instructions can be merged into a ldm/stm instruction.
  140. bool CanMergeToLSMulti;
  141. /// Whether the instructions can be merged into a ldrd/strd instruction.
  142. bool CanMergeToLSDouble;
  143. };
  144. SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  145. SmallVector<const MergeCandidate*,4> Candidates;
  146. SmallVector<MachineInstr*,4> MergeBaseCandidates;
  147. void moveLiveRegsBefore(const MachineBasicBlock &MBB,
  148. MachineBasicBlock::const_iterator Before);
  149. unsigned findFreeReg(const TargetRegisterClass &RegClass);
  150. void UpdateBaseRegUses(MachineBasicBlock &MBB,
  151. MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
  152. unsigned Base, unsigned WordOffset,
  153. ARMCC::CondCodes Pred, unsigned PredReg);
  154. MachineInstr *CreateLoadStoreMulti(
  155. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  156. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  157. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  158. ArrayRef<std::pair<unsigned, bool>> Regs,
  159. ArrayRef<MachineInstr*> Instrs);
  160. MachineInstr *CreateLoadStoreDouble(
  161. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  162. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  163. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  164. ArrayRef<std::pair<unsigned, bool>> Regs,
  165. ArrayRef<MachineInstr*> Instrs) const;
  166. void FormCandidates(const MemOpQueue &MemOps);
  167. MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  168. bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
  169. MachineBasicBlock::iterator &MBBI);
  170. bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  171. bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  172. bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  173. bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  174. bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  175. bool CombineMovBx(MachineBasicBlock &MBB);
  176. };
  177. } // end anonymous namespace
// Storage for the pass ID that the ARMLoadStoreOpt constructor hands to
// MachineFunctionPass.
char ARMLoadStoreOpt::ID = 0;

// Register the pass under the name "arm-ldst-opt" with the shared
// human-readable pass name.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only / is-analysis flags of INITIALIZE_PASS - confirm against the macro.
INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
                false)
  181. static bool definesCPSR(const MachineInstr &MI) {
  182. for (const auto &MO : MI.operands()) {
  183. if (!MO.isReg())
  184. continue;
  185. if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
  186. // If the instruction has live CPSR def, then it's not safe to fold it
  187. // into load / store.
  188. return true;
  189. }
  190. return false;
  191. }
  192. static int getMemoryOpOffset(const MachineInstr &MI) {
  193. unsigned Opcode = MI.getOpcode();
  194. bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  195. unsigned NumOperands = MI.getDesc().getNumOperands();
  196. unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
  197. if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
  198. Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
  199. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
  200. Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  201. return OffField;
  202. // Thumb1 immediate offsets are scaled by 4
  203. if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
  204. Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
  205. return OffField * 4;
  206. int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
  207. : ARM_AM::getAM5Offset(OffField) * 4;
  208. ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
  209. : ARM_AM::getAM5Op(OffField);
  210. if (Op == ARM_AM::sub)
  211. return -Offset;
  212. return Offset;
  213. }
  214. static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  215. return MI.getOperand(1);
  216. }
  217. static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  218. return MI.getOperand(0);
  219. }
  220. static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  221. switch (Opcode) {
  222. default: llvm_unreachable("Unhandled opcode!");
  223. case ARM::LDRi12:
  224. ++NumLDMGened;
  225. switch (Mode) {
  226. default: llvm_unreachable("Unhandled submode!");
  227. case ARM_AM::ia: return ARM::LDMIA;
  228. case ARM_AM::da: return ARM::LDMDA;
  229. case ARM_AM::db: return ARM::LDMDB;
  230. case ARM_AM::ib: return ARM::LDMIB;
  231. }
  232. case ARM::STRi12:
  233. ++NumSTMGened;
  234. switch (Mode) {
  235. default: llvm_unreachable("Unhandled submode!");
  236. case ARM_AM::ia: return ARM::STMIA;
  237. case ARM_AM::da: return ARM::STMDA;
  238. case ARM_AM::db: return ARM::STMDB;
  239. case ARM_AM::ib: return ARM::STMIB;
  240. }
  241. case ARM::tLDRi:
  242. case ARM::tLDRspi:
  243. // tLDMIA is writeback-only - unless the base register is in the input
  244. // reglist.
  245. ++NumLDMGened;
  246. switch (Mode) {
  247. default: llvm_unreachable("Unhandled submode!");
  248. case ARM_AM::ia: return ARM::tLDMIA;
  249. }
  250. case ARM::tSTRi:
  251. case ARM::tSTRspi:
  252. // There is no non-writeback tSTMIA either.
  253. ++NumSTMGened;
  254. switch (Mode) {
  255. default: llvm_unreachable("Unhandled submode!");
  256. case ARM_AM::ia: return ARM::tSTMIA_UPD;
  257. }
  258. case ARM::t2LDRi8:
  259. case ARM::t2LDRi12:
  260. ++NumLDMGened;
  261. switch (Mode) {
  262. default: llvm_unreachable("Unhandled submode!");
  263. case ARM_AM::ia: return ARM::t2LDMIA;
  264. case ARM_AM::db: return ARM::t2LDMDB;
  265. }
  266. case ARM::t2STRi8:
  267. case ARM::t2STRi12:
  268. ++NumSTMGened;
  269. switch (Mode) {
  270. default: llvm_unreachable("Unhandled submode!");
  271. case ARM_AM::ia: return ARM::t2STMIA;
  272. case ARM_AM::db: return ARM::t2STMDB;
  273. }
  274. case ARM::VLDRS:
  275. ++NumVLDMGened;
  276. switch (Mode) {
  277. default: llvm_unreachable("Unhandled submode!");
  278. case ARM_AM::ia: return ARM::VLDMSIA;
  279. case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
  280. }
  281. case ARM::VSTRS:
  282. ++NumVSTMGened;
  283. switch (Mode) {
  284. default: llvm_unreachable("Unhandled submode!");
  285. case ARM_AM::ia: return ARM::VSTMSIA;
  286. case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
  287. }
  288. case ARM::VLDRD:
  289. ++NumVLDMGened;
  290. switch (Mode) {
  291. default: llvm_unreachable("Unhandled submode!");
  292. case ARM_AM::ia: return ARM::VLDMDIA;
  293. case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
  294. }
  295. case ARM::VSTRD:
  296. ++NumVSTMGened;
  297. switch (Mode) {
  298. default: llvm_unreachable("Unhandled submode!");
  299. case ARM_AM::ia: return ARM::VSTMDIA;
  300. case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
  301. }
  302. }
  303. }
  304. static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  305. switch (Opcode) {
  306. default: llvm_unreachable("Unhandled opcode!");
  307. case ARM::LDMIA_RET:
  308. case ARM::LDMIA:
  309. case ARM::LDMIA_UPD:
  310. case ARM::STMIA:
  311. case ARM::STMIA_UPD:
  312. case ARM::tLDMIA:
  313. case ARM::tLDMIA_UPD:
  314. case ARM::tSTMIA_UPD:
  315. case ARM::t2LDMIA_RET:
  316. case ARM::t2LDMIA:
  317. case ARM::t2LDMIA_UPD:
  318. case ARM::t2STMIA:
  319. case ARM::t2STMIA_UPD:
  320. case ARM::VLDMSIA:
  321. case ARM::VLDMSIA_UPD:
  322. case ARM::VSTMSIA:
  323. case ARM::VSTMSIA_UPD:
  324. case ARM::VLDMDIA:
  325. case ARM::VLDMDIA_UPD:
  326. case ARM::VSTMDIA:
  327. case ARM::VSTMDIA_UPD:
  328. return ARM_AM::ia;
  329. case ARM::LDMDA:
  330. case ARM::LDMDA_UPD:
  331. case ARM::STMDA:
  332. case ARM::STMDA_UPD:
  333. return ARM_AM::da;
  334. case ARM::LDMDB:
  335. case ARM::LDMDB_UPD:
  336. case ARM::STMDB:
  337. case ARM::STMDB_UPD:
  338. case ARM::t2LDMDB:
  339. case ARM::t2LDMDB_UPD:
  340. case ARM::t2STMDB:
  341. case ARM::t2STMDB_UPD:
  342. case ARM::VLDMSDB_UPD:
  343. case ARM::VSTMSDB_UPD:
  344. case ARM::VLDMDDB_UPD:
  345. case ARM::VSTMDDB_UPD:
  346. return ARM_AM::db;
  347. case ARM::LDMIB:
  348. case ARM::LDMIB_UPD:
  349. case ARM::STMIB:
  350. case ARM::STMIB_UPD:
  351. return ARM_AM::ib;
  352. }
  353. }
  354. static bool isT1i32Load(unsigned Opc) {
  355. return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
  356. }
  357. static bool isT2i32Load(unsigned Opc) {
  358. return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
  359. }
  360. static bool isi32Load(unsigned Opc) {
  361. return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
  362. }
  363. static bool isT1i32Store(unsigned Opc) {
  364. return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
  365. }
  366. static bool isT2i32Store(unsigned Opc) {
  367. return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
  368. }
  369. static bool isi32Store(unsigned Opc) {
  370. return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
  371. }
  372. static bool isLoadSingle(unsigned Opc) {
  373. return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
  374. }
  375. static unsigned getImmScale(unsigned Opc) {
  376. switch (Opc) {
  377. default: llvm_unreachable("Unhandled opcode!");
  378. case ARM::tLDRi:
  379. case ARM::tSTRi:
  380. case ARM::tLDRspi:
  381. case ARM::tSTRspi:
  382. return 1;
  383. case ARM::tLDRHi:
  384. case ARM::tSTRHi:
  385. return 2;
  386. case ARM::tLDRBi:
  387. case ARM::tSTRBi:
  388. return 4;
  389. }
  390. }
  391. static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  392. switch (MI->getOpcode()) {
  393. default: return 0;
  394. case ARM::LDRi12:
  395. case ARM::STRi12:
  396. case ARM::tLDRi:
  397. case ARM::tSTRi:
  398. case ARM::tLDRspi:
  399. case ARM::tSTRspi:
  400. case ARM::t2LDRi8:
  401. case ARM::t2LDRi12:
  402. case ARM::t2STRi8:
  403. case ARM::t2STRi12:
  404. case ARM::VLDRS:
  405. case ARM::VSTRS:
  406. return 4;
  407. case ARM::VLDRD:
  408. case ARM::VSTRD:
  409. return 8;
  410. case ARM::LDMIA:
  411. case ARM::LDMDA:
  412. case ARM::LDMDB:
  413. case ARM::LDMIB:
  414. case ARM::STMIA:
  415. case ARM::STMDA:
  416. case ARM::STMDB:
  417. case ARM::STMIB:
  418. case ARM::tLDMIA:
  419. case ARM::tLDMIA_UPD:
  420. case ARM::tSTMIA_UPD:
  421. case ARM::t2LDMIA:
  422. case ARM::t2LDMDB:
  423. case ARM::t2STMIA:
  424. case ARM::t2STMDB:
  425. case ARM::VLDMSIA:
  426. case ARM::VSTMSIA:
  427. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  428. case ARM::VLDMDIA:
  429. case ARM::VSTMDIA:
  430. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  431. }
  432. }
  433. /// Update future uses of the base register with the offset introduced
  434. /// due to writeback. This function only works on Thumb1.
  435. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
  436. MachineBasicBlock::iterator MBBI,
  437. const DebugLoc &DL, unsigned Base,
  438. unsigned WordOffset,
  439. ARMCC::CondCodes Pred,
  440. unsigned PredReg) {
  441. assert(isThumb1 && "Can only update base register uses for Thumb1!");
  442. // Start updating any instructions with immediate offsets. Insert a SUB before
  443. // the first non-updateable instruction (if any).
  444. for (; MBBI != MBB.end(); ++MBBI) {
  445. bool InsertSub = false;
  446. unsigned Opc = MBBI->getOpcode();
  447. if (MBBI->readsRegister(Base)) {
  448. int Offset;
  449. bool IsLoad =
  450. Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
  451. bool IsStore =
  452. Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
  453. if (IsLoad || IsStore) {
  454. // Loads and stores with immediate offsets can be updated, but only if
  455. // the new offset isn't negative.
  456. // The MachineOperand containing the offset immediate is the last one
  457. // before predicates.
  458. MachineOperand &MO =
  459. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  460. // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
  461. Offset = MO.getImm() - WordOffset * getImmScale(Opc);
  462. // If storing the base register, it needs to be reset first.
  463. Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
  464. if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
  465. MO.setImm(Offset);
  466. else
  467. InsertSub = true;
  468. } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
  469. !definesCPSR(*MBBI)) {
  470. // SUBS/ADDS using this register, with a dead def of the CPSR.
  471. // Merge it with the update; if the merged offset is too large,
  472. // insert a new sub instead.
  473. MachineOperand &MO =
  474. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  475. Offset = (Opc == ARM::tSUBi8) ?
  476. MO.getImm() + WordOffset * 4 :
  477. MO.getImm() - WordOffset * 4 ;
  478. if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
  479. // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
  480. // Offset == 0.
  481. MO.setImm(Offset);
  482. // The base register has now been reset, so exit early.
  483. return;
  484. } else {
  485. InsertSub = true;
  486. }
  487. } else {
  488. // Can't update the instruction.
  489. InsertSub = true;
  490. }
  491. } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
  492. // Since SUBS sets the condition flags, we can't place the base reset
  493. // after an instruction that has a live CPSR def.
  494. // The base register might also contain an argument for a function call.
  495. InsertSub = true;
  496. }
  497. if (InsertSub) {
  498. // An instruction above couldn't be updated, so insert a sub.
  499. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  500. .add(t1CondCodeOp(true))
  501. .addReg(Base)
  502. .addImm(WordOffset * 4)
  503. .addImm(Pred)
  504. .addReg(PredReg);
  505. return;
  506. }
  507. if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
  508. // Register got killed. Stop updating.
  509. return;
  510. }
  511. // End of block was reached.
  512. if (!MBB.succ_empty()) {
  513. // FIXME: Because of a bug, live registers are sometimes missing from
  514. // the successor blocks' live-in sets. This means we can't trust that
  515. // information and *always* have to reset at the end of a block.
  516. // See PR21029.
  517. if (MBBI != MBB.end()) --MBBI;
  518. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  519. .add(t1CondCodeOp(true))
  520. .addReg(Base)
  521. .addImm(WordOffset * 4)
  522. .addImm(Pred)
  523. .addReg(PredReg);
  524. }
  525. }
  526. /// Return the first register of class \p RegClass that is not in \p Regs.
  527. unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  528. if (!RegClassInfoValid) {
  529. RegClassInfo.runOnMachineFunction(*MF);
  530. RegClassInfoValid = true;
  531. }
  532. for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
  533. if (LiveRegs.available(MF->getRegInfo(), Reg))
  534. return Reg;
  535. return 0;
  536. }
  537. /// Compute live registers just before instruction \p Before (in normal schedule
  538. /// direction). Computes backwards so multiple queries in the same block must
  539. /// come in reverse order.
  540. void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
  541. MachineBasicBlock::const_iterator Before) {
  542. // Initialize if we never queried in this block.
  543. if (!LiveRegsValid) {
  544. LiveRegs.init(*TRI);
  545. LiveRegs.addLiveOuts(MBB);
  546. LiveRegPos = MBB.end();
  547. LiveRegsValid = true;
  548. }
  549. // Move backward just before the "Before" position.
  550. while (LiveRegPos != Before) {
  551. --LiveRegPos;
  552. LiveRegs.stepBackward(*LiveRegPos);
  553. }
  554. }
  555. static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
  556. unsigned Reg) {
  557. for (const std::pair<unsigned, bool> &R : Regs)
  558. if (R.first == Reg)
  559. return true;
  560. return false;
  561. }
  562. /// Create and insert a LDM or STM with Base as base register and registers in
  563. /// Regs as the register operands that would be loaded / stored. It returns
  564. /// true if the transformation is done.
  565. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
  566. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  567. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  568. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  569. ArrayRef<std::pair<unsigned, bool>> Regs,
  570. ArrayRef<MachineInstr*> Instrs) {
  571. unsigned NumRegs = Regs.size();
  572. assert(NumRegs > 1);
  573. // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  574. // Compute liveness information for that register to make the decision.
  575. bool SafeToClobberCPSR = !isThumb1 ||
  576. (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
  577. MachineBasicBlock::LQR_Dead);
  578. bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  579. // Exception: If the base register is in the input reglist, Thumb1 LDM is
  580. // non-writeback.
  581. // It's also not possible to merge an STR of the base register in Thumb1.
  582. if (isThumb1 && ContainsReg(Regs, Base)) {
  583. assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
  584. if (Opcode == ARM::tLDRi)
  585. Writeback = false;
  586. else if (Opcode == ARM::tSTRi)
  587. return nullptr;
  588. }
  589. ARM_AM::AMSubMode Mode = ARM_AM::ia;
  590. // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  591. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  592. bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  593. if (Offset == 4 && haveIBAndDA) {
  594. Mode = ARM_AM::ib;
  595. } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  596. Mode = ARM_AM::da;
  597. } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
  598. // VLDM/VSTM do not support DB mode without also updating the base reg.
  599. Mode = ARM_AM::db;
  600. } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
  601. // Check if this is a supported opcode before inserting instructions to
  602. // calculate a new base register.
  603. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
  604. // If starting offset isn't zero, insert a MI to materialize a new base.
  605. // But only do so if it is cost effective, i.e. merging more than two
  606. // loads / stores.
  607. if (NumRegs <= 2)
  608. return nullptr;
  609. // On Thumb1, it's not worth materializing a new base register without
  610. // clobbering the CPSR (i.e. not using ADDS/SUBS).
  611. if (!SafeToClobberCPSR)
  612. return nullptr;
  613. unsigned NewBase;
  614. if (isi32Load(Opcode)) {
  615. // If it is a load, then just use one of the destination registers
  616. // as the new base. Will no longer be writeback in Thumb1.
  617. NewBase = Regs[NumRegs-1].first;
  618. Writeback = false;
  619. } else {
  620. // Find a free register that we can use as scratch register.
  621. moveLiveRegsBefore(MBB, InsertBefore);
  622. // The merged instruction does not exist yet but will use several Regs if
  623. // it is a Store.
  624. if (!isLoadSingle(Opcode))
  625. for (const std::pair<unsigned, bool> &R : Regs)
  626. LiveRegs.addReg(R.first);
  627. NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
  628. if (NewBase == 0)
  629. return nullptr;
  630. }
  631. int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
  632. : ARM::t2ADDri)
  633. : (isThumb1 && Base == ARM::SP)
  634. ? ARM::tADDrSPi
  635. : (isThumb1 && Offset < 8)
  636. ? ARM::tADDi3
  637. : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
  638. if (Offset < 0) {
  639. // FIXME: There are no Thumb1 load/store instructions with negative
  640. // offsets. So the Base != ARM::SP might be unnecessary.
  641. Offset = -Offset;
  642. BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
  643. : ARM::t2SUBri)
  644. : (isThumb1 && Offset < 8 && Base != ARM::SP)
  645. ? ARM::tSUBi3
  646. : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
  647. }
  648. if (!TL->isLegalAddImmediate(Offset))
  649. // FIXME: Try add with register operand?
  650. return nullptr; // Probably not worth it then.
  651. // We can only append a kill flag to the add/sub input if the value is not
  652. // used in the register list of the stm as well.
  653. bool KillOldBase = BaseKill &&
  654. (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
  655. if (isThumb1) {
  656. // Thumb1: depending on immediate size, use either
  657. // ADDS NewBase, Base, #imm3
  658. // or
  659. // MOV NewBase, Base
  660. // ADDS NewBase, #imm8.
  661. if (Base != NewBase &&
  662. (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
  663. // Need to insert a MOV to the new base first.
  664. if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
  665. !STI->hasV6Ops()) {
  666. // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
  667. if (Pred != ARMCC::AL)
  668. return nullptr;
  669. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
  670. .addReg(Base, getKillRegState(KillOldBase));
  671. } else
  672. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
  673. .addReg(Base, getKillRegState(KillOldBase))
  674. .add(predOps(Pred, PredReg));
  675. // The following ADDS/SUBS becomes an update.
  676. Base = NewBase;
  677. KillOldBase = true;
  678. }
  679. if (BaseOpc == ARM::tADDrSPi) {
  680. assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
  681. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  682. .addReg(Base, getKillRegState(KillOldBase))
  683. .addImm(Offset / 4)
  684. .add(predOps(Pred, PredReg));
  685. } else
  686. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  687. .add(t1CondCodeOp(true))
  688. .addReg(Base, getKillRegState(KillOldBase))
  689. .addImm(Offset)
  690. .add(predOps(Pred, PredReg));
  691. } else {
  692. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  693. .addReg(Base, getKillRegState(KillOldBase))
  694. .addImm(Offset)
  695. .add(predOps(Pred, PredReg))
  696. .add(condCodeOp());
  697. }
  698. Base = NewBase;
  699. BaseKill = true; // New base is always killed straight away.
  700. }
  701. bool isDef = isLoadSingle(Opcode);
  702. // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  703. // base register writeback.
  704. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  705. if (!Opcode)
  706. return nullptr;
  707. // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  708. // - There is no writeback (LDM of base register),
  709. // - the base register is killed by the merged instruction,
  710. // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  711. // to reset the base register.
  712. // Otherwise, don't merge.
  713. // It's safe to return here since the code to materialize a new base register
  714. // above is also conditional on SafeToClobberCPSR.
  715. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
  716. return nullptr;
  717. MachineInstrBuilder MIB;
  718. if (Writeback) {
  719. assert(isThumb1 && "expected Writeback only inThumb1");
  720. if (Opcode == ARM::tLDMIA) {
  721. assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
  722. // Update tLDMIA with writeback if necessary.
  723. Opcode = ARM::tLDMIA_UPD;
  724. }
  725. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  726. // Thumb1: we might need to set base writeback when building the MI.
  727. MIB.addReg(Base, getDefRegState(true))
  728. .addReg(Base, getKillRegState(BaseKill));
  729. // The base isn't dead after a merged instruction with writeback.
  730. // Insert a sub instruction after the newly formed instruction to reset.
  731. if (!BaseKill)
  732. UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  733. } else {
  734. // No writeback, simply build the MachineInstr.
  735. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  736. MIB.addReg(Base, getKillRegState(BaseKill));
  737. }
  738. MIB.addImm(Pred).addReg(PredReg);
  739. for (const std::pair<unsigned, bool> &R : Regs)
  740. MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
  741. MIB.cloneMergedMemRefs(Instrs);
  742. return MIB.getInstr();
  743. }
  744. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
  745. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  746. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  747. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  748. ArrayRef<std::pair<unsigned, bool>> Regs,
  749. ArrayRef<MachineInstr*> Instrs) const {
  750. bool IsLoad = isi32Load(Opcode);
  751. assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  752. unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  753. assert(Regs.size() == 2);
  754. MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
  755. TII->get(LoadStoreOpcode));
  756. if (IsLoad) {
  757. MIB.addReg(Regs[0].first, RegState::Define)
  758. .addReg(Regs[1].first, RegState::Define);
  759. } else {
  760. MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
  761. .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  762. }
  763. MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  764. MIB.cloneMergedMemRefs(Instrs);
  765. return MIB.getInstr();
  766. }
  767. /// Call MergeOps and update MemOps and merges accordingly on success.
  768. MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  769. const MachineInstr *First = Cand.Instrs.front();
  770. unsigned Opcode = First->getOpcode();
  771. bool IsLoad = isLoadSingle(Opcode);
  772. SmallVector<std::pair<unsigned, bool>, 8> Regs;
  773. SmallVector<unsigned, 4> ImpDefs;
  774. DenseSet<unsigned> KilledRegs;
  775. DenseSet<unsigned> UsedRegs;
  776. // Determine list of registers and list of implicit super-register defs.
  777. for (const MachineInstr *MI : Cand.Instrs) {
  778. const MachineOperand &MO = getLoadStoreRegOp(*MI);
  779. Register Reg = MO.getReg();
  780. bool IsKill = MO.isKill();
  781. if (IsKill)
  782. KilledRegs.insert(Reg);
  783. Regs.push_back(std::make_pair(Reg, IsKill));
  784. UsedRegs.insert(Reg);
  785. if (IsLoad) {
  786. // Collect any implicit defs of super-registers, after merging we can't
  787. // be sure anymore that we properly preserved these live ranges and must
  788. // removed these implicit operands.
  789. for (const MachineOperand &MO : MI->implicit_operands()) {
  790. if (!MO.isReg() || !MO.isDef() || MO.isDead())
  791. continue;
  792. assert(MO.isImplicit());
  793. Register DefReg = MO.getReg();
  794. if (is_contained(ImpDefs, DefReg))
  795. continue;
  796. // We can ignore cases where the super-reg is read and written.
  797. if (MI->readsRegister(DefReg))
  798. continue;
  799. ImpDefs.push_back(DefReg);
  800. }
  801. }
  802. }
  803. // Attempt the merge.
  804. using iterator = MachineBasicBlock::iterator;
  805. MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  806. iterator InsertBefore = std::next(iterator(LatestMI));
  807. MachineBasicBlock &MBB = *LatestMI->getParent();
  808. unsigned Offset = getMemoryOpOffset(*First);
  809. Register Base = getLoadStoreBaseOp(*First).getReg();
  810. bool BaseKill = LatestMI->killsRegister(Base);
  811. Register PredReg;
  812. ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  813. DebugLoc DL = First->getDebugLoc();
  814. MachineInstr *Merged = nullptr;
  815. if (Cand.CanMergeToLSDouble)
  816. Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
  817. Opcode, Pred, PredReg, DL, Regs,
  818. Cand.Instrs);
  819. if (!Merged && Cand.CanMergeToLSMulti)
  820. Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
  821. Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
  822. if (!Merged)
  823. return nullptr;
  824. // Determine earliest instruction that will get removed. We then keep an
  825. // iterator just above it so the following erases don't invalidated it.
  826. iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  827. bool EarliestAtBegin = false;
  828. if (EarliestI == MBB.begin()) {
  829. EarliestAtBegin = true;
  830. } else {
  831. EarliestI = std::prev(EarliestI);
  832. }
  833. // Remove instructions which have been merged.
  834. for (MachineInstr *MI : Cand.Instrs)
  835. MBB.erase(MI);
  836. // Determine range between the earliest removed instruction and the new one.
  837. if (EarliestAtBegin)
  838. EarliestI = MBB.begin();
  839. else
  840. EarliestI = std::next(EarliestI);
  841. auto FixupRange = make_range(EarliestI, iterator(Merged));
  842. if (isLoadSingle(Opcode)) {
  843. // If the previous loads defined a super-reg, then we have to mark earlier
  844. // operands undef; Replicate the super-reg def on the merged instruction.
  845. for (MachineInstr &MI : FixupRange) {
  846. for (unsigned &ImpDefReg : ImpDefs) {
  847. for (MachineOperand &MO : MI.implicit_operands()) {
  848. if (!MO.isReg() || MO.getReg() != ImpDefReg)
  849. continue;
  850. if (MO.readsReg())
  851. MO.setIsUndef();
  852. else if (MO.isDef())
  853. ImpDefReg = 0;
  854. }
  855. }
  856. }
  857. MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
  858. for (unsigned ImpDef : ImpDefs)
  859. MIB.addReg(ImpDef, RegState::ImplicitDefine);
  860. } else {
  861. // Remove kill flags: We are possibly storing the values later now.
  862. assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
  863. for (MachineInstr &MI : FixupRange) {
  864. for (MachineOperand &MO : MI.uses()) {
  865. if (!MO.isReg() || !MO.isKill())
  866. continue;
  867. if (UsedRegs.count(MO.getReg()))
  868. MO.setIsKill(false);
  869. }
  870. }
  871. assert(ImpDefs.empty());
  872. }
  873. return Merged;
  874. }
  /// Return true if \p Offset can be encoded by t2LDRDi8/t2STRDi8: its
  /// magnitude must be a multiple of 4 and smaller than 1024.
  static bool isValidLSDoubleOffset(int Offset) {
    // Compute the magnitude in unsigned arithmetic so that the most negative
    // int does not trigger the undefined behaviour of abs().
    const unsigned Bits = static_cast<unsigned>(Offset);
    const unsigned Value = Offset < 0 ? 0u - Bits : Bits;
    // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
    // multiplied by 4.
    return (Value % 4) == 0 && Value < 1024;
  }
  881. /// Return true for loads/stores that can be combined to a double/multi
  882. /// operation without increasing the requirements for alignment.
  883. static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
  884. const MachineInstr &MI) {
  885. // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  886. // difference.
  887. unsigned Opcode = MI.getOpcode();
  888. if (!isi32Load(Opcode) && !isi32Store(Opcode))
  889. return true;
  890. // Stack pointer alignment is out of the programmers control so we can trust
  891. // SP-relative loads/stores.
  892. if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
  893. STI.getFrameLowering()->getTransientStackAlign() >= Align(4))
  894. return true;
  895. return false;
  896. }
  897. /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
  898. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  899. const MachineInstr *FirstMI = MemOps[0].MI;
  900. unsigned Opcode = FirstMI->getOpcode();
  901. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  902. unsigned Size = getLSMultipleTransferSize(FirstMI);
  903. unsigned SIndex = 0;
  904. unsigned EIndex = MemOps.size();
  905. do {
  906. // Look at the first instruction.
  907. const MachineInstr *MI = MemOps[SIndex].MI;
  908. int Offset = MemOps[SIndex].Offset;
  909. const MachineOperand &PMO = getLoadStoreRegOp(*MI);
  910. Register PReg = PMO.getReg();
  911. unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
  912. : TRI->getEncodingValue(PReg);
  913. unsigned Latest = SIndex;
  914. unsigned Earliest = SIndex;
  915. unsigned Count = 1;
  916. bool CanMergeToLSDouble =
  917. STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
  918. // ARM errata 602117: LDRD with base in list may result in incorrect base
  919. // register when interrupted or faulted.
  920. if (STI->isCortexM3() && isi32Load(Opcode) &&
  921. PReg == getLoadStoreBaseOp(*MI).getReg())
  922. CanMergeToLSDouble = false;
  923. bool CanMergeToLSMulti = true;
  924. // On swift vldm/vstm starting with an odd register number as that needs
  925. // more uops than single vldrs.
  926. if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
  927. CanMergeToLSMulti = false;
  928. // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
  929. // deprecated; LDM to PC is fine but cannot happen here.
  930. if (PReg == ARM::SP || PReg == ARM::PC)
  931. CanMergeToLSMulti = CanMergeToLSDouble = false;
  932. // Should we be conservative?
  933. if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
  934. CanMergeToLSMulti = CanMergeToLSDouble = false;
  935. // vldm / vstm limit are 32 for S variants, 16 for D variants.
  936. unsigned Limit;
  937. switch (Opcode) {
  938. default:
  939. Limit = UINT_MAX;
  940. break;
  941. case ARM::VLDRD:
  942. case ARM::VSTRD:
  943. Limit = 16;
  944. break;
  945. }
  946. // Merge following instructions where possible.
  947. for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
  948. int NewOffset = MemOps[I].Offset;
  949. if (NewOffset != Offset + (int)Size)
  950. break;
  951. const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
  952. Register Reg = MO.getReg();
  953. if (Reg == ARM::SP || Reg == ARM::PC)
  954. break;
  955. if (Count == Limit)
  956. break;
  957. // See if the current load/store may be part of a multi load/store.
  958. unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
  959. : TRI->getEncodingValue(Reg);
  960. bool PartOfLSMulti = CanMergeToLSMulti;
  961. if (PartOfLSMulti) {
  962. // Register numbers must be in ascending order.
  963. if (RegNum <= PRegNum)
  964. PartOfLSMulti = false;
  965. // For VFP / NEON load/store multiples, the registers must be
  966. // consecutive and within the limit on the number of registers per
  967. // instruction.
  968. else if (!isNotVFP && RegNum != PRegNum+1)
  969. PartOfLSMulti = false;
  970. }
  971. // See if the current load/store may be part of a double load/store.
  972. bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
  973. if (!PartOfLSMulti && !PartOfLSDouble)
  974. break;
  975. CanMergeToLSMulti &= PartOfLSMulti;
  976. CanMergeToLSDouble &= PartOfLSDouble;
  977. // Track MemOp with latest and earliest position (Positions are
  978. // counted in reverse).
  979. unsigned Position = MemOps[I].Position;
  980. if (Position < MemOps[Latest].Position)
  981. Latest = I;
  982. else if (Position > MemOps[Earliest].Position)
  983. Earliest = I;
  984. // Prepare for next MemOp.
  985. Offset += Size;
  986. PRegNum = RegNum;
  987. }
  988. // Form a candidate from the Ops collected so far.
  989. MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
  990. for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
  991. Candidate->Instrs.push_back(MemOps[C].MI);
  992. Candidate->LatestMIIdx = Latest - SIndex;
  993. Candidate->EarliestMIIdx = Earliest - SIndex;
  994. Candidate->InsertPos = MemOps[Latest].Position;
  995. if (Count == 1)
  996. CanMergeToLSMulti = CanMergeToLSDouble = false;
  997. Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
  998. Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
  999. Candidates.push_back(Candidate);
  1000. // Continue after the chain.
  1001. SIndex += Count;
  1002. } while (SIndex < EIndex);
  1003. }
  1004. static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
  1005. ARM_AM::AMSubMode Mode) {
  1006. switch (Opc) {
  1007. default: llvm_unreachable("Unhandled opcode!");
  1008. case ARM::LDMIA:
  1009. case ARM::LDMDA:
  1010. case ARM::LDMDB:
  1011. case ARM::LDMIB:
  1012. switch (Mode) {
  1013. default: llvm_unreachable("Unhandled submode!");
  1014. case ARM_AM::ia: return ARM::LDMIA_UPD;
  1015. case ARM_AM::ib: return ARM::LDMIB_UPD;
  1016. case ARM_AM::da: return ARM::LDMDA_UPD;
  1017. case ARM_AM::db: return ARM::LDMDB_UPD;
  1018. }
  1019. case ARM::STMIA:
  1020. case ARM::STMDA:
  1021. case ARM::STMDB:
  1022. case ARM::STMIB:
  1023. switch (Mode) {
  1024. default: llvm_unreachable("Unhandled submode!");
  1025. case ARM_AM::ia: return ARM::STMIA_UPD;
  1026. case ARM_AM::ib: return ARM::STMIB_UPD;
  1027. case ARM_AM::da: return ARM::STMDA_UPD;
  1028. case ARM_AM::db: return ARM::STMDB_UPD;
  1029. }
  1030. case ARM::t2LDMIA:
  1031. case ARM::t2LDMDB:
  1032. switch (Mode) {
  1033. default: llvm_unreachable("Unhandled submode!");
  1034. case ARM_AM::ia: return ARM::t2LDMIA_UPD;
  1035. case ARM_AM::db: return ARM::t2LDMDB_UPD;
  1036. }
  1037. case ARM::t2STMIA:
  1038. case ARM::t2STMDB:
  1039. switch (Mode) {
  1040. default: llvm_unreachable("Unhandled submode!");
  1041. case ARM_AM::ia: return ARM::t2STMIA_UPD;
  1042. case ARM_AM::db: return ARM::t2STMDB_UPD;
  1043. }
  1044. case ARM::VLDMSIA:
  1045. switch (Mode) {
  1046. default: llvm_unreachable("Unhandled submode!");
  1047. case ARM_AM::ia: return ARM::VLDMSIA_UPD;
  1048. case ARM_AM::db: return ARM::VLDMSDB_UPD;
  1049. }
  1050. case ARM::VLDMDIA:
  1051. switch (Mode) {
  1052. default: llvm_unreachable("Unhandled submode!");
  1053. case ARM_AM::ia: return ARM::VLDMDIA_UPD;
  1054. case ARM_AM::db: return ARM::VLDMDDB_UPD;
  1055. }
  1056. case ARM::VSTMSIA:
  1057. switch (Mode) {
  1058. default: llvm_unreachable("Unhandled submode!");
  1059. case ARM_AM::ia: return ARM::VSTMSIA_UPD;
  1060. case ARM_AM::db: return ARM::VSTMSDB_UPD;
  1061. }
  1062. case ARM::VSTMDIA:
  1063. switch (Mode) {
  1064. default: llvm_unreachable("Unhandled submode!");
  1065. case ARM_AM::ia: return ARM::VSTMDIA_UPD;
  1066. case ARM_AM::db: return ARM::VSTMDDB_UPD;
  1067. }
  1068. }
  1069. }
  1070. /// Check if the given instruction increments or decrements a register and
  1071. /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
  1072. /// generated by the instruction are possibly read as well.
  1073. static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg,
  1074. ARMCC::CondCodes Pred, Register PredReg) {
  1075. bool CheckCPSRDef;
  1076. int Scale;
  1077. switch (MI.getOpcode()) {
  1078. case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
  1079. case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
  1080. case ARM::t2SUBri:
  1081. case ARM::t2SUBspImm:
  1082. case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
  1083. case ARM::t2ADDri:
  1084. case ARM::t2ADDspImm:
  1085. case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
  1086. case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  1087. case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  1088. default: return 0;
  1089. }
  1090. Register MIPredReg;
  1091. if (MI.getOperand(0).getReg() != Reg ||
  1092. MI.getOperand(1).getReg() != Reg ||
  1093. getInstrPredicate(MI, MIPredReg) != Pred ||
  1094. MIPredReg != PredReg)
  1095. return 0;
  1096. if (CheckCPSRDef && definesCPSR(MI))
  1097. return 0;
  1098. return MI.getOperand(2).getImm() * Scale;
  1099. }
  1100. /// Searches for an increment or decrement of \p Reg before \p MBBI.
  1101. static MachineBasicBlock::iterator
  1102. findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg,
  1103. ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
  1104. Offset = 0;
  1105. MachineBasicBlock &MBB = *MBBI->getParent();
  1106. MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  1107. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1108. if (MBBI == BeginMBBI)
  1109. return EndMBBI;
  1110. // Skip debug values.
  1111. MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  1112. while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
  1113. --PrevMBBI;
  1114. Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  1115. return Offset == 0 ? EndMBBI : PrevMBBI;
  1116. }
  1117. /// Searches for a increment or decrement of \p Reg after \p MBBI.
  1118. static MachineBasicBlock::iterator
  1119. findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
  1120. ARMCC::CondCodes Pred, Register PredReg, int &Offset,
  1121. const TargetRegisterInfo *TRI) {
  1122. Offset = 0;
  1123. MachineBasicBlock &MBB = *MBBI->getParent();
  1124. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1125. MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  1126. while (NextMBBI != EndMBBI) {
  1127. // Skip debug values.
  1128. while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
  1129. ++NextMBBI;
  1130. if (NextMBBI == EndMBBI)
  1131. return EndMBBI;
  1132. unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  1133. if (Off) {
  1134. Offset = Off;
  1135. return NextMBBI;
  1136. }
  1137. // SP can only be combined if it is the next instruction after the original
  1138. // MBBI, otherwise we may be incrementing the stack pointer (invalidating
  1139. // anything below the new pointer) when its frame elements are still in
  1140. // use. Other registers can attempt to look further, until a different use
  1141. // or def of the register is found.
  1142. if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
  1143. NextMBBI->definesRegister(Reg, TRI))
  1144. return EndMBBI;
  1145. ++NextMBBI;
  1146. }
  1147. return EndMBBI;
  1148. }
  1149. /// Fold proceeding/trailing inc/dec of base register into the
  1150. /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
  1151. ///
  1152. /// stmia rn, <ra, rb, rc>
  1153. /// rn := rn + 4 * 3;
  1154. /// =>
  1155. /// stmia rn!, <ra, rb, rc>
  1156. ///
  1157. /// rn := rn - 4 * 3;
  1158. /// ldmia rn, <ra, rb, rc>
  1159. /// =>
  1160. /// ldmdb rn!, <ra, rb, rc>
  1161. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  1162. // Thumb1 is already using updating loads/stores.
  1163. if (isThumb1) return false;
  1164. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
  1165. const MachineOperand &BaseOP = MI->getOperand(0);
  1166. Register Base = BaseOP.getReg();
  1167. bool BaseKill = BaseOP.isKill();
  1168. Register PredReg;
  1169. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1170. unsigned Opcode = MI->getOpcode();
  1171. DebugLoc DL = MI->getDebugLoc();
  1172. // Can't use an updating ld/st if the base register is also a dest
  1173. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  1174. for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
  1175. if (MO.getReg() == Base)
  1176. return false;
  1177. int Bytes = getLSMultipleTransferSize(MI);
  1178. MachineBasicBlock &MBB = *MI->getParent();
  1179. MachineBasicBlock::iterator MBBI(MI);
  1180. int Offset;
  1181. MachineBasicBlock::iterator MergeInstr
  1182. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1183. ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  1184. if (Mode == ARM_AM::ia && Offset == -Bytes) {
  1185. Mode = ARM_AM::db;
  1186. } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
  1187. Mode = ARM_AM::da;
  1188. } else {
  1189. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
  1190. if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
  1191. ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
  1192. // We couldn't find an inc/dec to merge. But if the base is dead, we
  1193. // can still change to a writeback form as that will save us 2 bytes
  1194. // of code size. It can create WAW hazards though, so only do it if
  1195. // we're minimizing code size.
  1196. if (!STI->hasMinSize() || !BaseKill)
  1197. return false;
  1198. bool HighRegsUsed = false;
  1199. for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
  1200. if (MO.getReg() >= ARM::R8) {
  1201. HighRegsUsed = true;
  1202. break;
  1203. }
  1204. if (!HighRegsUsed)
  1205. MergeInstr = MBB.end();
  1206. else
  1207. return false;
  1208. }
  1209. }
  1210. if (MergeInstr != MBB.end()) {
  1211. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1212. MBB.erase(MergeInstr);
  1213. }
  1214. unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  1215. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1216. .addReg(Base, getDefRegState(true)) // WB base register
  1217. .addReg(Base, getKillRegState(BaseKill))
  1218. .addImm(Pred).addReg(PredReg);
  1219. // Transfer the rest of operands.
  1220. for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
  1221. MIB.add(MO);
  1222. // Transfer memoperands.
  1223. MIB.setMemRefs(MI->memoperands());
  1224. LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
  1225. MBB.erase(MBBI);
  1226. return true;
  1227. }
  1228. static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
  1229. ARM_AM::AddrOpc Mode) {
  1230. switch (Opc) {
  1231. case ARM::LDRi12:
  1232. return ARM::LDR_PRE_IMM;
  1233. case ARM::STRi12:
  1234. return ARM::STR_PRE_IMM;
  1235. case ARM::VLDRS:
  1236. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1237. case ARM::VLDRD:
  1238. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1239. case ARM::VSTRS:
  1240. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1241. case ARM::VSTRD:
  1242. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1243. case ARM::t2LDRi8:
  1244. case ARM::t2LDRi12:
  1245. return ARM::t2LDR_PRE;
  1246. case ARM::t2STRi8:
  1247. case ARM::t2STRi12:
  1248. return ARM::t2STR_PRE;
  1249. default: llvm_unreachable("Unhandled opcode!");
  1250. }
  1251. }
  1252. static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
  1253. ARM_AM::AddrOpc Mode) {
  1254. switch (Opc) {
  1255. case ARM::LDRi12:
  1256. return ARM::LDR_POST_IMM;
  1257. case ARM::STRi12:
  1258. return ARM::STR_POST_IMM;
  1259. case ARM::VLDRS:
  1260. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1261. case ARM::VLDRD:
  1262. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1263. case ARM::VSTRS:
  1264. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1265. case ARM::VSTRD:
  1266. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1267. case ARM::t2LDRi8:
  1268. case ARM::t2LDRi12:
  1269. return ARM::t2LDR_POST;
  1270. case ARM::t2LDRBi8:
  1271. case ARM::t2LDRBi12:
  1272. return ARM::t2LDRB_POST;
  1273. case ARM::t2LDRSBi8:
  1274. case ARM::t2LDRSBi12:
  1275. return ARM::t2LDRSB_POST;
  1276. case ARM::t2LDRHi8:
  1277. case ARM::t2LDRHi12:
  1278. return ARM::t2LDRH_POST;
  1279. case ARM::t2LDRSHi8:
  1280. case ARM::t2LDRSHi12:
  1281. return ARM::t2LDRSH_POST;
  1282. case ARM::t2STRi8:
  1283. case ARM::t2STRi12:
  1284. return ARM::t2STR_POST;
  1285. case ARM::t2STRBi8:
  1286. case ARM::t2STRBi12:
  1287. return ARM::t2STRB_POST;
  1288. case ARM::t2STRHi8:
  1289. case ARM::t2STRHi12:
  1290. return ARM::t2STRH_POST;
  1291. case ARM::MVE_VLDRBS16:
  1292. return ARM::MVE_VLDRBS16_post;
  1293. case ARM::MVE_VLDRBS32:
  1294. return ARM::MVE_VLDRBS32_post;
  1295. case ARM::MVE_VLDRBU16:
  1296. return ARM::MVE_VLDRBU16_post;
  1297. case ARM::MVE_VLDRBU32:
  1298. return ARM::MVE_VLDRBU32_post;
  1299. case ARM::MVE_VLDRHS32:
  1300. return ARM::MVE_VLDRHS32_post;
  1301. case ARM::MVE_VLDRHU32:
  1302. return ARM::MVE_VLDRHU32_post;
  1303. case ARM::MVE_VLDRBU8:
  1304. return ARM::MVE_VLDRBU8_post;
  1305. case ARM::MVE_VLDRHU16:
  1306. return ARM::MVE_VLDRHU16_post;
  1307. case ARM::MVE_VLDRWU32:
  1308. return ARM::MVE_VLDRWU32_post;
  1309. case ARM::MVE_VSTRB16:
  1310. return ARM::MVE_VSTRB16_post;
  1311. case ARM::MVE_VSTRB32:
  1312. return ARM::MVE_VSTRB32_post;
  1313. case ARM::MVE_VSTRH32:
  1314. return ARM::MVE_VSTRH32_post;
  1315. case ARM::MVE_VSTRBU8:
  1316. return ARM::MVE_VSTRBU8_post;
  1317. case ARM::MVE_VSTRHU16:
  1318. return ARM::MVE_VSTRHU16_post;
  1319. case ARM::MVE_VSTRWU32:
  1320. return ARM::MVE_VSTRWU32_post;
  1321. default: llvm_unreachable("Unhandled opcode!");
  1322. }
  1323. }
  /// Fold preceding/trailing inc/dec of base register into the
  /// LDR/STR/FLD{D|S}/FST{D|S} op when possible.
  ///
  /// \param MI the single load/store whose base-register update should be
  /// folded. On success it is erased from its basic block.
  /// \returns true if \p MI and a neighbouring increment/decrement of its base
  /// register were replaced by one base-updating instruction, false if the
  /// block was left untouched.
  bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
    // Thumb1 doesn't have updating LDR/STR.
    // FIXME: Use LDM/STM with single register instead.
    if (isThumb1) return false;
    LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);

    Register Base = getLoadStoreBaseOp(*MI).getReg();
    bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
    unsigned Opcode = MI->getOpcode();
    DebugLoc DL = MI->getDebugLoc();
    // AM5: the VFP single loads/stores; AM2: the ARM-mode LDRi12/STRi12.
    bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                  Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
    bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
    // Only accesses with a zero immediate offset are candidates.
    if (isi32Load(Opcode) || isi32Store(Opcode))
      if (MI->getOperand(2).getImm() != 0)
        return false;
    if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
      return false;

    // Can't do the merge if the destination register is the same as the would-be
    // writeback register.
    if (MI->getOperand(0).getReg() == Base)
      return false;

    Register PredReg;
    ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
    int Bytes = getLSMultipleTransferSize(MI);
    MachineBasicBlock &MBB = *MI->getParent();
    MachineBasicBlock::iterator MBBI(MI);
    int Offset;
    // First look for an update in front of MI; Offset receives the amount
    // reported by the helper.
    MachineBasicBlock::iterator MergeInstr
      = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
    unsigned NewOpc;
    if (!isAM5 && Offset == Bytes) {
      // Preceding increment by exactly the access size: pre-indexed add.
      // AM5 opcodes are excluded from this form.
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (Offset == -Bytes) {
      // Preceding decrement by exactly the access size: pre-indexed sub.
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else {
      // Nothing usable in front; look for an update following MI instead.
      MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
      if (MergeInstr == MBB.end())
        return false;

      // Try the "add" post-indexed opcode first. AM5 requires the offset to
      // equal the access size; everything else is validated through
      // isLegalAddressImm.
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
      if ((isAM5 && Offset != Bytes) ||
          (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
        // Fall back to the "sub" opcode; never accepted for AM5.
        NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
        if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
          return false;
      }
    }
    LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
    // The base update is now redundant; remove it before inserting the
    // replacement in front of MI.
    MBB.erase(MergeInstr);

    ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

    bool isLd = isLoadSingle(Opcode);
    if (isAM5) {
      // VLDM[SD]_UPD, VSTM[SD]_UPD
      // (There are no base-updating versions of VLDR/VSTR instructions, but the
      // updating load/store-multiple instructions can be used with only one
      // register.)
      MachineOperand &MO = MI->getOperand(0);
      auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
                     .addReg(Base, getDefRegState(true)) // WB base register
                     .addReg(Base, getKillRegState(isLd ? BaseKill : false))
                     .addImm(Pred)
                     .addReg(PredReg)
                     .addReg(MO.getReg(), (isLd ? getDefRegState(true)
                                                : getKillRegState(MO.isKill())))
                     .cloneMemRefs(*MI);
      (void)MIB;
      LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
    } else if (isLd) {
      if (isAM2) {
        // LDR_PRE, LDR_POST
        if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
          // Pre-indexed form takes the signed offset directly.
          auto MIB =
              BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                  .addReg(Base, RegState::Define)
                  .addReg(Base)
                  .addImm(Offset)
                  .addImm(Pred)
                  .addReg(PredReg)
                  .cloneMemRefs(*MI);
          (void)MIB;
          LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
        } else {
          // Post-indexed form takes a zero offset register plus an AM2-encoded
          // immediate (direction + magnitude).
          int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
          auto MIB =
              BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                  .addReg(Base, RegState::Define)
                  .addReg(Base)
                  .addReg(0)
                  .addImm(Imm)
                  .add(predOps(Pred, PredReg))
                  .cloneMemRefs(*MI);
          (void)MIB;
          LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
        }
      } else {
        // t2LDR_PRE, t2LDR_POST
        auto MIB =
            BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
                .addReg(Base, RegState::Define)
                .addReg(Base)
                .addImm(Offset)
                .add(predOps(Pred, PredReg))
                .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
      }
    } else {
      // Stores: the written-back base is the instruction's only definition.
      MachineOperand &MO = MI->getOperand(0);
      // FIXME: post-indexed stores use am2offset_imm, which still encodes
      // the vestigal zero-reg offset register. When that's fixed, this clause
      // can be removed entirely.
      if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
        int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
        // STR_PRE, STR_POST
        auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
                       .addReg(MO.getReg(), getKillRegState(MO.isKill()))
                       .addReg(Base)
                       .addReg(0)
                       .addImm(Imm)
                       .add(predOps(Pred, PredReg))
                       .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
      } else {
        // t2STR_PRE, t2STR_POST
        auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
                       .addReg(MO.getReg(), getKillRegState(MO.isKill()))
                       .addReg(Base)
                       .addImm(Offset)
                       .add(predOps(Pred, PredReg))
                       .cloneMemRefs(*MI);
        (void)MIB;
        LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
      }
    }
    // The original, non-updating access has been superseded.
    MBB.erase(MBBI);

    return true;
  }
  1463. bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  1464. unsigned Opcode = MI.getOpcode();
  1465. assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
  1466. "Must have t2STRDi8 or t2LDRDi8");
  1467. if (MI.getOperand(3).getImm() != 0)
  1468. return false;
  1469. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
  1470. // Behaviour for writeback is undefined if base register is the same as one
  1471. // of the others.
  1472. const MachineOperand &BaseOp = MI.getOperand(2);
  1473. Register Base = BaseOp.getReg();
  1474. const MachineOperand &Reg0Op = MI.getOperand(0);
  1475. const MachineOperand &Reg1Op = MI.getOperand(1);
  1476. if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
  1477. return false;
  1478. Register PredReg;
  1479. ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  1480. MachineBasicBlock::iterator MBBI(MI);
  1481. MachineBasicBlock &MBB = *MI.getParent();
  1482. int Offset;
  1483. MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
  1484. PredReg, Offset);
  1485. unsigned NewOpc;
  1486. if (Offset == 8 || Offset == -8) {
  1487. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  1488. } else {
  1489. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
  1490. if (MergeInstr == MBB.end())
  1491. return false;
  1492. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
  1493. if (!isLegalAddressImm(NewOpc, Offset, TII))
  1494. return false;
  1495. }
  1496. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1497. MBB.erase(MergeInstr);
  1498. DebugLoc DL = MI.getDebugLoc();
  1499. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  1500. if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
  1501. MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  1502. } else {
  1503. assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
  1504. MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  1505. }
  1506. MIB.addReg(BaseOp.getReg(), RegState::Kill)
  1507. .addImm(Offset).addImm(Pred).addReg(PredReg);
  1508. assert(TII->get(Opcode).getNumOperands() == 6 &&
  1509. TII->get(NewOpc).getNumOperands() == 7 &&
  1510. "Unexpected number of operands in Opcode specification.");
  1511. // Transfer implicit operands.
  1512. for (const MachineOperand &MO : MI.implicit_operands())
  1513. MIB.add(MO);
  1514. MIB.cloneMemRefs(MI);
  1515. LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
  1516. MBB.erase(MBBI);
  1517. return true;
  1518. }
  1519. /// Returns true if instruction is a memory operation that this pass is capable
  1520. /// of operating on.
  1521. static bool isMemoryOp(const MachineInstr &MI) {
  1522. unsigned Opcode = MI.getOpcode();
  1523. switch (Opcode) {
  1524. case ARM::VLDRS:
  1525. case ARM::VSTRS:
  1526. case ARM::VLDRD:
  1527. case ARM::VSTRD:
  1528. case ARM::LDRi12:
  1529. case ARM::STRi12:
  1530. case ARM::tLDRi:
  1531. case ARM::tSTRi:
  1532. case ARM::tLDRspi:
  1533. case ARM::tSTRspi:
  1534. case ARM::t2LDRi8:
  1535. case ARM::t2LDRi12:
  1536. case ARM::t2STRi8:
  1537. case ARM::t2STRi12:
  1538. break;
  1539. default:
  1540. return false;
  1541. }
  1542. if (!MI.getOperand(1).isReg())
  1543. return false;
  1544. // When no memory operands are present, conservatively assume unaligned,
  1545. // volatile, unfoldable.
  1546. if (!MI.hasOneMemOperand())
  1547. return false;
  1548. const MachineMemOperand &MMO = **MI.memoperands_begin();
  1549. // Don't touch volatile memory accesses - we may be changing their order.
  1550. // TODO: We could allow unordered and monotonic atomics here, but we need to
  1551. // make sure the resulting ldm/stm is correctly marked as atomic.
  1552. if (MMO.isVolatile() || MMO.isAtomic())
  1553. return false;
  1554. // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  1555. // not.
  1556. if (MMO.getAlign() < Align(4))
  1557. return false;
  1558. // str <undef> could probably be eliminated entirely, but for now we just want
  1559. // to avoid making a mess of it.
  1560. // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  1561. if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
  1562. return false;
  1563. // Likewise don't mess with references to undefined addresses.
  1564. if (MI.getOperand(1).isUndef())
  1565. return false;
  1566. return true;
  1567. }
  1568. static void InsertLDR_STR(MachineBasicBlock &MBB,
  1569. MachineBasicBlock::iterator &MBBI, int Offset,
  1570. bool isDef, unsigned NewOpc, unsigned Reg,
  1571. bool RegDeadKill, bool RegUndef, unsigned BaseReg,
  1572. bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
  1573. unsigned PredReg, const TargetInstrInfo *TII,
  1574. MachineInstr *MI) {
  1575. if (isDef) {
  1576. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1577. TII->get(NewOpc))
  1578. .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
  1579. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1580. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1581. // FIXME: This is overly conservative; the new instruction accesses 4
  1582. // bytes, not 8.
  1583. MIB.cloneMemRefs(*MI);
  1584. } else {
  1585. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1586. TII->get(NewOpc))
  1587. .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
  1588. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1589. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1590. // FIXME: This is overly conservative; the new instruction accesses 4
  1591. // bytes, not 8.
  1592. MIB.cloneMemRefs(*MI);
  1593. }
  1594. }
  1595. bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  1596. MachineBasicBlock::iterator &MBBI) {
  1597. MachineInstr *MI = &*MBBI;
  1598. unsigned Opcode = MI->getOpcode();
  1599. // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
  1600. // if we see this opcode.
  1601. if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
  1602. return false;
  1603. const MachineOperand &BaseOp = MI->getOperand(2);
  1604. Register BaseReg = BaseOp.getReg();
  1605. Register EvenReg = MI->getOperand(0).getReg();
  1606. Register OddReg = MI->getOperand(1).getReg();
  1607. unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  1608. unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
  1609. // ARM errata 602117: LDRD with base in list may result in incorrect base
  1610. // register when interrupted or faulted.
  1611. bool Errata602117 = EvenReg == BaseReg &&
  1612. (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  1613. // ARM LDRD/STRD needs consecutive registers.
  1614. bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
  1615. (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
  1616. if (!Errata602117 && !NonConsecutiveRegs)
  1617. return false;
  1618. bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  1619. bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  1620. bool EvenDeadKill = isLd ?
  1621. MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  1622. bool EvenUndef = MI->getOperand(0).isUndef();
  1623. bool OddDeadKill = isLd ?
  1624. MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  1625. bool OddUndef = MI->getOperand(1).isUndef();
  1626. bool BaseKill = BaseOp.isKill();
  1627. bool BaseUndef = BaseOp.isUndef();
  1628. assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
  1629. "register offset not handled below");
  1630. int OffImm = getMemoryOpOffset(*MI);
  1631. Register PredReg;
  1632. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1633. if (OddRegNum > EvenRegNum && OffImm == 0) {
  1634. // Ascending register numbers and no offset. It's safe to change it to a
  1635. // ldm or stm.
  1636. unsigned NewOpc = (isLd)
  1637. ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
  1638. : (isT2 ? ARM::t2STMIA : ARM::STMIA);
  1639. if (isLd) {
  1640. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1641. .addReg(BaseReg, getKillRegState(BaseKill))
  1642. .addImm(Pred).addReg(PredReg)
  1643. .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
  1644. .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
  1645. .cloneMemRefs(*MI);
  1646. ++NumLDRD2LDM;
  1647. } else {
  1648. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1649. .addReg(BaseReg, getKillRegState(BaseKill))
  1650. .addImm(Pred).addReg(PredReg)
  1651. .addReg(EvenReg,
  1652. getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
  1653. .addReg(OddReg,
  1654. getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
  1655. .cloneMemRefs(*MI);
  1656. ++NumSTRD2STM;
  1657. }
  1658. } else {
  1659. // Split into two instructions.
  1660. unsigned NewOpc = (isLd)
  1661. ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1662. : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1663. // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
  1664. // so adjust and use t2LDRi12 here for that.
  1665. unsigned NewOpc2 = (isLd)
  1666. ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1667. : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1668. // If this is a load, make sure the first load does not clobber the base
  1669. // register before the second load reads it.
  1670. if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
  1671. assert(!TRI->regsOverlap(OddReg, BaseReg));
  1672. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1673. false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
  1674. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1675. false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1676. MI);
  1677. } else {
  1678. if (OddReg == EvenReg && EvenDeadKill) {
  1679. // If the two source operands are the same, the kill marker is
  1680. // probably on the first one. e.g.
  1681. // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
  1682. EvenDeadKill = false;
  1683. OddDeadKill = true;
  1684. }
  1685. // Never kill the base register in the first instruction.
  1686. if (EvenReg == BaseReg)
  1687. EvenDeadKill = false;
  1688. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1689. EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
  1690. MI);
  1691. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1692. OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1693. MI);
  1694. }
  1695. if (isLd)
  1696. ++NumLDRD2LDR;
  1697. else
  1698. ++NumSTRD2STR;
  1699. }
  1700. MBBI = MBB.erase(MBBI);
  1701. return true;
  1702. }
  /// An optimization pass to turn multiple LDR / STR ops of the same base and
  /// incrementing offset into LDM / STM ops.
  ///
  /// The block is scanned from its end towards its beginning. Memory operations
  /// that share opcode, base register and predicate are gathered into a chain
  /// kept sorted by offset; when a chain is broken it is handed to
  /// FormCandidates(). Afterwards the resulting candidates are sorted, merged,
  /// and neighbouring base-register updates are folded into the results.
  ///
  /// \returns true if any instruction in \p MBB was changed.
  bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
    MemOpQueue MemOps;
    // State describing the chain currently being collected; CurrBase == 0 means
    // no chain is open.
    unsigned CurrBase = 0;
    unsigned CurrOpc = ~0u;
    ARMCC::CondCodes CurrPred = ARMCC::AL;
    // Running count of the (non-skipped) instructions visited so far; stored
    // with each queued memory operation.
    unsigned Position = 0;
    assert(Candidates.size() == 0);
    assert(MergeBaseCandidates.size() == 0);
    LiveRegsValid = false;

    for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
         I = MBBI) {
      // The instruction in front of the iterator is the one we look at.
      MBBI = std::prev(I);
      // If the instruction had to be rewritten, MBBI has been updated by the
      // callee; resume the scan from there.
      if (FixInvalidRegPairOp(MBB, MBBI))
        continue;
      ++Position;

      if (isMemoryOp(*MBBI)) {
        unsigned Opcode = MBBI->getOpcode();
        const MachineOperand &MO = MBBI->getOperand(0);
        Register Reg = MO.getReg();
        Register Base = getLoadStoreBaseOp(*MBBI).getReg();
        Register PredReg;
        ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
        int Offset = getMemoryOpOffset(*MBBI);
        if (CurrBase == 0) {
          // Start of a new chain.
          CurrBase = Base;
          CurrOpc = Opcode;
          CurrPred = Pred;
          MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
          continue;
        }
        // Note: No need to match PredReg in the next if.
        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // Watch out for:
          //   r4 := ldr [r0, #8]
          //   r4 := ldr [r0, #4]
          // or
          //   r0 := ldr [r0]
          // If a load overrides the base register or a register loaded by
          // another load in our chain, we cannot take this instruction.
          bool Overlap = false;
          if (isLoadSingle(Opcode)) {
            Overlap = (Base == Reg);
            if (!Overlap) {
              for (const MemOpQueueEntry &E : MemOps) {
                if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                  Overlap = true;
                  break;
                }
              }
            }
          }

          if (!Overlap) {
            // Check offset and sort memory operation into the current chain.
            if (Offset > MemOps.back().Offset) {
              // Larger than everything queued so far: append.
              MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
              continue;
            } else {
              // Find the first queued entry with a larger offset and insert in
              // front of it; an entry with the same offset aborts the insert.
              MemOpQueue::iterator MI, ME;
              for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
                if (Offset < MI->Offset) {
                  // Found a place to insert.
                  break;
                }
                if (Offset == MI->Offset) {
                  // Collision, abort.
                  MI = ME;
                  break;
                }
              }
              if (MI != MemOps.end()) {
                MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
                continue;
              }
            }
          }
        }

        // Don't advance the iterator; The op will start a new chain next.
        MBBI = I;
        --Position;
        // Fallthrough to look into existing chain.
      } else if (MBBI->isDebugInstr()) {
        // Debug instructions neither extend nor break a chain.
        continue;
      } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
                 MBBI->getOpcode() == ARM::t2STRDi8) {
        // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
        // remember them because we may still be able to merge add/sub into them.
        MergeBaseCandidates.push_back(&*MBBI);
      }

      // If we are here then the chain is broken; Extract candidates for a merge.
      if (MemOps.size() > 0) {
        FormCandidates(MemOps);
        // Reset for the next chain.
        CurrBase = 0;
        CurrOpc = ~0u;
        CurrPred = ARMCC::AL;
        MemOps.clear();
      }
    }
    // Flush the chain that was still open when the start of the block was
    // reached.
    if (MemOps.size() > 0)
      FormCandidates(MemOps);

    // Sort candidates so they get processed from end to begin of the basic
    // block later; This is necessary for liveness calculation.
    auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
      return M0->InsertPos < M1->InsertPos;
    };
    llvm::sort(Candidates, LessThan);

    // Go through list of candidates and merge.
    bool Changed = false;
    for (const MergeCandidate *Candidate : Candidates) {
      if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
        MachineInstr *Merged = MergeOpsUpdate(*Candidate);
        // Merge preceding/trailing base inc/dec into the merged op.
        if (Merged) {
          Changed = true;
          unsigned Opcode = Merged->getOpcode();
          if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
            MergeBaseUpdateLSDouble(*Merged);
          else
            MergeBaseUpdateLSMultiple(Merged);
        } else {
          // The merge did not happen; still try to fold base updates into each
          // individual instruction of the candidate.
          for (MachineInstr *MI : Candidate->Instrs) {
            if (MergeBaseUpdateLoadStore(MI))
              Changed = true;
          }
        }
      } else {
        // A candidate that cannot be merged holds exactly one instruction.
        assert(Candidate->Instrs.size() == 1);
        if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
          Changed = true;
      }
    }
    Candidates.clear();
    // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
    // NOTE(review): the result of these folds is not reflected in Changed.
    for (MachineInstr *MI : MergeBaseCandidates)
      MergeBaseUpdateLSDouble(*MI);
    MergeBaseCandidates.clear();

    return Changed;
  }
  1845. /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
  1846. /// into the preceding stack restore so it directly restore the value of LR
  1847. /// into pc.
  1848. /// ldmfd sp!, {..., lr}
  1849. /// bx lr
  1850. /// or
  1851. /// ldmfd sp!, {..., lr}
  1852. /// mov pc, lr
  1853. /// =>
  1854. /// ldmfd sp!, {..., pc}
  1855. bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  1856. // Thumb1 LDM doesn't allow high registers.
  1857. if (isThumb1) return false;
  1858. if (MBB.empty()) return false;
  1859. MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  1860. if (MBBI != MBB.begin() && MBBI != MBB.end() &&
  1861. (MBBI->getOpcode() == ARM::BX_RET ||
  1862. MBBI->getOpcode() == ARM::tBX_RET ||
  1863. MBBI->getOpcode() == ARM::MOVPCLR)) {
  1864. MachineBasicBlock::iterator PrevI = std::prev(MBBI);
  1865. // Ignore any debug instructions.
  1866. while (PrevI->isDebugInstr() && PrevI != MBB.begin())
  1867. --PrevI;
  1868. MachineInstr &PrevMI = *PrevI;
  1869. unsigned Opcode = PrevMI.getOpcode();
  1870. if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
  1871. Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
  1872. Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
  1873. MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
  1874. if (MO.getReg() != ARM::LR)
  1875. return false;
  1876. unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
  1877. assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
  1878. Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
  1879. PrevMI.setDesc(TII->get(NewOpc));
  1880. MO.setReg(ARM::PC);
  1881. PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
  1882. MBB.erase(MBBI);
  1883. // We now restore LR into PC so it is not live-out of the return block
  1884. // anymore: Clear the CSI Restored bit.
  1885. MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
  1886. // CSI should be fixed after PrologEpilog Insertion
  1887. assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
  1888. for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
  1889. if (Info.getReg() == ARM::LR) {
  1890. Info.setRestored(false);
  1891. break;
  1892. }
  1893. }
  1894. return true;
  1895. }
  1896. }
  1897. return false;
  1898. }
  1899. bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  1900. MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  1901. if (MBBI == MBB.begin() || MBBI == MBB.end() ||
  1902. MBBI->getOpcode() != ARM::tBX_RET)
  1903. return false;
  1904. MachineBasicBlock::iterator Prev = MBBI;
  1905. --Prev;
  1906. if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
  1907. return false;
  1908. for (auto Use : Prev->uses())
  1909. if (Use.isKill()) {
  1910. assert(STI->hasV4TOps());
  1911. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
  1912. .addReg(Use.getReg(), RegState::Kill)
  1913. .add(predOps(ARMCC::AL))
  1914. .copyImplicitOps(*MBBI);
  1915. MBB.erase(MBBI);
  1916. MBB.erase(Prev);
  1917. return true;
  1918. }
  1919. llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
  1920. }
  1921. bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1922. if (skipFunction(Fn.getFunction()))
  1923. return false;
  1924. MF = &Fn;
  1925. STI = &Fn.getSubtarget<ARMSubtarget>();
  1926. TL = STI->getTargetLowering();
  1927. AFI = Fn.getInfo<ARMFunctionInfo>();
  1928. TII = STI->getInstrInfo();
  1929. TRI = STI->getRegisterInfo();
  1930. RegClassInfoValid = false;
  1931. isThumb2 = AFI->isThumb2Function();
  1932. isThumb1 = AFI->isThumbFunction() && !isThumb2;
  1933. bool Modified = false;
  1934. for (MachineBasicBlock &MBB : Fn) {
  1935. Modified |= LoadStoreMultipleOpti(MBB);
  1936. if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
  1937. Modified |= MergeReturnIntoLDM(MBB);
  1938. if (isThumb1)
  1939. Modified |= CombineMovBx(MBB);
  1940. }
  1941. Allocator.DestroyAll();
  1942. return Modified;
  1943. }
  // Human-readable name of the pre-register-allocation pass. It is returned by
  // ARMPreAllocLoadStoreOpt::getPassName() and passed to the INITIALIZE_PASS_*
  // registration macros.
  #define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
    "ARM pre- register allocation load / store optimization pass"
  1946. namespace {
  1947. /// Pre- register allocation pass that move load / stores from consecutive
  1948. /// locations close to make it more likely they will be combined later.
  1949. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  1950. static char ID;
  1951. AliasAnalysis *AA;
  1952. const DataLayout *TD;
  1953. const TargetInstrInfo *TII;
  1954. const TargetRegisterInfo *TRI;
  1955. const ARMSubtarget *STI;
  1956. MachineRegisterInfo *MRI;
  1957. MachineDominatorTree *DT;
  1958. MachineFunction *MF;
  1959. ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
  1960. bool runOnMachineFunction(MachineFunction &Fn) override;
  1961. StringRef getPassName() const override {
  1962. return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  1963. }
  1964. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1965. AU.addRequired<AAResultsWrapperPass>();
  1966. AU.addRequired<MachineDominatorTree>();
  1967. AU.addPreserved<MachineDominatorTree>();
  1968. MachineFunctionPass::getAnalysisUsage(AU);
  1969. }
  1970. private:
  1971. bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
  1972. unsigned &NewOpc, Register &EvenReg, Register &OddReg,
  1973. Register &BaseReg, int &Offset, Register &PredReg,
  1974. ARMCC::CondCodes &Pred, bool &isT2);
  1975. bool RescheduleOps(MachineBasicBlock *MBB,
  1976. SmallVectorImpl<MachineInstr *> &Ops,
  1977. unsigned Base, bool isLd,
  1978. DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  1979. bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  1980. bool DistributeIncrements();
  1981. bool DistributeIncrements(Register Base);
  1982. };
  1983. } // end anonymous namespace
  // Definition of the static pass identifier declared in
  // ARMPreAllocLoadStoreOpt.
  char ARMPreAllocLoadStoreOpt::ID = 0;

  // Register the pass under the name "arm-prera-ldst-opt", declaring
  // MachineDominatorTree as a dependency.
  INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
                        ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
                      ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  // Limit the number of instructions to be rescheduled.
  // FIXME: tune this limit, and/or come up with some better heuristics.
  // Hidden command-line option "arm-prera-ldst-opt-reorder-limit"; defaults
  // to 8.
  static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
                                            cl::init(8), cl::Hidden);
  1994. bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1995. if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
  1996. return false;
  1997. TD = &Fn.getDataLayout();
  1998. STI = &Fn.getSubtarget<ARMSubtarget>();
  1999. TII = STI->getInstrInfo();
  2000. TRI = STI->getRegisterInfo();
  2001. MRI = &Fn.getRegInfo();
  2002. DT = &getAnalysis<MachineDominatorTree>();
  2003. MF = &Fn;
  2004. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  2005. bool Modified = DistributeIncrements();
  2006. for (MachineBasicBlock &MFI : Fn)
  2007. Modified |= RescheduleLoadStoreInstrs(&MFI);
  2008. return Modified;
  2009. }
  2010. static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
  2011. MachineBasicBlock::iterator I,
  2012. MachineBasicBlock::iterator E,
  2013. SmallPtrSetImpl<MachineInstr*> &MemOps,
  2014. SmallSet<unsigned, 4> &MemRegs,
  2015. const TargetRegisterInfo *TRI,
  2016. AliasAnalysis *AA) {
  2017. // Are there stores / loads / calls between them?
  2018. SmallSet<unsigned, 4> AddedRegPressure;
  2019. while (++I != E) {
  2020. if (I->isDebugInstr() || MemOps.count(&*I))
  2021. continue;
  2022. if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
  2023. return false;
  2024. if (I->mayStore() || (!isLd && I->mayLoad()))
  2025. for (MachineInstr *MemOp : MemOps)
  2026. if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
  2027. return false;
  2028. for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
  2029. MachineOperand &MO = I->getOperand(j);
  2030. if (!MO.isReg())
  2031. continue;
  2032. Register Reg = MO.getReg();
  2033. if (MO.isDef() && TRI->regsOverlap(Reg, Base))
  2034. return false;
  2035. if (Reg != Base && !MemRegs.count(Reg))
  2036. AddedRegPressure.insert(Reg);
  2037. }
  2038. }
  2039. // Estimate register pressure increase due to the transformation.
  2040. if (MemRegs.size() <= 4)
  2041. // Ok if we are moving small number of instructions.
  2042. return true;
  2043. return AddedRegPressure.size() <= MemRegs.size() * 2;
  2044. }
  2045. bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
  2046. MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
  2047. Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
  2048. Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
  2049. // Make sure we're allowed to generate LDRD/STRD.
  2050. if (!STI->hasV5TEOps())
  2051. return false;
  2052. // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  2053. unsigned Scale = 1;
  2054. unsigned Opcode = Op0->getOpcode();
  2055. if (Opcode == ARM::LDRi12) {
  2056. NewOpc = ARM::LDRD;
  2057. } else if (Opcode == ARM::STRi12) {
  2058. NewOpc = ARM::STRD;
  2059. } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
  2060. NewOpc = ARM::t2LDRDi8;
  2061. Scale = 4;
  2062. isT2 = true;
  2063. } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
  2064. NewOpc = ARM::t2STRDi8;
  2065. Scale = 4;
  2066. isT2 = true;
  2067. } else {
  2068. return false;
  2069. }
  2070. // Make sure the base address satisfies i64 ld / st alignment requirement.
  2071. // At the moment, we ignore the memoryoperand's value.
  2072. // If we want to use AliasAnalysis, we should check it accordingly.
  2073. if (!Op0->hasOneMemOperand() ||
  2074. (*Op0->memoperands_begin())->isVolatile() ||
  2075. (*Op0->memoperands_begin())->isAtomic())
  2076. return false;
  2077. Align Alignment = (*Op0->memoperands_begin())->getAlign();
  2078. const Function &Func = MF->getFunction();
  2079. Align ReqAlign =
  2080. STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext()))
  2081. : Align(8); // Pre-v6 need 8-byte align
  2082. if (Alignment < ReqAlign)
  2083. return false;
  2084. // Then make sure the immediate offset fits.
  2085. int OffImm = getMemoryOpOffset(*Op0);
  2086. if (isT2) {
  2087. int Limit = (1 << 8) * Scale;
  2088. if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
  2089. return false;
  2090. Offset = OffImm;
  2091. } else {
  2092. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  2093. if (OffImm < 0) {
  2094. AddSub = ARM_AM::sub;
  2095. OffImm = - OffImm;
  2096. }
  2097. int Limit = (1 << 8) * Scale;
  2098. if (OffImm >= Limit || (OffImm & (Scale-1)))
  2099. return false;
  2100. Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  2101. }
  2102. FirstReg = Op0->getOperand(0).getReg();
  2103. SecondReg = Op1->getOperand(0).getReg();
  2104. if (FirstReg == SecondReg)
  2105. return false;
  2106. BaseReg = Op0->getOperand(1).getReg();
  2107. Pred = getInstrPredicate(*Op0, PredReg);
  2108. dl = Op0->getDebugLoc();
  2109. return true;
  2110. }
  2111. bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
  2112. SmallVectorImpl<MachineInstr *> &Ops,
  2113. unsigned Base, bool isLd,
  2114. DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  2115. bool RetVal = false;
  2116. // Sort by offset (in reverse order).
  2117. llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
  2118. int LOffset = getMemoryOpOffset(*LHS);
  2119. int ROffset = getMemoryOpOffset(*RHS);
  2120. assert(LHS == RHS || LOffset != ROffset);
  2121. return LOffset > ROffset;
  2122. });
  2123. // The loads / stores of the same base are in order. Scan them from first to
  2124. // last and check for the following:
  2125. // 1. Any def of base.
  2126. // 2. Any gaps.
  2127. while (Ops.size() > 1) {
  2128. unsigned FirstLoc = ~0U;
  2129. unsigned LastLoc = 0;
  2130. MachineInstr *FirstOp = nullptr;
  2131. MachineInstr *LastOp = nullptr;
  2132. int LastOffset = 0;
  2133. unsigned LastOpcode = 0;
  2134. unsigned LastBytes = 0;
  2135. unsigned NumMove = 0;
  2136. for (MachineInstr *Op : llvm::reverse(Ops)) {
  2137. // Make sure each operation has the same kind.
  2138. unsigned LSMOpcode
  2139. = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
  2140. if (LastOpcode && LSMOpcode != LastOpcode)
  2141. break;
  2142. // Check that we have a continuous set of offsets.
  2143. int Offset = getMemoryOpOffset(*Op);
  2144. unsigned Bytes = getLSMultipleTransferSize(Op);
  2145. if (LastBytes) {
  2146. if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
  2147. break;
  2148. }
  2149. // Don't try to reschedule too many instructions.
  2150. if (NumMove == InstReorderLimit)
  2151. break;
  2152. // Found a mergable instruction; save information about it.
  2153. ++NumMove;
  2154. LastOffset = Offset;
  2155. LastBytes = Bytes;
  2156. LastOpcode = LSMOpcode;
  2157. unsigned Loc = MI2LocMap[Op];
  2158. if (Loc <= FirstLoc) {
  2159. FirstLoc = Loc;
  2160. FirstOp = Op;
  2161. }
  2162. if (Loc >= LastLoc) {
  2163. LastLoc = Loc;
  2164. LastOp = Op;
  2165. }
  2166. }
  2167. if (NumMove <= 1)
  2168. Ops.pop_back();
  2169. else {
  2170. SmallPtrSet<MachineInstr*, 4> MemOps;
  2171. SmallSet<unsigned, 4> MemRegs;
  2172. for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
  2173. MemOps.insert(Ops[i]);
  2174. MemRegs.insert(Ops[i]->getOperand(0).getReg());
  2175. }
  2176. // Be conservative, if the instructions are too far apart, don't
  2177. // move them. We want to limit the increase of register pressure.
  2178. bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
  2179. if (DoMove)
  2180. DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
  2181. MemOps, MemRegs, TRI, AA);
  2182. if (!DoMove) {
  2183. for (unsigned i = 0; i != NumMove; ++i)
  2184. Ops.pop_back();
  2185. } else {
  2186. // This is the new location for the loads / stores.
  2187. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
  2188. while (InsertPos != MBB->end() &&
  2189. (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
  2190. ++InsertPos;
  2191. // If we are moving a pair of loads / stores, see if it makes sense
  2192. // to try to allocate a pair of registers that can form register pairs.
  2193. MachineInstr *Op0 = Ops.back();
  2194. MachineInstr *Op1 = Ops[Ops.size()-2];
  2195. Register FirstReg, SecondReg;
  2196. Register BaseReg, PredReg;
  2197. ARMCC::CondCodes Pred = ARMCC::AL;
  2198. bool isT2 = false;
  2199. unsigned NewOpc = 0;
  2200. int Offset = 0;
  2201. DebugLoc dl;
  2202. if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
  2203. FirstReg, SecondReg, BaseReg,
  2204. Offset, PredReg, Pred, isT2)) {
  2205. Ops.pop_back();
  2206. Ops.pop_back();
  2207. const MCInstrDesc &MCID = TII->get(NewOpc);
  2208. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2209. MRI->constrainRegClass(FirstReg, TRC);
  2210. MRI->constrainRegClass(SecondReg, TRC);
  2211. // Form the pair instruction.
  2212. if (isLd) {
  2213. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2214. .addReg(FirstReg, RegState::Define)
  2215. .addReg(SecondReg, RegState::Define)
  2216. .addReg(BaseReg);
  2217. // FIXME: We're converting from LDRi12 to an insn that still
  2218. // uses addrmode2, so we need an explicit offset reg. It should
  2219. // always by reg0 since we're transforming LDRi12s.
  2220. if (!isT2)
  2221. MIB.addReg(0);
  2222. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2223. MIB.cloneMergedMemRefs({Op0, Op1});
  2224. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2225. ++NumLDRDFormed;
  2226. } else {
  2227. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2228. .addReg(FirstReg)
  2229. .addReg(SecondReg)
  2230. .addReg(BaseReg);
  2231. // FIXME: We're converting from LDRi12 to an insn that still
  2232. // uses addrmode2, so we need an explicit offset reg. It should
  2233. // always by reg0 since we're transforming STRi12s.
  2234. if (!isT2)
  2235. MIB.addReg(0);
  2236. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2237. MIB.cloneMergedMemRefs({Op0, Op1});
  2238. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2239. ++NumSTRDFormed;
  2240. }
  2241. MBB->erase(Op0);
  2242. MBB->erase(Op1);
  2243. if (!isT2) {
  2244. // Add register allocation hints to form register pairs.
  2245. MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
  2246. MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
  2247. }
  2248. } else {
  2249. for (unsigned i = 0; i != NumMove; ++i) {
  2250. MachineInstr *Op = Ops.pop_back_val();
  2251. MBB->splice(InsertPos, MBB, Op);
  2252. }
  2253. }
  2254. NumLdStMoved += NumMove;
  2255. RetVal = true;
  2256. }
  2257. }
  2258. }
  2259. return RetVal;
  2260. }
  2261. bool
  2262. ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  2263. bool RetVal = false;
  2264. DenseMap<MachineInstr*, unsigned> MI2LocMap;
  2265. using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
  2266. using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
  2267. using BaseVec = SmallVector<unsigned, 4>;
  2268. Base2InstMap Base2LdsMap;
  2269. Base2InstMap Base2StsMap;
  2270. BaseVec LdBases;
  2271. BaseVec StBases;
  2272. unsigned Loc = 0;
  2273. MachineBasicBlock::iterator MBBI = MBB->begin();
  2274. MachineBasicBlock::iterator E = MBB->end();
  2275. while (MBBI != E) {
  2276. for (; MBBI != E; ++MBBI) {
  2277. MachineInstr &MI = *MBBI;
  2278. if (MI.isCall() || MI.isTerminator()) {
  2279. // Stop at barriers.
  2280. ++MBBI;
  2281. break;
  2282. }
  2283. if (!MI.isDebugInstr())
  2284. MI2LocMap[&MI] = ++Loc;
  2285. if (!isMemoryOp(MI))
  2286. continue;
  2287. Register PredReg;
  2288. if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
  2289. continue;
  2290. int Opc = MI.getOpcode();
  2291. bool isLd = isLoadSingle(Opc);
  2292. Register Base = MI.getOperand(1).getReg();
  2293. int Offset = getMemoryOpOffset(MI);
  2294. bool StopHere = false;
  2295. auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
  2296. MapIt BI = Base2Ops.find(Base);
  2297. if (BI == Base2Ops.end()) {
  2298. Base2Ops[Base].push_back(&MI);
  2299. Bases.push_back(Base);
  2300. return;
  2301. }
  2302. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  2303. if (Offset == getMemoryOpOffset(*BI->second[i])) {
  2304. StopHere = true;
  2305. break;
  2306. }
  2307. }
  2308. if (!StopHere)
  2309. BI->second.push_back(&MI);
  2310. };
  2311. if (isLd)
  2312. FindBases(Base2LdsMap, LdBases);
  2313. else
  2314. FindBases(Base2StsMap, StBases);
  2315. if (StopHere) {
  2316. // Found a duplicate (a base+offset combination that's seen earlier).
  2317. // Backtrack.
  2318. --Loc;
  2319. break;
  2320. }
  2321. }
  2322. // Re-schedule loads.
  2323. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
  2324. unsigned Base = LdBases[i];
  2325. SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
  2326. if (Lds.size() > 1)
  2327. RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
  2328. }
  2329. // Re-schedule stores.
  2330. for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
  2331. unsigned Base = StBases[i];
  2332. SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
  2333. if (Sts.size() > 1)
  2334. RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
  2335. }
  2336. if (MBBI != E) {
  2337. Base2LdsMap.clear();
  2338. Base2StsMap.clear();
  2339. LdBases.clear();
  2340. StBases.clear();
  2341. }
  2342. }
  2343. return RetVal;
  2344. }
  2345. // Get the Base register operand index from the memory access MachineInst if we
  2346. // should attempt to distribute postinc on it. Return -1 if not of a valid
  2347. // instruction type. If it returns an index, it is assumed that instruction is a
  2348. // r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
  2349. static int getBaseOperandIndex(MachineInstr &MI) {
  2350. switch (MI.getOpcode()) {
  2351. case ARM::MVE_VLDRBS16:
  2352. case ARM::MVE_VLDRBS32:
  2353. case ARM::MVE_VLDRBU16:
  2354. case ARM::MVE_VLDRBU32:
  2355. case ARM::MVE_VLDRHS32:
  2356. case ARM::MVE_VLDRHU32:
  2357. case ARM::MVE_VLDRBU8:
  2358. case ARM::MVE_VLDRHU16:
  2359. case ARM::MVE_VLDRWU32:
  2360. case ARM::MVE_VSTRB16:
  2361. case ARM::MVE_VSTRB32:
  2362. case ARM::MVE_VSTRH32:
  2363. case ARM::MVE_VSTRBU8:
  2364. case ARM::MVE_VSTRHU16:
  2365. case ARM::MVE_VSTRWU32:
  2366. case ARM::t2LDRHi8:
  2367. case ARM::t2LDRHi12:
  2368. case ARM::t2LDRSHi8:
  2369. case ARM::t2LDRSHi12:
  2370. case ARM::t2LDRBi8:
  2371. case ARM::t2LDRBi12:
  2372. case ARM::t2LDRSBi8:
  2373. case ARM::t2LDRSBi12:
  2374. case ARM::t2STRBi8:
  2375. case ARM::t2STRBi12:
  2376. case ARM::t2STRHi8:
  2377. case ARM::t2STRHi12:
  2378. return 1;
  2379. case ARM::MVE_VLDRBS16_post:
  2380. case ARM::MVE_VLDRBS32_post:
  2381. case ARM::MVE_VLDRBU16_post:
  2382. case ARM::MVE_VLDRBU32_post:
  2383. case ARM::MVE_VLDRHS32_post:
  2384. case ARM::MVE_VLDRHU32_post:
  2385. case ARM::MVE_VLDRBU8_post:
  2386. case ARM::MVE_VLDRHU16_post:
  2387. case ARM::MVE_VLDRWU32_post:
  2388. case ARM::MVE_VSTRB16_post:
  2389. case ARM::MVE_VSTRB32_post:
  2390. case ARM::MVE_VSTRH32_post:
  2391. case ARM::MVE_VSTRBU8_post:
  2392. case ARM::MVE_VSTRHU16_post:
  2393. case ARM::MVE_VSTRWU32_post:
  2394. case ARM::MVE_VLDRBS16_pre:
  2395. case ARM::MVE_VLDRBS32_pre:
  2396. case ARM::MVE_VLDRBU16_pre:
  2397. case ARM::MVE_VLDRBU32_pre:
  2398. case ARM::MVE_VLDRHS32_pre:
  2399. case ARM::MVE_VLDRHU32_pre:
  2400. case ARM::MVE_VLDRBU8_pre:
  2401. case ARM::MVE_VLDRHU16_pre:
  2402. case ARM::MVE_VLDRWU32_pre:
  2403. case ARM::MVE_VSTRB16_pre:
  2404. case ARM::MVE_VSTRB32_pre:
  2405. case ARM::MVE_VSTRH32_pre:
  2406. case ARM::MVE_VSTRBU8_pre:
  2407. case ARM::MVE_VSTRHU16_pre:
  2408. case ARM::MVE_VSTRWU32_pre:
  2409. return 2;
  2410. }
  2411. return -1;
  2412. }
  2413. static bool isPostIndex(MachineInstr &MI) {
  2414. switch (MI.getOpcode()) {
  2415. case ARM::MVE_VLDRBS16_post:
  2416. case ARM::MVE_VLDRBS32_post:
  2417. case ARM::MVE_VLDRBU16_post:
  2418. case ARM::MVE_VLDRBU32_post:
  2419. case ARM::MVE_VLDRHS32_post:
  2420. case ARM::MVE_VLDRHU32_post:
  2421. case ARM::MVE_VLDRBU8_post:
  2422. case ARM::MVE_VLDRHU16_post:
  2423. case ARM::MVE_VLDRWU32_post:
  2424. case ARM::MVE_VSTRB16_post:
  2425. case ARM::MVE_VSTRB32_post:
  2426. case ARM::MVE_VSTRH32_post:
  2427. case ARM::MVE_VSTRBU8_post:
  2428. case ARM::MVE_VSTRHU16_post:
  2429. case ARM::MVE_VSTRWU32_post:
  2430. return true;
  2431. }
  2432. return false;
  2433. }
  2434. static bool isPreIndex(MachineInstr &MI) {
  2435. switch (MI.getOpcode()) {
  2436. case ARM::MVE_VLDRBS16_pre:
  2437. case ARM::MVE_VLDRBS32_pre:
  2438. case ARM::MVE_VLDRBU16_pre:
  2439. case ARM::MVE_VLDRBU32_pre:
  2440. case ARM::MVE_VLDRHS32_pre:
  2441. case ARM::MVE_VLDRHU32_pre:
  2442. case ARM::MVE_VLDRBU8_pre:
  2443. case ARM::MVE_VLDRHU16_pre:
  2444. case ARM::MVE_VLDRWU32_pre:
  2445. case ARM::MVE_VSTRB16_pre:
  2446. case ARM::MVE_VSTRB32_pre:
  2447. case ARM::MVE_VSTRH32_pre:
  2448. case ARM::MVE_VSTRBU8_pre:
  2449. case ARM::MVE_VSTRHU16_pre:
  2450. case ARM::MVE_VSTRWU32_pre:
  2451. return true;
  2452. }
  2453. return false;
  2454. }
  2455. // Given a memory access Opcode, check that the give Imm would be a valid Offset
  2456. // for this instruction (same as isLegalAddressImm), Or if the instruction
  2457. // could be easily converted to one where that was valid. For example converting
  2458. // t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
  2459. // AdjustBaseAndOffset below.
  2460. static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
  2461. const TargetInstrInfo *TII,
  2462. int &CodesizeEstimate) {
  2463. if (isLegalAddressImm(Opcode, Imm, TII))
  2464. return true;
  2465. // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
  2466. const MCInstrDesc &Desc = TII->get(Opcode);
  2467. unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  2468. switch (AddrMode) {
  2469. case ARMII::AddrModeT2_i12:
  2470. CodesizeEstimate += 1;
  2471. return Imm < 0 && -Imm < ((1 << 8) * 1);
  2472. }
  2473. return false;
  2474. }
  2475. // Given an MI adjust its address BaseReg to use NewBaseReg and address offset
  2476. // by -Offset. This can either happen in-place or be a replacement as MI is
  2477. // converted to another instruction type.
  2478. static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
  2479. int Offset, const TargetInstrInfo *TII,
  2480. const TargetRegisterInfo *TRI) {
  2481. // Set the Base reg
  2482. unsigned BaseOp = getBaseOperandIndex(*MI);
  2483. MI->getOperand(BaseOp).setReg(NewBaseReg);
  2484. // and constrain the reg class to that required by the instruction.
  2485. MachineFunction *MF = MI->getMF();
  2486. MachineRegisterInfo &MRI = MF->getRegInfo();
  2487. const MCInstrDesc &MCID = TII->get(MI->getOpcode());
  2488. const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF);
  2489. MRI.constrainRegClass(NewBaseReg, TRC);
  2490. int OldOffset = MI->getOperand(BaseOp + 1).getImm();
  2491. if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
  2492. MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
  2493. else {
  2494. unsigned ConvOpcode;
  2495. switch (MI->getOpcode()) {
  2496. case ARM::t2LDRHi12:
  2497. ConvOpcode = ARM::t2LDRHi8;
  2498. break;
  2499. case ARM::t2LDRSHi12:
  2500. ConvOpcode = ARM::t2LDRSHi8;
  2501. break;
  2502. case ARM::t2LDRBi12:
  2503. ConvOpcode = ARM::t2LDRBi8;
  2504. break;
  2505. case ARM::t2LDRSBi12:
  2506. ConvOpcode = ARM::t2LDRSBi8;
  2507. break;
  2508. case ARM::t2STRHi12:
  2509. ConvOpcode = ARM::t2STRHi8;
  2510. break;
  2511. case ARM::t2STRBi12:
  2512. ConvOpcode = ARM::t2STRBi8;
  2513. break;
  2514. default:
  2515. llvm_unreachable("Unhandled convertable opcode");
  2516. }
  2517. assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
  2518. "Illegal Address Immediate after convert!");
  2519. const MCInstrDesc &MCID = TII->get(ConvOpcode);
  2520. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2521. .add(MI->getOperand(0))
  2522. .add(MI->getOperand(1))
  2523. .addImm(OldOffset - Offset)
  2524. .add(MI->getOperand(3))
  2525. .add(MI->getOperand(4))
  2526. .cloneMemRefs(*MI);
  2527. MI->eraseFromParent();
  2528. }
  2529. }
  2530. static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
  2531. Register NewReg,
  2532. const TargetInstrInfo *TII,
  2533. const TargetRegisterInfo *TRI) {
  2534. MachineFunction *MF = MI->getMF();
  2535. MachineRegisterInfo &MRI = MF->getRegInfo();
  2536. unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
  2537. MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
  2538. const MCInstrDesc &MCID = TII->get(NewOpcode);
  2539. // Constrain the def register class
  2540. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2541. MRI.constrainRegClass(NewReg, TRC);
  2542. // And do the same for the base operand
  2543. TRC = TII->getRegClass(MCID, 2, TRI, *MF);
  2544. MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
  2545. unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
  2546. switch (AddrMode) {
  2547. case ARMII::AddrModeT2_i7:
  2548. case ARMII::AddrModeT2_i7s2:
  2549. case ARMII::AddrModeT2_i7s4:
  2550. // Any MVE load/store
  2551. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2552. .addReg(NewReg, RegState::Define)
  2553. .add(MI->getOperand(0))
  2554. .add(MI->getOperand(1))
  2555. .addImm(Offset)
  2556. .add(MI->getOperand(3))
  2557. .add(MI->getOperand(4))
  2558. .add(MI->getOperand(5))
  2559. .cloneMemRefs(*MI);
  2560. case ARMII::AddrModeT2_i8:
  2561. if (MI->mayLoad()) {
  2562. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2563. .add(MI->getOperand(0))
  2564. .addReg(NewReg, RegState::Define)
  2565. .add(MI->getOperand(1))
  2566. .addImm(Offset)
  2567. .add(MI->getOperand(3))
  2568. .add(MI->getOperand(4))
  2569. .cloneMemRefs(*MI);
  2570. } else {
  2571. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2572. .addReg(NewReg, RegState::Define)
  2573. .add(MI->getOperand(0))
  2574. .add(MI->getOperand(1))
  2575. .addImm(Offset)
  2576. .add(MI->getOperand(3))
  2577. .add(MI->getOperand(4))
  2578. .cloneMemRefs(*MI);
  2579. }
  2580. default:
  2581. llvm_unreachable("Unhandled createPostIncLoadStore");
  2582. }
  2583. }
  2584. // Given a Base Register, optimise the load/store uses to attempt to create more
  2585. // post-inc accesses and less register moves. We do this by taking zero offset
  2586. // loads/stores with an add, and convert them to a postinc load/store of the
  2587. // same type. Any subsequent accesses will be adjusted to use and account for
  2588. // the post-inc value.
  2589. // For example:
  2590. // LDR #0 LDR_POSTINC #16
  2591. // LDR #4 LDR #-12
  2592. // LDR #8 LDR #-8
  2593. // LDR #12 LDR #-4
  2594. // ADD #16
  2595. //
  2596. // At the same time if we do not find an increment but do find an existing
  2597. // pre/post inc instruction, we can still adjust the offsets of subsequent
  2598. // instructions to save the register move that would otherwise be needed for the
  2599. // in-place increment.
  2600. bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
  2601. // We are looking for:
  2602. // One zero offset load/store that can become postinc
  2603. MachineInstr *BaseAccess = nullptr;
  2604. MachineInstr *PrePostInc = nullptr;
  2605. // An increment that can be folded in
  2606. MachineInstr *Increment = nullptr;
  2607. // Other accesses after BaseAccess that will need to be updated to use the
  2608. // postinc value.
  2609. SmallPtrSet<MachineInstr *, 8> OtherAccesses;
  2610. for (auto &Use : MRI->use_nodbg_instructions(Base)) {
  2611. if (!Increment && getAddSubImmediate(Use) != 0) {
  2612. Increment = &Use;
  2613. continue;
  2614. }
  2615. int BaseOp = getBaseOperandIndex(Use);
  2616. if (BaseOp == -1)
  2617. return false;
  2618. if (!Use.getOperand(BaseOp).isReg() ||
  2619. Use.getOperand(BaseOp).getReg() != Base)
  2620. return false;
  2621. if (isPreIndex(Use) || isPostIndex(Use))
  2622. PrePostInc = &Use;
  2623. else if (Use.getOperand(BaseOp + 1).getImm() == 0)
  2624. BaseAccess = &Use;
  2625. else
  2626. OtherAccesses.insert(&Use);
  2627. }
  2628. int IncrementOffset;
  2629. Register NewBaseReg;
  2630. if (BaseAccess && Increment) {
  2631. if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
  2632. return false;
  2633. Register PredReg;
  2634. if (Increment->definesRegister(ARM::CPSR) ||
  2635. getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
  2636. return false;
  2637. LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
  2638. << Base.virtRegIndex() << "\n");
  2639. // Make sure that Increment has no uses before BaseAccess that are not PHI
  2640. // uses.
  2641. for (MachineInstr &Use :
  2642. MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
  2643. if (&Use == BaseAccess || (Use.getOpcode() != TargetOpcode::PHI &&
  2644. !DT->dominates(BaseAccess, &Use))) {
  2645. LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
  2646. return false;
  2647. }
  2648. }
  2649. // Make sure that Increment can be folded into Base
  2650. IncrementOffset = getAddSubImmediate(*Increment);
  2651. unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
  2652. BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
  2653. if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
  2654. LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
  2655. return false;
  2656. }
  2657. }
  2658. else if (PrePostInc) {
  2659. // If we already have a pre/post index load/store then set BaseAccess,
  2660. // IncrementOffset and NewBaseReg to the values it already produces,
  2661. // allowing us to update and subsequent uses of BaseOp reg with the
  2662. // incremented value.
  2663. if (Increment)
  2664. return false;
  2665. LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
  2666. << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
  2667. int BaseOp = getBaseOperandIndex(*PrePostInc);
  2668. IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
  2669. BaseAccess = PrePostInc;
  2670. NewBaseReg = PrePostInc->getOperand(0).getReg();
  2671. }
  2672. else
  2673. return false;
  2674. // And make sure that the negative value of increment can be added to all
  2675. // other offsets after the BaseAccess. We rely on either
  2676. // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
  2677. // to keep things simple.
  2678. // This also adds a simple codesize metric, to detect if an instruction (like
  2679. // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
  2680. // cannot because it is converted to something else (t2LDRBi8). We start this
  2681. // at -1 for the gain from removing the increment.
  2682. SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
  2683. int CodesizeEstimate = -1;
  2684. for (auto *Use : OtherAccesses) {
  2685. if (DT->dominates(BaseAccess, Use)) {
  2686. SuccessorAccesses.insert(Use);
  2687. unsigned BaseOp = getBaseOperandIndex(*Use);
  2688. if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
  2689. Use->getOperand(BaseOp + 1).getImm() -
  2690. IncrementOffset,
  2691. TII, CodesizeEstimate)) {
  2692. LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
  2693. return false;
  2694. }
  2695. } else if (!DT->dominates(Use, BaseAccess)) {
  2696. LLVM_DEBUG(
  2697. dbgs() << " Unknown dominance relation between Base and Use\n");
  2698. return false;
  2699. }
  2700. }
  2701. if (STI->hasMinSize() && CodesizeEstimate > 0) {
  2702. LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
  2703. return false;
  2704. }
  2705. if (!PrePostInc) {
  2706. // Replace BaseAccess with a post inc
  2707. LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
  2708. LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
  2709. NewBaseReg = Increment->getOperand(0).getReg();
  2710. MachineInstr *BaseAccessPost =
  2711. createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
  2712. BaseAccess->eraseFromParent();
  2713. Increment->eraseFromParent();
  2714. (void)BaseAccessPost;
  2715. LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
  2716. }
  2717. for (auto *Use : SuccessorAccesses) {
  2718. LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
  2719. AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
  2720. LLVM_DEBUG(dbgs() << " To : "; Use->dump());
  2721. }
  2722. // Remove the kill flag from all uses of NewBaseReg, in case any old uses
  2723. // remain.
  2724. for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
  2725. Op.setIsKill(false);
  2726. return true;
  2727. }
  2728. bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
  2729. bool Changed = false;
  2730. SmallSetVector<Register, 4> Visited;
  2731. for (auto &MBB : *MF) {
  2732. for (auto &MI : MBB) {
  2733. int BaseOp = getBaseOperandIndex(MI);
  2734. if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
  2735. continue;
  2736. Register Base = MI.getOperand(BaseOp).getReg();
  2737. if (!Base.isVirtual() || Visited.count(Base))
  2738. continue;
  2739. Visited.insert(Base);
  2740. }
  2741. }
  2742. for (auto Base : Visited)
  2743. Changed |= DistributeIncrements(Base);
  2744. return Changed;
  2745. }
  2746. /// Returns an instance of the load / store optimization pass.
  2747. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  2748. if (PreAlloc)
  2749. return new ARMPreAllocLoadStoreOpt();
  2750. return new ARMLoadStoreOpt();
  2751. }