ARMLoadStoreOptimizer.cpp 105 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019
  1. //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file This file contains a pass that performs load / store related peephole
  10. /// optimizations. This pass should be run after register allocation.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "ARM.h"
  14. #include "ARMBaseInstrInfo.h"
  15. #include "ARMBaseRegisterInfo.h"
  16. #include "ARMISelLowering.h"
  17. #include "ARMMachineFunctionInfo.h"
  18. #include "ARMSubtarget.h"
  19. #include "MCTargetDesc/ARMAddressingModes.h"
  20. #include "MCTargetDesc/ARMBaseInfo.h"
  21. #include "Utils/ARMBaseInfo.h"
  22. #include "llvm/ADT/ArrayRef.h"
  23. #include "llvm/ADT/DenseMap.h"
  24. #include "llvm/ADT/DenseSet.h"
  25. #include "llvm/ADT/STLExtras.h"
  26. #include "llvm/ADT/SmallPtrSet.h"
  27. #include "llvm/ADT/SmallSet.h"
  28. #include "llvm/ADT/SmallVector.h"
  29. #include "llvm/ADT/Statistic.h"
  30. #include "llvm/ADT/iterator_range.h"
  31. #include "llvm/Analysis/AliasAnalysis.h"
  32. #include "llvm/CodeGen/LivePhysRegs.h"
  33. #include "llvm/CodeGen/MachineBasicBlock.h"
  34. #include "llvm/CodeGen/MachineDominators.h"
  35. #include "llvm/CodeGen/MachineFunction.h"
  36. #include "llvm/CodeGen/MachineFunctionPass.h"
  37. #include "llvm/CodeGen/MachineInstr.h"
  38. #include "llvm/CodeGen/MachineInstrBuilder.h"
  39. #include "llvm/CodeGen/MachineMemOperand.h"
  40. #include "llvm/CodeGen/MachineOperand.h"
  41. #include "llvm/CodeGen/MachineRegisterInfo.h"
  42. #include "llvm/CodeGen/RegisterClassInfo.h"
  43. #include "llvm/CodeGen/TargetFrameLowering.h"
  44. #include "llvm/CodeGen/TargetInstrInfo.h"
  45. #include "llvm/CodeGen/TargetLowering.h"
  46. #include "llvm/CodeGen/TargetRegisterInfo.h"
  47. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  48. #include "llvm/IR/DataLayout.h"
  49. #include "llvm/IR/DebugLoc.h"
  50. #include "llvm/IR/DerivedTypes.h"
  51. #include "llvm/IR/Function.h"
  52. #include "llvm/IR/Type.h"
  53. #include "llvm/InitializePasses.h"
  54. #include "llvm/MC/MCInstrDesc.h"
  55. #include "llvm/Pass.h"
  56. #include "llvm/Support/Allocator.h"
  57. #include "llvm/Support/CommandLine.h"
  58. #include "llvm/Support/Debug.h"
  59. #include "llvm/Support/ErrorHandling.h"
  60. #include "llvm/Support/raw_ostream.h"
  61. #include <algorithm>
  62. #include <cassert>
  63. #include <cstddef>
  64. #include <cstdlib>
  65. #include <iterator>
  66. #include <limits>
  67. #include <utility>
  68. using namespace llvm;
  // Identifier string for this pass; the same text is passed as the pass
  // argument to INITIALIZE_PASS further down in this file.
  69. #define DEBUG_TYPE "arm-ldst-opt"
  // Counters kept by this pass. The string is the human-readable description of
  // each counter. The four *Gened counters are incremented in
  // getLoadStoreMultipleOpcode() when a multiple-register opcode is selected.
  70. STATISTIC(NumLDMGened , "Number of ldm instructions generated");
  71. STATISTIC(NumSTMGened , "Number of stm instructions generated");
  72. STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
  73. STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
  74. STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
  75. STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
  76. STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
  77. STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
  78. STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
  79. STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
  80. STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
  81. /// This switch disables formation of double/multi instructions that could
  82. /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
  83. /// disabled. This can be used to create libraries that are robust even when
  84. /// users provoke undefined behaviour by supplying misaligned pointers.
  85. /// \see mayCombineMisaligned()
  // Hidden boolean option, off by default (cl::init(false)).
  86. static cl::opt<bool>
  87. AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
  88. cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
  // Display name of the pass: returned by ARMLoadStoreOpt::getPassName() and
  // passed to INITIALIZE_PASS.
  89. #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
  90. namespace {
  91. /// Post- register allocation pass the combine load / store instructions to
  92. /// form ldm / stm instructions.
  93. struct ARMLoadStoreOpt : public MachineFunctionPass {
  94. static char ID;
  95. const MachineFunction *MF;
  96. const TargetInstrInfo *TII;
  97. const TargetRegisterInfo *TRI;
  98. const ARMSubtarget *STI;
  99. const TargetLowering *TL;
  100. ARMFunctionInfo *AFI;
  101. LivePhysRegs LiveRegs;
  102. RegisterClassInfo RegClassInfo;
  103. MachineBasicBlock::const_iterator LiveRegPos;
  104. bool LiveRegsValid;
  105. bool RegClassInfoValid;
  106. bool isThumb1, isThumb2;
  107. ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
  108. bool runOnMachineFunction(MachineFunction &Fn) override;
  109. MachineFunctionProperties getRequiredProperties() const override {
  110. return MachineFunctionProperties().set(
  111. MachineFunctionProperties::Property::NoVRegs);
  112. }
  113. StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
  114. private:
  115. /// A set of load/store MachineInstrs with same base register sorted by
  116. /// offset.
  117. struct MemOpQueueEntry {
  118. MachineInstr *MI;
  119. int Offset; ///< Load/Store offset.
  120. unsigned Position; ///< Position as counted from end of basic block.
  121. MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
  122. : MI(&MI), Offset(Offset), Position(Position) {}
  123. };
  124. using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
  125. /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  126. /// merged into a LDM/STM.
  127. struct MergeCandidate {
  128. /// List of instructions ordered by load/store offset.
  129. SmallVector<MachineInstr*, 4> Instrs;
  130. /// Index in Instrs of the instruction being latest in the schedule.
  131. unsigned LatestMIIdx;
  132. /// Index in Instrs of the instruction being earliest in the schedule.
  133. unsigned EarliestMIIdx;
  134. /// Index into the basic block where the merged instruction will be
  135. /// inserted. (See MemOpQueueEntry.Position)
  136. unsigned InsertPos;
  137. /// Whether the instructions can be merged into a ldm/stm instruction.
  138. bool CanMergeToLSMulti;
  139. /// Whether the instructions can be merged into a ldrd/strd instruction.
  140. bool CanMergeToLSDouble;
  141. };
  142. SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  143. SmallVector<const MergeCandidate*,4> Candidates;
  144. SmallVector<MachineInstr*,4> MergeBaseCandidates;
  145. void moveLiveRegsBefore(const MachineBasicBlock &MBB,
  146. MachineBasicBlock::const_iterator Before);
  147. unsigned findFreeReg(const TargetRegisterClass &RegClass);
  148. void UpdateBaseRegUses(MachineBasicBlock &MBB,
  149. MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
  150. unsigned Base, unsigned WordOffset,
  151. ARMCC::CondCodes Pred, unsigned PredReg);
  152. MachineInstr *CreateLoadStoreMulti(
  153. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  154. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  155. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  156. ArrayRef<std::pair<unsigned, bool>> Regs,
  157. ArrayRef<MachineInstr*> Instrs);
  158. MachineInstr *CreateLoadStoreDouble(
  159. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  160. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  161. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  162. ArrayRef<std::pair<unsigned, bool>> Regs,
  163. ArrayRef<MachineInstr*> Instrs) const;
  164. void FormCandidates(const MemOpQueue &MemOps);
  165. MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  166. bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
  167. MachineBasicBlock::iterator &MBBI);
  168. bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  169. bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  170. bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  171. bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  172. bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  173. bool CombineMovBx(MachineBasicBlock &MBB);
  174. };
  175. } // end anonymous namespace
  // Out-of-class definition of the static pass identifier declared in
  // ARMLoadStoreOpt; the constructor hands ID to the MachineFunctionPass base.
  176. char ARMLoadStoreOpt::ID = 0;
  // Pass registration: pass class, argument string, display name, then two
  // boolean flags, both false.
  177. INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
  178. false)
  179. static bool definesCPSR(const MachineInstr &MI) {
  180. for (const auto &MO : MI.operands()) {
  181. if (!MO.isReg())
  182. continue;
  183. if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
  184. // If the instruction has live CPSR def, then it's not safe to fold it
  185. // into load / store.
  186. return true;
  187. }
  188. return false;
  189. }
  190. static int getMemoryOpOffset(const MachineInstr &MI) {
  191. unsigned Opcode = MI.getOpcode();
  192. bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  193. unsigned NumOperands = MI.getDesc().getNumOperands();
  194. unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
  195. if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
  196. Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
  197. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
  198. Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  199. return OffField;
  200. // Thumb1 immediate offsets are scaled by 4
  201. if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
  202. Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
  203. return OffField * 4;
  204. int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
  205. : ARM_AM::getAM5Offset(OffField) * 4;
  206. ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
  207. : ARM_AM::getAM5Op(OffField);
  208. if (Op == ARM_AM::sub)
  209. return -Offset;
  210. return Offset;
  211. }
  212. static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  213. return MI.getOperand(1);
  214. }
  215. static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  216. return MI.getOperand(0);
  217. }
  218. static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  219. switch (Opcode) {
  220. default: llvm_unreachable("Unhandled opcode!");
  221. case ARM::LDRi12:
  222. ++NumLDMGened;
  223. switch (Mode) {
  224. default: llvm_unreachable("Unhandled submode!");
  225. case ARM_AM::ia: return ARM::LDMIA;
  226. case ARM_AM::da: return ARM::LDMDA;
  227. case ARM_AM::db: return ARM::LDMDB;
  228. case ARM_AM::ib: return ARM::LDMIB;
  229. }
  230. case ARM::STRi12:
  231. ++NumSTMGened;
  232. switch (Mode) {
  233. default: llvm_unreachable("Unhandled submode!");
  234. case ARM_AM::ia: return ARM::STMIA;
  235. case ARM_AM::da: return ARM::STMDA;
  236. case ARM_AM::db: return ARM::STMDB;
  237. case ARM_AM::ib: return ARM::STMIB;
  238. }
  239. case ARM::tLDRi:
  240. case ARM::tLDRspi:
  241. // tLDMIA is writeback-only - unless the base register is in the input
  242. // reglist.
  243. ++NumLDMGened;
  244. switch (Mode) {
  245. default: llvm_unreachable("Unhandled submode!");
  246. case ARM_AM::ia: return ARM::tLDMIA;
  247. }
  248. case ARM::tSTRi:
  249. case ARM::tSTRspi:
  250. // There is no non-writeback tSTMIA either.
  251. ++NumSTMGened;
  252. switch (Mode) {
  253. default: llvm_unreachable("Unhandled submode!");
  254. case ARM_AM::ia: return ARM::tSTMIA_UPD;
  255. }
  256. case ARM::t2LDRi8:
  257. case ARM::t2LDRi12:
  258. ++NumLDMGened;
  259. switch (Mode) {
  260. default: llvm_unreachable("Unhandled submode!");
  261. case ARM_AM::ia: return ARM::t2LDMIA;
  262. case ARM_AM::db: return ARM::t2LDMDB;
  263. }
  264. case ARM::t2STRi8:
  265. case ARM::t2STRi12:
  266. ++NumSTMGened;
  267. switch (Mode) {
  268. default: llvm_unreachable("Unhandled submode!");
  269. case ARM_AM::ia: return ARM::t2STMIA;
  270. case ARM_AM::db: return ARM::t2STMDB;
  271. }
  272. case ARM::VLDRS:
  273. ++NumVLDMGened;
  274. switch (Mode) {
  275. default: llvm_unreachable("Unhandled submode!");
  276. case ARM_AM::ia: return ARM::VLDMSIA;
  277. case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
  278. }
  279. case ARM::VSTRS:
  280. ++NumVSTMGened;
  281. switch (Mode) {
  282. default: llvm_unreachable("Unhandled submode!");
  283. case ARM_AM::ia: return ARM::VSTMSIA;
  284. case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
  285. }
  286. case ARM::VLDRD:
  287. ++NumVLDMGened;
  288. switch (Mode) {
  289. default: llvm_unreachable("Unhandled submode!");
  290. case ARM_AM::ia: return ARM::VLDMDIA;
  291. case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
  292. }
  293. case ARM::VSTRD:
  294. ++NumVSTMGened;
  295. switch (Mode) {
  296. default: llvm_unreachable("Unhandled submode!");
  297. case ARM_AM::ia: return ARM::VSTMDIA;
  298. case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
  299. }
  300. }
  301. }
  302. static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  303. switch (Opcode) {
  304. default: llvm_unreachable("Unhandled opcode!");
  305. case ARM::LDMIA_RET:
  306. case ARM::LDMIA:
  307. case ARM::LDMIA_UPD:
  308. case ARM::STMIA:
  309. case ARM::STMIA_UPD:
  310. case ARM::tLDMIA:
  311. case ARM::tLDMIA_UPD:
  312. case ARM::tSTMIA_UPD:
  313. case ARM::t2LDMIA_RET:
  314. case ARM::t2LDMIA:
  315. case ARM::t2LDMIA_UPD:
  316. case ARM::t2STMIA:
  317. case ARM::t2STMIA_UPD:
  318. case ARM::VLDMSIA:
  319. case ARM::VLDMSIA_UPD:
  320. case ARM::VSTMSIA:
  321. case ARM::VSTMSIA_UPD:
  322. case ARM::VLDMDIA:
  323. case ARM::VLDMDIA_UPD:
  324. case ARM::VSTMDIA:
  325. case ARM::VSTMDIA_UPD:
  326. return ARM_AM::ia;
  327. case ARM::LDMDA:
  328. case ARM::LDMDA_UPD:
  329. case ARM::STMDA:
  330. case ARM::STMDA_UPD:
  331. return ARM_AM::da;
  332. case ARM::LDMDB:
  333. case ARM::LDMDB_UPD:
  334. case ARM::STMDB:
  335. case ARM::STMDB_UPD:
  336. case ARM::t2LDMDB:
  337. case ARM::t2LDMDB_UPD:
  338. case ARM::t2STMDB:
  339. case ARM::t2STMDB_UPD:
  340. case ARM::VLDMSDB_UPD:
  341. case ARM::VSTMSDB_UPD:
  342. case ARM::VLDMDDB_UPD:
  343. case ARM::VSTMDDB_UPD:
  344. return ARM_AM::db;
  345. case ARM::LDMIB:
  346. case ARM::LDMIB_UPD:
  347. case ARM::STMIB:
  348. case ARM::STMIB_UPD:
  349. return ARM_AM::ib;
  350. }
  351. }
  352. static bool isT1i32Load(unsigned Opc) {
  353. return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
  354. }
  355. static bool isT2i32Load(unsigned Opc) {
  356. return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
  357. }
  358. static bool isi32Load(unsigned Opc) {
  359. return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
  360. }
  361. static bool isT1i32Store(unsigned Opc) {
  362. return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
  363. }
  364. static bool isT2i32Store(unsigned Opc) {
  365. return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
  366. }
  367. static bool isi32Store(unsigned Opc) {
  368. return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
  369. }
  370. static bool isLoadSingle(unsigned Opc) {
  371. return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
  372. }
  373. static unsigned getImmScale(unsigned Opc) {
  374. switch (Opc) {
  375. default: llvm_unreachable("Unhandled opcode!");
  376. case ARM::tLDRi:
  377. case ARM::tSTRi:
  378. case ARM::tLDRspi:
  379. case ARM::tSTRspi:
  380. return 1;
  381. case ARM::tLDRHi:
  382. case ARM::tSTRHi:
  383. return 2;
  384. case ARM::tLDRBi:
  385. case ARM::tSTRBi:
  386. return 4;
  387. }
  388. }
  389. static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  390. switch (MI->getOpcode()) {
  391. default: return 0;
  392. case ARM::LDRi12:
  393. case ARM::STRi12:
  394. case ARM::tLDRi:
  395. case ARM::tSTRi:
  396. case ARM::tLDRspi:
  397. case ARM::tSTRspi:
  398. case ARM::t2LDRi8:
  399. case ARM::t2LDRi12:
  400. case ARM::t2STRi8:
  401. case ARM::t2STRi12:
  402. case ARM::VLDRS:
  403. case ARM::VSTRS:
  404. return 4;
  405. case ARM::VLDRD:
  406. case ARM::VSTRD:
  407. return 8;
  408. case ARM::LDMIA:
  409. case ARM::LDMDA:
  410. case ARM::LDMDB:
  411. case ARM::LDMIB:
  412. case ARM::STMIA:
  413. case ARM::STMDA:
  414. case ARM::STMDB:
  415. case ARM::STMIB:
  416. case ARM::tLDMIA:
  417. case ARM::tLDMIA_UPD:
  418. case ARM::tSTMIA_UPD:
  419. case ARM::t2LDMIA:
  420. case ARM::t2LDMDB:
  421. case ARM::t2STMIA:
  422. case ARM::t2STMDB:
  423. case ARM::VLDMSIA:
  424. case ARM::VSTMSIA:
  425. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  426. case ARM::VLDMDIA:
  427. case ARM::VSTMDIA:
  428. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  429. }
  430. }
  431. /// Update future uses of the base register with the offset introduced
  432. /// due to writeback. This function only works on Thumb1.
  433. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
  434. MachineBasicBlock::iterator MBBI,
  435. const DebugLoc &DL, unsigned Base,
  436. unsigned WordOffset,
  437. ARMCC::CondCodes Pred,
  438. unsigned PredReg) {
  439. assert(isThumb1 && "Can only update base register uses for Thumb1!");
  440. // Start updating any instructions with immediate offsets. Insert a SUB before
  441. // the first non-updateable instruction (if any).
  442. for (; MBBI != MBB.end(); ++MBBI) {
  443. bool InsertSub = false;
  444. unsigned Opc = MBBI->getOpcode();
  445. if (MBBI->readsRegister(Base)) {
  446. int Offset;
  447. bool IsLoad =
  448. Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
  449. bool IsStore =
  450. Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
  451. if (IsLoad || IsStore) {
  452. // Loads and stores with immediate offsets can be updated, but only if
  453. // the new offset isn't negative.
  454. // The MachineOperand containing the offset immediate is the last one
  455. // before predicates.
  456. MachineOperand &MO =
  457. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  458. // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
  459. Offset = MO.getImm() - WordOffset * getImmScale(Opc);
  460. // If storing the base register, it needs to be reset first.
  461. Register InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
  462. if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
  463. MO.setImm(Offset);
  464. else
  465. InsertSub = true;
  466. } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
  467. !definesCPSR(*MBBI)) {
  468. // SUBS/ADDS using this register, with a dead def of the CPSR.
  469. // Merge it with the update; if the merged offset is too large,
  470. // insert a new sub instead.
  471. MachineOperand &MO =
  472. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  473. Offset = (Opc == ARM::tSUBi8) ?
  474. MO.getImm() + WordOffset * 4 :
  475. MO.getImm() - WordOffset * 4 ;
  476. if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
  477. // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
  478. // Offset == 0.
  479. MO.setImm(Offset);
  480. // The base register has now been reset, so exit early.
  481. return;
  482. } else {
  483. InsertSub = true;
  484. }
  485. } else {
  486. // Can't update the instruction.
  487. InsertSub = true;
  488. }
  489. } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
  490. // Since SUBS sets the condition flags, we can't place the base reset
  491. // after an instruction that has a live CPSR def.
  492. // The base register might also contain an argument for a function call.
  493. InsertSub = true;
  494. }
  495. if (InsertSub) {
  496. // An instruction above couldn't be updated, so insert a sub.
  497. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  498. .add(t1CondCodeOp(true))
  499. .addReg(Base)
  500. .addImm(WordOffset * 4)
  501. .addImm(Pred)
  502. .addReg(PredReg);
  503. return;
  504. }
  505. if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
  506. // Register got killed. Stop updating.
  507. return;
  508. }
  509. // End of block was reached.
  510. if (!MBB.succ_empty()) {
  511. // FIXME: Because of a bug, live registers are sometimes missing from
  512. // the successor blocks' live-in sets. This means we can't trust that
  513. // information and *always* have to reset at the end of a block.
  514. // See PR21029.
  515. if (MBBI != MBB.end()) --MBBI;
  516. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  517. .add(t1CondCodeOp(true))
  518. .addReg(Base)
  519. .addImm(WordOffset * 4)
  520. .addImm(Pred)
  521. .addReg(PredReg);
  522. }
  523. }
  524. /// Return the first register of class \p RegClass that is not in \p Regs.
  525. unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  526. if (!RegClassInfoValid) {
  527. RegClassInfo.runOnMachineFunction(*MF);
  528. RegClassInfoValid = true;
  529. }
  530. for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
  531. if (LiveRegs.available(MF->getRegInfo(), Reg))
  532. return Reg;
  533. return 0;
  534. }
  535. /// Compute live registers just before instruction \p Before (in normal schedule
  536. /// direction). Computes backwards so multiple queries in the same block must
  537. /// come in reverse order.
  538. void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
  539. MachineBasicBlock::const_iterator Before) {
  540. // Initialize if we never queried in this block.
  541. if (!LiveRegsValid) {
  542. LiveRegs.init(*TRI);
  543. LiveRegs.addLiveOuts(MBB);
  544. LiveRegPos = MBB.end();
  545. LiveRegsValid = true;
  546. }
  547. // Move backward just before the "Before" position.
  548. while (LiveRegPos != Before) {
  549. --LiveRegPos;
  550. LiveRegs.stepBackward(*LiveRegPos);
  551. }
  552. }
  553. static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
  554. unsigned Reg) {
  555. for (const std::pair<unsigned, bool> &R : Regs)
  556. if (R.first == Reg)
  557. return true;
  558. return false;
  559. }
  560. /// Create and insert a LDM or STM with Base as base register and registers in
  561. /// Regs as the register operands that would be loaded / stored. It returns
  562. /// true if the transformation is done.
  563. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
  564. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  565. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  566. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  567. ArrayRef<std::pair<unsigned, bool>> Regs,
  568. ArrayRef<MachineInstr*> Instrs) {
  569. unsigned NumRegs = Regs.size();
  570. assert(NumRegs > 1);
  571. // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  572. // Compute liveness information for that register to make the decision.
  573. bool SafeToClobberCPSR = !isThumb1 ||
  574. (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
  575. MachineBasicBlock::LQR_Dead);
  576. bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  577. // Exception: If the base register is in the input reglist, Thumb1 LDM is
  578. // non-writeback.
  579. // It's also not possible to merge an STR of the base register in Thumb1.
  580. if (isThumb1 && ContainsReg(Regs, Base)) {
  581. assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
  582. if (Opcode == ARM::tLDRi)
  583. Writeback = false;
  584. else if (Opcode == ARM::tSTRi)
  585. return nullptr;
  586. }
  587. ARM_AM::AMSubMode Mode = ARM_AM::ia;
  588. // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  589. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  590. bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  591. if (Offset == 4 && haveIBAndDA) {
  592. Mode = ARM_AM::ib;
  593. } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  594. Mode = ARM_AM::da;
  595. } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
  596. // VLDM/VSTM do not support DB mode without also updating the base reg.
  597. Mode = ARM_AM::db;
  598. } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
  599. // Check if this is a supported opcode before inserting instructions to
  600. // calculate a new base register.
  601. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
  602. // If starting offset isn't zero, insert a MI to materialize a new base.
  603. // But only do so if it is cost effective, i.e. merging more than two
  604. // loads / stores.
  605. if (NumRegs <= 2)
  606. return nullptr;
  607. // On Thumb1, it's not worth materializing a new base register without
  608. // clobbering the CPSR (i.e. not using ADDS/SUBS).
  609. if (!SafeToClobberCPSR)
  610. return nullptr;
  611. unsigned NewBase;
  612. if (isi32Load(Opcode)) {
  613. // If it is a load, then just use one of the destination registers
  614. // as the new base. Will no longer be writeback in Thumb1.
  615. NewBase = Regs[NumRegs-1].first;
  616. Writeback = false;
  617. } else {
  618. // Find a free register that we can use as scratch register.
  619. moveLiveRegsBefore(MBB, InsertBefore);
  620. // The merged instruction does not exist yet but will use several Regs if
  621. // it is a Store.
  622. if (!isLoadSingle(Opcode))
  623. for (const std::pair<unsigned, bool> &R : Regs)
  624. LiveRegs.addReg(R.first);
  625. NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
  626. if (NewBase == 0)
  627. return nullptr;
  628. }
  629. int BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2ADDspImm
  630. : ARM::t2ADDri)
  631. : (isThumb1 && Base == ARM::SP)
  632. ? ARM::tADDrSPi
  633. : (isThumb1 && Offset < 8)
  634. ? ARM::tADDi3
  635. : isThumb1 ? ARM::tADDi8 : ARM::ADDri;
  636. if (Offset < 0) {
  637. // FIXME: There are no Thumb1 load/store instructions with negative
  638. // offsets. So the Base != ARM::SP might be unnecessary.
  639. Offset = -Offset;
  640. BaseOpc = isThumb2 ? (BaseKill && Base == ARM::SP ? ARM::t2SUBspImm
  641. : ARM::t2SUBri)
  642. : (isThumb1 && Offset < 8 && Base != ARM::SP)
  643. ? ARM::tSUBi3
  644. : isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
  645. }
  646. if (!TL->isLegalAddImmediate(Offset))
  647. // FIXME: Try add with register operand?
  648. return nullptr; // Probably not worth it then.
  649. // We can only append a kill flag to the add/sub input if the value is not
  650. // used in the register list of the stm as well.
  651. bool KillOldBase = BaseKill &&
  652. (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
  653. if (isThumb1) {
  654. // Thumb1: depending on immediate size, use either
  655. // ADDS NewBase, Base, #imm3
  656. // or
  657. // MOV NewBase, Base
  658. // ADDS NewBase, #imm8.
  659. if (Base != NewBase &&
  660. (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
  661. // Need to insert a MOV to the new base first.
  662. if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
  663. !STI->hasV6Ops()) {
  664. // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
  665. if (Pred != ARMCC::AL)
  666. return nullptr;
  667. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
  668. .addReg(Base, getKillRegState(KillOldBase));
  669. } else
  670. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
  671. .addReg(Base, getKillRegState(KillOldBase))
  672. .add(predOps(Pred, PredReg));
  673. // The following ADDS/SUBS becomes an update.
  674. Base = NewBase;
  675. KillOldBase = true;
  676. }
  677. if (BaseOpc == ARM::tADDrSPi) {
  678. assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
  679. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  680. .addReg(Base, getKillRegState(KillOldBase))
  681. .addImm(Offset / 4)
  682. .add(predOps(Pred, PredReg));
  683. } else
  684. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  685. .add(t1CondCodeOp(true))
  686. .addReg(Base, getKillRegState(KillOldBase))
  687. .addImm(Offset)
  688. .add(predOps(Pred, PredReg));
  689. } else {
  690. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  691. .addReg(Base, getKillRegState(KillOldBase))
  692. .addImm(Offset)
  693. .add(predOps(Pred, PredReg))
  694. .add(condCodeOp());
  695. }
  696. Base = NewBase;
  697. BaseKill = true; // New base is always killed straight away.
  698. }
  699. bool isDef = isLoadSingle(Opcode);
  700. // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  701. // base register writeback.
  702. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  703. if (!Opcode)
  704. return nullptr;
  705. // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  706. // - There is no writeback (LDM of base register),
  707. // - the base register is killed by the merged instruction,
  708. // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  709. // to reset the base register.
  710. // Otherwise, don't merge.
  711. // It's safe to return here since the code to materialize a new base register
  712. // above is also conditional on SafeToClobberCPSR.
  713. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
  714. return nullptr;
  715. MachineInstrBuilder MIB;
  716. if (Writeback) {
  717. assert(isThumb1 && "expected Writeback only inThumb1");
  718. if (Opcode == ARM::tLDMIA) {
  719. assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
  720. // Update tLDMIA with writeback if necessary.
  721. Opcode = ARM::tLDMIA_UPD;
  722. }
  723. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  724. // Thumb1: we might need to set base writeback when building the MI.
  725. MIB.addReg(Base, getDefRegState(true))
  726. .addReg(Base, getKillRegState(BaseKill));
  727. // The base isn't dead after a merged instruction with writeback.
  728. // Insert a sub instruction after the newly formed instruction to reset.
  729. if (!BaseKill)
  730. UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  731. } else {
  732. // No writeback, simply build the MachineInstr.
  733. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  734. MIB.addReg(Base, getKillRegState(BaseKill));
  735. }
  736. MIB.addImm(Pred).addReg(PredReg);
  737. for (const std::pair<unsigned, bool> &R : Regs)
  738. MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
  739. MIB.cloneMergedMemRefs(Instrs);
  740. return MIB.getInstr();
  741. }
  742. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
  743. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  744. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  745. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  746. ArrayRef<std::pair<unsigned, bool>> Regs,
  747. ArrayRef<MachineInstr*> Instrs) const {
  748. bool IsLoad = isi32Load(Opcode);
  749. assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  750. unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  751. assert(Regs.size() == 2);
  752. MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
  753. TII->get(LoadStoreOpcode));
  754. if (IsLoad) {
  755. MIB.addReg(Regs[0].first, RegState::Define)
  756. .addReg(Regs[1].first, RegState::Define);
  757. } else {
  758. MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
  759. .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  760. }
  761. MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  762. MIB.cloneMergedMemRefs(Instrs);
  763. return MIB.getInstr();
  764. }
  765. /// Call MergeOps and update MemOps and merges accordingly on success.
  766. MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  767. const MachineInstr *First = Cand.Instrs.front();
  768. unsigned Opcode = First->getOpcode();
  769. bool IsLoad = isLoadSingle(Opcode);
  770. SmallVector<std::pair<unsigned, bool>, 8> Regs;
  771. SmallVector<unsigned, 4> ImpDefs;
  772. DenseSet<unsigned> KilledRegs;
  773. DenseSet<unsigned> UsedRegs;
  774. // Determine list of registers and list of implicit super-register defs.
  775. for (const MachineInstr *MI : Cand.Instrs) {
  776. const MachineOperand &MO = getLoadStoreRegOp(*MI);
  777. Register Reg = MO.getReg();
  778. bool IsKill = MO.isKill();
  779. if (IsKill)
  780. KilledRegs.insert(Reg);
  781. Regs.push_back(std::make_pair(Reg, IsKill));
  782. UsedRegs.insert(Reg);
  783. if (IsLoad) {
  784. // Collect any implicit defs of super-registers, after merging we can't
  785. // be sure anymore that we properly preserved these live ranges and must
  786. // removed these implicit operands.
  787. for (const MachineOperand &MO : MI->implicit_operands()) {
  788. if (!MO.isReg() || !MO.isDef() || MO.isDead())
  789. continue;
  790. assert(MO.isImplicit());
  791. Register DefReg = MO.getReg();
  792. if (is_contained(ImpDefs, DefReg))
  793. continue;
  794. // We can ignore cases where the super-reg is read and written.
  795. if (MI->readsRegister(DefReg))
  796. continue;
  797. ImpDefs.push_back(DefReg);
  798. }
  799. }
  800. }
  801. // Attempt the merge.
  802. using iterator = MachineBasicBlock::iterator;
  803. MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  804. iterator InsertBefore = std::next(iterator(LatestMI));
  805. MachineBasicBlock &MBB = *LatestMI->getParent();
  806. unsigned Offset = getMemoryOpOffset(*First);
  807. Register Base = getLoadStoreBaseOp(*First).getReg();
  808. bool BaseKill = LatestMI->killsRegister(Base);
  809. Register PredReg;
  810. ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  811. DebugLoc DL = First->getDebugLoc();
  812. MachineInstr *Merged = nullptr;
  813. if (Cand.CanMergeToLSDouble)
  814. Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
  815. Opcode, Pred, PredReg, DL, Regs,
  816. Cand.Instrs);
  817. if (!Merged && Cand.CanMergeToLSMulti)
  818. Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
  819. Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
  820. if (!Merged)
  821. return nullptr;
  822. // Determine earliest instruction that will get removed. We then keep an
  823. // iterator just above it so the following erases don't invalidated it.
  824. iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  825. bool EarliestAtBegin = false;
  826. if (EarliestI == MBB.begin()) {
  827. EarliestAtBegin = true;
  828. } else {
  829. EarliestI = std::prev(EarliestI);
  830. }
  831. // Remove instructions which have been merged.
  832. for (MachineInstr *MI : Cand.Instrs)
  833. MBB.erase(MI);
  834. // Determine range between the earliest removed instruction and the new one.
  835. if (EarliestAtBegin)
  836. EarliestI = MBB.begin();
  837. else
  838. EarliestI = std::next(EarliestI);
  839. auto FixupRange = make_range(EarliestI, iterator(Merged));
  840. if (isLoadSingle(Opcode)) {
  841. // If the previous loads defined a super-reg, then we have to mark earlier
  842. // operands undef; Replicate the super-reg def on the merged instruction.
  843. for (MachineInstr &MI : FixupRange) {
  844. for (unsigned &ImpDefReg : ImpDefs) {
  845. for (MachineOperand &MO : MI.implicit_operands()) {
  846. if (!MO.isReg() || MO.getReg() != ImpDefReg)
  847. continue;
  848. if (MO.readsReg())
  849. MO.setIsUndef();
  850. else if (MO.isDef())
  851. ImpDefReg = 0;
  852. }
  853. }
  854. }
  855. MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
  856. for (unsigned ImpDef : ImpDefs)
  857. MIB.addReg(ImpDef, RegState::ImplicitDefine);
  858. } else {
  859. // Remove kill flags: We are possibly storing the values later now.
  860. assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
  861. for (MachineInstr &MI : FixupRange) {
  862. for (MachineOperand &MO : MI.uses()) {
  863. if (!MO.isReg() || !MO.isKill())
  864. continue;
  865. if (UsedRegs.count(MO.getReg()))
  866. MO.setIsKill(false);
  867. }
  868. }
  869. assert(ImpDefs.empty());
  870. }
  871. return Merged;
  872. }
  /// Return true if \p Offset is usable as the immediate of t2LDRDi8/t2STRDi8.
  /// Those instructions take an 8 bit immediate that is internally multiplied
  /// by 4, so the offset must be a multiple of 4 with a magnitude below 1024.
  static bool isValidLSDoubleOffset(int Offset) {
    // Compute the magnitude in unsigned arithmetic: abs(INT_MIN) is undefined
    // behaviour, whereas unsigned negation is well defined for every input.
    const unsigned Value = Offset < 0 ? 0u - static_cast<unsigned>(Offset)
                                      : static_cast<unsigned>(Offset);
    return (Value % 4) == 0 && Value < 1024;
  }
  879. /// Return true for loads/stores that can be combined to a double/multi
  880. /// operation without increasing the requirements for alignment.
  881. static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
  882. const MachineInstr &MI) {
  883. // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  884. // difference.
  885. unsigned Opcode = MI.getOpcode();
  886. if (!isi32Load(Opcode) && !isi32Store(Opcode))
  887. return true;
  888. // Stack pointer alignment is out of the programmers control so we can trust
  889. // SP-relative loads/stores.
  890. if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
  891. STI.getFrameLowering()->getTransientStackAlign() >= Align(4))
  892. return true;
  893. return false;
  894. }
  895. /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
  896. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  897. const MachineInstr *FirstMI = MemOps[0].MI;
  898. unsigned Opcode = FirstMI->getOpcode();
  899. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  900. unsigned Size = getLSMultipleTransferSize(FirstMI);
  901. unsigned SIndex = 0;
  902. unsigned EIndex = MemOps.size();
  903. do {
  904. // Look at the first instruction.
  905. const MachineInstr *MI = MemOps[SIndex].MI;
  906. int Offset = MemOps[SIndex].Offset;
  907. const MachineOperand &PMO = getLoadStoreRegOp(*MI);
  908. Register PReg = PMO.getReg();
  909. unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
  910. : TRI->getEncodingValue(PReg);
  911. unsigned Latest = SIndex;
  912. unsigned Earliest = SIndex;
  913. unsigned Count = 1;
  914. bool CanMergeToLSDouble =
  915. STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
  916. // ARM errata 602117: LDRD with base in list may result in incorrect base
  917. // register when interrupted or faulted.
  918. if (STI->isCortexM3() && isi32Load(Opcode) &&
  919. PReg == getLoadStoreBaseOp(*MI).getReg())
  920. CanMergeToLSDouble = false;
  921. bool CanMergeToLSMulti = true;
  922. // On swift vldm/vstm starting with an odd register number as that needs
  923. // more uops than single vldrs.
  924. if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
  925. CanMergeToLSMulti = false;
  926. // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
  927. // deprecated; LDM to PC is fine but cannot happen here.
  928. if (PReg == ARM::SP || PReg == ARM::PC)
  929. CanMergeToLSMulti = CanMergeToLSDouble = false;
  930. // Should we be conservative?
  931. if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
  932. CanMergeToLSMulti = CanMergeToLSDouble = false;
  933. // vldm / vstm limit are 32 for S variants, 16 for D variants.
  934. unsigned Limit;
  935. switch (Opcode) {
  936. default:
  937. Limit = UINT_MAX;
  938. break;
  939. case ARM::VLDRD:
  940. case ARM::VSTRD:
  941. Limit = 16;
  942. break;
  943. }
  944. // Merge following instructions where possible.
  945. for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
  946. int NewOffset = MemOps[I].Offset;
  947. if (NewOffset != Offset + (int)Size)
  948. break;
  949. const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
  950. Register Reg = MO.getReg();
  951. if (Reg == ARM::SP || Reg == ARM::PC)
  952. break;
  953. if (Count == Limit)
  954. break;
  955. // See if the current load/store may be part of a multi load/store.
  956. unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
  957. : TRI->getEncodingValue(Reg);
  958. bool PartOfLSMulti = CanMergeToLSMulti;
  959. if (PartOfLSMulti) {
  960. // Register numbers must be in ascending order.
  961. if (RegNum <= PRegNum)
  962. PartOfLSMulti = false;
  963. // For VFP / NEON load/store multiples, the registers must be
  964. // consecutive and within the limit on the number of registers per
  965. // instruction.
  966. else if (!isNotVFP && RegNum != PRegNum+1)
  967. PartOfLSMulti = false;
  968. }
  969. // See if the current load/store may be part of a double load/store.
  970. bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
  971. if (!PartOfLSMulti && !PartOfLSDouble)
  972. break;
  973. CanMergeToLSMulti &= PartOfLSMulti;
  974. CanMergeToLSDouble &= PartOfLSDouble;
  975. // Track MemOp with latest and earliest position (Positions are
  976. // counted in reverse).
  977. unsigned Position = MemOps[I].Position;
  978. if (Position < MemOps[Latest].Position)
  979. Latest = I;
  980. else if (Position > MemOps[Earliest].Position)
  981. Earliest = I;
  982. // Prepare for next MemOp.
  983. Offset += Size;
  984. PRegNum = RegNum;
  985. }
  986. // Form a candidate from the Ops collected so far.
  987. MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
  988. for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
  989. Candidate->Instrs.push_back(MemOps[C].MI);
  990. Candidate->LatestMIIdx = Latest - SIndex;
  991. Candidate->EarliestMIIdx = Earliest - SIndex;
  992. Candidate->InsertPos = MemOps[Latest].Position;
  993. if (Count == 1)
  994. CanMergeToLSMulti = CanMergeToLSDouble = false;
  995. Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
  996. Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
  997. Candidates.push_back(Candidate);
  998. // Continue after the chain.
  999. SIndex += Count;
  1000. } while (SIndex < EIndex);
  1001. }
  1002. static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
  1003. ARM_AM::AMSubMode Mode) {
  1004. switch (Opc) {
  1005. default: llvm_unreachable("Unhandled opcode!");
  1006. case ARM::LDMIA:
  1007. case ARM::LDMDA:
  1008. case ARM::LDMDB:
  1009. case ARM::LDMIB:
  1010. switch (Mode) {
  1011. default: llvm_unreachable("Unhandled submode!");
  1012. case ARM_AM::ia: return ARM::LDMIA_UPD;
  1013. case ARM_AM::ib: return ARM::LDMIB_UPD;
  1014. case ARM_AM::da: return ARM::LDMDA_UPD;
  1015. case ARM_AM::db: return ARM::LDMDB_UPD;
  1016. }
  1017. case ARM::STMIA:
  1018. case ARM::STMDA:
  1019. case ARM::STMDB:
  1020. case ARM::STMIB:
  1021. switch (Mode) {
  1022. default: llvm_unreachable("Unhandled submode!");
  1023. case ARM_AM::ia: return ARM::STMIA_UPD;
  1024. case ARM_AM::ib: return ARM::STMIB_UPD;
  1025. case ARM_AM::da: return ARM::STMDA_UPD;
  1026. case ARM_AM::db: return ARM::STMDB_UPD;
  1027. }
  1028. case ARM::t2LDMIA:
  1029. case ARM::t2LDMDB:
  1030. switch (Mode) {
  1031. default: llvm_unreachable("Unhandled submode!");
  1032. case ARM_AM::ia: return ARM::t2LDMIA_UPD;
  1033. case ARM_AM::db: return ARM::t2LDMDB_UPD;
  1034. }
  1035. case ARM::t2STMIA:
  1036. case ARM::t2STMDB:
  1037. switch (Mode) {
  1038. default: llvm_unreachable("Unhandled submode!");
  1039. case ARM_AM::ia: return ARM::t2STMIA_UPD;
  1040. case ARM_AM::db: return ARM::t2STMDB_UPD;
  1041. }
  1042. case ARM::VLDMSIA:
  1043. switch (Mode) {
  1044. default: llvm_unreachable("Unhandled submode!");
  1045. case ARM_AM::ia: return ARM::VLDMSIA_UPD;
  1046. case ARM_AM::db: return ARM::VLDMSDB_UPD;
  1047. }
  1048. case ARM::VLDMDIA:
  1049. switch (Mode) {
  1050. default: llvm_unreachable("Unhandled submode!");
  1051. case ARM_AM::ia: return ARM::VLDMDIA_UPD;
  1052. case ARM_AM::db: return ARM::VLDMDDB_UPD;
  1053. }
  1054. case ARM::VSTMSIA:
  1055. switch (Mode) {
  1056. default: llvm_unreachable("Unhandled submode!");
  1057. case ARM_AM::ia: return ARM::VSTMSIA_UPD;
  1058. case ARM_AM::db: return ARM::VSTMSDB_UPD;
  1059. }
  1060. case ARM::VSTMDIA:
  1061. switch (Mode) {
  1062. default: llvm_unreachable("Unhandled submode!");
  1063. case ARM_AM::ia: return ARM::VSTMDIA_UPD;
  1064. case ARM_AM::db: return ARM::VSTMDDB_UPD;
  1065. }
  1066. }
  1067. }
  1068. /// Check if the given instruction increments or decrements a register and
  1069. /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
  1070. /// generated by the instruction are possibly read as well.
  1071. static int isIncrementOrDecrement(const MachineInstr &MI, Register Reg,
  1072. ARMCC::CondCodes Pred, Register PredReg) {
  1073. bool CheckCPSRDef;
  1074. int Scale;
  1075. switch (MI.getOpcode()) {
  1076. case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
  1077. case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
  1078. case ARM::t2SUBri:
  1079. case ARM::t2SUBspImm:
  1080. case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
  1081. case ARM::t2ADDri:
  1082. case ARM::t2ADDspImm:
  1083. case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
  1084. case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  1085. case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  1086. default: return 0;
  1087. }
  1088. Register MIPredReg;
  1089. if (MI.getOperand(0).getReg() != Reg ||
  1090. MI.getOperand(1).getReg() != Reg ||
  1091. getInstrPredicate(MI, MIPredReg) != Pred ||
  1092. MIPredReg != PredReg)
  1093. return 0;
  1094. if (CheckCPSRDef && definesCPSR(MI))
  1095. return 0;
  1096. return MI.getOperand(2).getImm() * Scale;
  1097. }
  1098. /// Searches for an increment or decrement of \p Reg before \p MBBI.
  1099. static MachineBasicBlock::iterator
  1100. findIncDecBefore(MachineBasicBlock::iterator MBBI, Register Reg,
  1101. ARMCC::CondCodes Pred, Register PredReg, int &Offset) {
  1102. Offset = 0;
  1103. MachineBasicBlock &MBB = *MBBI->getParent();
  1104. MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  1105. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1106. if (MBBI == BeginMBBI)
  1107. return EndMBBI;
  1108. // Skip debug values.
  1109. MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  1110. while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
  1111. --PrevMBBI;
  1112. Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  1113. return Offset == 0 ? EndMBBI : PrevMBBI;
  1114. }
  1115. /// Searches for a increment or decrement of \p Reg after \p MBBI.
  1116. static MachineBasicBlock::iterator
  1117. findIncDecAfter(MachineBasicBlock::iterator MBBI, Register Reg,
  1118. ARMCC::CondCodes Pred, Register PredReg, int &Offset,
  1119. const TargetRegisterInfo *TRI) {
  1120. Offset = 0;
  1121. MachineBasicBlock &MBB = *MBBI->getParent();
  1122. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1123. MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  1124. while (NextMBBI != EndMBBI) {
  1125. // Skip debug values.
  1126. while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
  1127. ++NextMBBI;
  1128. if (NextMBBI == EndMBBI)
  1129. return EndMBBI;
  1130. unsigned Off = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  1131. if (Off) {
  1132. Offset = Off;
  1133. return NextMBBI;
  1134. }
  1135. // SP can only be combined if it is the next instruction after the original
  1136. // MBBI, otherwise we may be incrementing the stack pointer (invalidating
  1137. // anything below the new pointer) when its frame elements are still in
  1138. // use. Other registers can attempt to look further, until a different use
  1139. // or def of the register is found.
  1140. if (Reg == ARM::SP || NextMBBI->readsRegister(Reg, TRI) ||
  1141. NextMBBI->definesRegister(Reg, TRI))
  1142. return EndMBBI;
  1143. ++NextMBBI;
  1144. }
  1145. return EndMBBI;
  1146. }
  1147. /// Fold proceeding/trailing inc/dec of base register into the
  1148. /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
  1149. ///
  1150. /// stmia rn, <ra, rb, rc>
  1151. /// rn := rn + 4 * 3;
  1152. /// =>
  1153. /// stmia rn!, <ra, rb, rc>
  1154. ///
  1155. /// rn := rn - 4 * 3;
  1156. /// ldmia rn, <ra, rb, rc>
  1157. /// =>
  1158. /// ldmdb rn!, <ra, rb, rc>
  1159. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  1160. // Thumb1 is already using updating loads/stores.
  1161. if (isThumb1) return false;
  1162. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
  1163. const MachineOperand &BaseOP = MI->getOperand(0);
  1164. Register Base = BaseOP.getReg();
  1165. bool BaseKill = BaseOP.isKill();
  1166. Register PredReg;
  1167. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1168. unsigned Opcode = MI->getOpcode();
  1169. DebugLoc DL = MI->getDebugLoc();
  1170. // Can't use an updating ld/st if the base register is also a dest
  1171. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  1172. for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
  1173. if (MO.getReg() == Base)
  1174. return false;
  1175. int Bytes = getLSMultipleTransferSize(MI);
  1176. MachineBasicBlock &MBB = *MI->getParent();
  1177. MachineBasicBlock::iterator MBBI(MI);
  1178. int Offset;
  1179. MachineBasicBlock::iterator MergeInstr
  1180. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1181. ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  1182. if (Mode == ARM_AM::ia && Offset == -Bytes) {
  1183. Mode = ARM_AM::db;
  1184. } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
  1185. Mode = ARM_AM::da;
  1186. } else {
  1187. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
  1188. if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
  1189. ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
  1190. // We couldn't find an inc/dec to merge. But if the base is dead, we
  1191. // can still change to a writeback form as that will save us 2 bytes
  1192. // of code size. It can create WAW hazards though, so only do it if
  1193. // we're minimizing code size.
  1194. if (!STI->hasMinSize() || !BaseKill)
  1195. return false;
  1196. bool HighRegsUsed = false;
  1197. for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 2))
  1198. if (MO.getReg() >= ARM::R8) {
  1199. HighRegsUsed = true;
  1200. break;
  1201. }
  1202. if (!HighRegsUsed)
  1203. MergeInstr = MBB.end();
  1204. else
  1205. return false;
  1206. }
  1207. }
  1208. if (MergeInstr != MBB.end()) {
  1209. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1210. MBB.erase(MergeInstr);
  1211. }
  1212. unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  1213. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1214. .addReg(Base, getDefRegState(true)) // WB base register
  1215. .addReg(Base, getKillRegState(BaseKill))
  1216. .addImm(Pred).addReg(PredReg);
  1217. // Transfer the rest of operands.
  1218. for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 3))
  1219. MIB.add(MO);
  1220. // Transfer memoperands.
  1221. MIB.setMemRefs(MI->memoperands());
  1222. LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
  1223. MBB.erase(MBBI);
  1224. return true;
  1225. }
  1226. static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
  1227. ARM_AM::AddrOpc Mode) {
  1228. switch (Opc) {
  1229. case ARM::LDRi12:
  1230. return ARM::LDR_PRE_IMM;
  1231. case ARM::STRi12:
  1232. return ARM::STR_PRE_IMM;
  1233. case ARM::VLDRS:
  1234. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1235. case ARM::VLDRD:
  1236. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1237. case ARM::VSTRS:
  1238. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1239. case ARM::VSTRD:
  1240. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1241. case ARM::t2LDRi8:
  1242. case ARM::t2LDRi12:
  1243. return ARM::t2LDR_PRE;
  1244. case ARM::t2STRi8:
  1245. case ARM::t2STRi12:
  1246. return ARM::t2STR_PRE;
  1247. default: llvm_unreachable("Unhandled opcode!");
  1248. }
  1249. }
  1250. static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
  1251. ARM_AM::AddrOpc Mode) {
  1252. switch (Opc) {
  1253. case ARM::LDRi12:
  1254. return ARM::LDR_POST_IMM;
  1255. case ARM::STRi12:
  1256. return ARM::STR_POST_IMM;
  1257. case ARM::VLDRS:
  1258. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1259. case ARM::VLDRD:
  1260. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1261. case ARM::VSTRS:
  1262. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1263. case ARM::VSTRD:
  1264. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1265. case ARM::t2LDRi8:
  1266. case ARM::t2LDRi12:
  1267. return ARM::t2LDR_POST;
  1268. case ARM::t2LDRBi8:
  1269. case ARM::t2LDRBi12:
  1270. return ARM::t2LDRB_POST;
  1271. case ARM::t2LDRSBi8:
  1272. case ARM::t2LDRSBi12:
  1273. return ARM::t2LDRSB_POST;
  1274. case ARM::t2LDRHi8:
  1275. case ARM::t2LDRHi12:
  1276. return ARM::t2LDRH_POST;
  1277. case ARM::t2LDRSHi8:
  1278. case ARM::t2LDRSHi12:
  1279. return ARM::t2LDRSH_POST;
  1280. case ARM::t2STRi8:
  1281. case ARM::t2STRi12:
  1282. return ARM::t2STR_POST;
  1283. case ARM::t2STRBi8:
  1284. case ARM::t2STRBi12:
  1285. return ARM::t2STRB_POST;
  1286. case ARM::t2STRHi8:
  1287. case ARM::t2STRHi12:
  1288. return ARM::t2STRH_POST;
  1289. case ARM::MVE_VLDRBS16:
  1290. return ARM::MVE_VLDRBS16_post;
  1291. case ARM::MVE_VLDRBS32:
  1292. return ARM::MVE_VLDRBS32_post;
  1293. case ARM::MVE_VLDRBU16:
  1294. return ARM::MVE_VLDRBU16_post;
  1295. case ARM::MVE_VLDRBU32:
  1296. return ARM::MVE_VLDRBU32_post;
  1297. case ARM::MVE_VLDRHS32:
  1298. return ARM::MVE_VLDRHS32_post;
  1299. case ARM::MVE_VLDRHU32:
  1300. return ARM::MVE_VLDRHU32_post;
  1301. case ARM::MVE_VLDRBU8:
  1302. return ARM::MVE_VLDRBU8_post;
  1303. case ARM::MVE_VLDRHU16:
  1304. return ARM::MVE_VLDRHU16_post;
  1305. case ARM::MVE_VLDRWU32:
  1306. return ARM::MVE_VLDRWU32_post;
  1307. case ARM::MVE_VSTRB16:
  1308. return ARM::MVE_VSTRB16_post;
  1309. case ARM::MVE_VSTRB32:
  1310. return ARM::MVE_VSTRB32_post;
  1311. case ARM::MVE_VSTRH32:
  1312. return ARM::MVE_VSTRH32_post;
  1313. case ARM::MVE_VSTRBU8:
  1314. return ARM::MVE_VSTRBU8_post;
  1315. case ARM::MVE_VSTRHU16:
  1316. return ARM::MVE_VSTRHU16_post;
  1317. case ARM::MVE_VSTRWU32:
  1318. return ARM::MVE_VSTRWU32_post;
  1319. default: llvm_unreachable("Unhandled opcode!");
  1320. }
  1321. }
  1322. /// Fold proceeding/trailing inc/dec of base register into the
  1323. /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
  1324. bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  1325. // Thumb1 doesn't have updating LDR/STR.
  1326. // FIXME: Use LDM/STM with single register instead.
  1327. if (isThumb1) return false;
  1328. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << *MI);
  1329. Register Base = getLoadStoreBaseOp(*MI).getReg();
  1330. bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  1331. unsigned Opcode = MI->getOpcode();
  1332. DebugLoc DL = MI->getDebugLoc();
  1333. bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
  1334. Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  1335. bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  1336. if (isi32Load(Opcode) || isi32Store(Opcode))
  1337. if (MI->getOperand(2).getImm() != 0)
  1338. return false;
  1339. if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  1340. return false;
  1341. // Can't do the merge if the destination register is the same as the would-be
  1342. // writeback register.
  1343. if (MI->getOperand(0).getReg() == Base)
  1344. return false;
  1345. Register PredReg;
  1346. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1347. int Bytes = getLSMultipleTransferSize(MI);
  1348. MachineBasicBlock &MBB = *MI->getParent();
  1349. MachineBasicBlock::iterator MBBI(MI);
  1350. int Offset;
  1351. MachineBasicBlock::iterator MergeInstr
  1352. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1353. unsigned NewOpc;
  1354. if (!isAM5 && Offset == Bytes) {
  1355. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1356. } else if (Offset == -Bytes) {
  1357. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1358. } else {
  1359. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
  1360. if (MergeInstr == MBB.end())
  1361. return false;
  1362. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1363. if ((isAM5 && Offset != Bytes) ||
  1364. (!isAM5 && !isLegalAddressImm(NewOpc, Offset, TII))) {
  1365. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1366. if (isAM5 || !isLegalAddressImm(NewOpc, Offset, TII))
  1367. return false;
  1368. }
  1369. }
  1370. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1371. MBB.erase(MergeInstr);
  1372. ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
  1373. bool isLd = isLoadSingle(Opcode);
  1374. if (isAM5) {
  1375. // VLDM[SD]_UPD, VSTM[SD]_UPD
  1376. // (There are no base-updating versions of VLDR/VSTR instructions, but the
  1377. // updating load/store-multiple instructions can be used with only one
  1378. // register.)
  1379. MachineOperand &MO = MI->getOperand(0);
  1380. auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1381. .addReg(Base, getDefRegState(true)) // WB base register
  1382. .addReg(Base, getKillRegState(isLd ? BaseKill : false))
  1383. .addImm(Pred)
  1384. .addReg(PredReg)
  1385. .addReg(MO.getReg(), (isLd ? getDefRegState(true)
  1386. : getKillRegState(MO.isKill())))
  1387. .cloneMemRefs(*MI);
  1388. (void)MIB;
  1389. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1390. } else if (isLd) {
  1391. if (isAM2) {
  1392. // LDR_PRE, LDR_POST
  1393. if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
  1394. auto MIB =
  1395. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1396. .addReg(Base, RegState::Define)
  1397. .addReg(Base)
  1398. .addImm(Offset)
  1399. .addImm(Pred)
  1400. .addReg(PredReg)
  1401. .cloneMemRefs(*MI);
  1402. (void)MIB;
  1403. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1404. } else {
  1405. int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
  1406. auto MIB =
  1407. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1408. .addReg(Base, RegState::Define)
  1409. .addReg(Base)
  1410. .addReg(0)
  1411. .addImm(Imm)
  1412. .add(predOps(Pred, PredReg))
  1413. .cloneMemRefs(*MI);
  1414. (void)MIB;
  1415. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1416. }
  1417. } else {
  1418. // t2LDR_PRE, t2LDR_POST
  1419. auto MIB =
  1420. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1421. .addReg(Base, RegState::Define)
  1422. .addReg(Base)
  1423. .addImm(Offset)
  1424. .add(predOps(Pred, PredReg))
  1425. .cloneMemRefs(*MI);
  1426. (void)MIB;
  1427. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1428. }
  1429. } else {
  1430. MachineOperand &MO = MI->getOperand(0);
  1431. // FIXME: post-indexed stores use am2offset_imm, which still encodes
  1432. // the vestigal zero-reg offset register. When that's fixed, this clause
  1433. // can be removed entirely.
  1434. if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
  1435. int Imm = ARM_AM::getAM2Opc(AddSub, abs(Offset), ARM_AM::no_shift);
  1436. // STR_PRE, STR_POST
  1437. auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1438. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1439. .addReg(Base)
  1440. .addReg(0)
  1441. .addImm(Imm)
  1442. .add(predOps(Pred, PredReg))
  1443. .cloneMemRefs(*MI);
  1444. (void)MIB;
  1445. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1446. } else {
  1447. // t2STR_PRE, t2STR_POST
  1448. auto MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1449. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1450. .addReg(Base)
  1451. .addImm(Offset)
  1452. .add(predOps(Pred, PredReg))
  1453. .cloneMemRefs(*MI);
  1454. (void)MIB;
  1455. LLVM_DEBUG(dbgs() << " Added new instruction: " << *MIB);
  1456. }
  1457. }
  1458. MBB.erase(MBBI);
  1459. return true;
  1460. }
  1461. bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  1462. unsigned Opcode = MI.getOpcode();
  1463. assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
  1464. "Must have t2STRDi8 or t2LDRDi8");
  1465. if (MI.getOperand(3).getImm() != 0)
  1466. return false;
  1467. LLVM_DEBUG(dbgs() << "Attempting to merge update of: " << MI);
  1468. // Behaviour for writeback is undefined if base register is the same as one
  1469. // of the others.
  1470. const MachineOperand &BaseOp = MI.getOperand(2);
  1471. Register Base = BaseOp.getReg();
  1472. const MachineOperand &Reg0Op = MI.getOperand(0);
  1473. const MachineOperand &Reg1Op = MI.getOperand(1);
  1474. if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
  1475. return false;
  1476. Register PredReg;
  1477. ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  1478. MachineBasicBlock::iterator MBBI(MI);
  1479. MachineBasicBlock &MBB = *MI.getParent();
  1480. int Offset;
  1481. MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
  1482. PredReg, Offset);
  1483. unsigned NewOpc;
  1484. if (Offset == 8 || Offset == -8) {
  1485. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  1486. } else {
  1487. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset, TRI);
  1488. if (MergeInstr == MBB.end())
  1489. return false;
  1490. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
  1491. if (!isLegalAddressImm(NewOpc, Offset, TII))
  1492. return false;
  1493. }
  1494. LLVM_DEBUG(dbgs() << " Erasing old increment: " << *MergeInstr);
  1495. MBB.erase(MergeInstr);
  1496. DebugLoc DL = MI.getDebugLoc();
  1497. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  1498. if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
  1499. MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  1500. } else {
  1501. assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
  1502. MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  1503. }
  1504. MIB.addReg(BaseOp.getReg(), RegState::Kill)
  1505. .addImm(Offset).addImm(Pred).addReg(PredReg);
  1506. assert(TII->get(Opcode).getNumOperands() == 6 &&
  1507. TII->get(NewOpc).getNumOperands() == 7 &&
  1508. "Unexpected number of operands in Opcode specification.");
  1509. // Transfer implicit operands.
  1510. for (const MachineOperand &MO : MI.implicit_operands())
  1511. MIB.add(MO);
  1512. MIB.cloneMemRefs(MI);
  1513. LLVM_DEBUG(dbgs() << " Added new load/store: " << *MIB);
  1514. MBB.erase(MBBI);
  1515. return true;
  1516. }
  1517. /// Returns true if instruction is a memory operation that this pass is capable
  1518. /// of operating on.
  1519. static bool isMemoryOp(const MachineInstr &MI) {
  1520. unsigned Opcode = MI.getOpcode();
  1521. switch (Opcode) {
  1522. case ARM::VLDRS:
  1523. case ARM::VSTRS:
  1524. case ARM::VLDRD:
  1525. case ARM::VSTRD:
  1526. case ARM::LDRi12:
  1527. case ARM::STRi12:
  1528. case ARM::tLDRi:
  1529. case ARM::tSTRi:
  1530. case ARM::tLDRspi:
  1531. case ARM::tSTRspi:
  1532. case ARM::t2LDRi8:
  1533. case ARM::t2LDRi12:
  1534. case ARM::t2STRi8:
  1535. case ARM::t2STRi12:
  1536. break;
  1537. default:
  1538. return false;
  1539. }
  1540. if (!MI.getOperand(1).isReg())
  1541. return false;
  1542. // When no memory operands are present, conservatively assume unaligned,
  1543. // volatile, unfoldable.
  1544. if (!MI.hasOneMemOperand())
  1545. return false;
  1546. const MachineMemOperand &MMO = **MI.memoperands_begin();
  1547. // Don't touch volatile memory accesses - we may be changing their order.
  1548. // TODO: We could allow unordered and monotonic atomics here, but we need to
  1549. // make sure the resulting ldm/stm is correctly marked as atomic.
  1550. if (MMO.isVolatile() || MMO.isAtomic())
  1551. return false;
  1552. // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  1553. // not.
  1554. if (MMO.getAlign() < Align(4))
  1555. return false;
  1556. // str <undef> could probably be eliminated entirely, but for now we just want
  1557. // to avoid making a mess of it.
  1558. // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  1559. if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
  1560. return false;
  1561. // Likewise don't mess with references to undefined addresses.
  1562. if (MI.getOperand(1).isUndef())
  1563. return false;
  1564. return true;
  1565. }
  1566. static void InsertLDR_STR(MachineBasicBlock &MBB,
  1567. MachineBasicBlock::iterator &MBBI, int Offset,
  1568. bool isDef, unsigned NewOpc, unsigned Reg,
  1569. bool RegDeadKill, bool RegUndef, unsigned BaseReg,
  1570. bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
  1571. unsigned PredReg, const TargetInstrInfo *TII,
  1572. MachineInstr *MI) {
  1573. if (isDef) {
  1574. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1575. TII->get(NewOpc))
  1576. .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
  1577. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1578. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1579. // FIXME: This is overly conservative; the new instruction accesses 4
  1580. // bytes, not 8.
  1581. MIB.cloneMemRefs(*MI);
  1582. } else {
  1583. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1584. TII->get(NewOpc))
  1585. .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
  1586. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1587. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1588. // FIXME: This is overly conservative; the new instruction accesses 4
  1589. // bytes, not 8.
  1590. MIB.cloneMemRefs(*MI);
  1591. }
  1592. }
  1593. bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  1594. MachineBasicBlock::iterator &MBBI) {
  1595. MachineInstr *MI = &*MBBI;
  1596. unsigned Opcode = MI->getOpcode();
  1597. // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
  1598. // if we see this opcode.
  1599. if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
  1600. return false;
  1601. const MachineOperand &BaseOp = MI->getOperand(2);
  1602. Register BaseReg = BaseOp.getReg();
  1603. Register EvenReg = MI->getOperand(0).getReg();
  1604. Register OddReg = MI->getOperand(1).getReg();
  1605. unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  1606. unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
  1607. // ARM errata 602117: LDRD with base in list may result in incorrect base
  1608. // register when interrupted or faulted.
  1609. bool Errata602117 = EvenReg == BaseReg &&
  1610. (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  1611. // ARM LDRD/STRD needs consecutive registers.
  1612. bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
  1613. (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
  1614. if (!Errata602117 && !NonConsecutiveRegs)
  1615. return false;
  1616. bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  1617. bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  1618. bool EvenDeadKill = isLd ?
  1619. MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  1620. bool EvenUndef = MI->getOperand(0).isUndef();
  1621. bool OddDeadKill = isLd ?
  1622. MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  1623. bool OddUndef = MI->getOperand(1).isUndef();
  1624. bool BaseKill = BaseOp.isKill();
  1625. bool BaseUndef = BaseOp.isUndef();
  1626. assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
  1627. "register offset not handled below");
  1628. int OffImm = getMemoryOpOffset(*MI);
  1629. Register PredReg;
  1630. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1631. if (OddRegNum > EvenRegNum && OffImm == 0) {
  1632. // Ascending register numbers and no offset. It's safe to change it to a
  1633. // ldm or stm.
  1634. unsigned NewOpc = (isLd)
  1635. ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
  1636. : (isT2 ? ARM::t2STMIA : ARM::STMIA);
  1637. if (isLd) {
  1638. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1639. .addReg(BaseReg, getKillRegState(BaseKill))
  1640. .addImm(Pred).addReg(PredReg)
  1641. .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
  1642. .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
  1643. .cloneMemRefs(*MI);
  1644. ++NumLDRD2LDM;
  1645. } else {
  1646. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1647. .addReg(BaseReg, getKillRegState(BaseKill))
  1648. .addImm(Pred).addReg(PredReg)
  1649. .addReg(EvenReg,
  1650. getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
  1651. .addReg(OddReg,
  1652. getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
  1653. .cloneMemRefs(*MI);
  1654. ++NumSTRD2STM;
  1655. }
  1656. } else {
  1657. // Split into two instructions.
  1658. unsigned NewOpc = (isLd)
  1659. ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1660. : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1661. // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
  1662. // so adjust and use t2LDRi12 here for that.
  1663. unsigned NewOpc2 = (isLd)
  1664. ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1665. : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1666. // If this is a load, make sure the first load does not clobber the base
  1667. // register before the second load reads it.
  1668. if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
  1669. assert(!TRI->regsOverlap(OddReg, BaseReg));
  1670. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1671. false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
  1672. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1673. false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1674. MI);
  1675. } else {
  1676. if (OddReg == EvenReg && EvenDeadKill) {
  1677. // If the two source operands are the same, the kill marker is
  1678. // probably on the first one. e.g.
  1679. // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
  1680. EvenDeadKill = false;
  1681. OddDeadKill = true;
  1682. }
  1683. // Never kill the base register in the first instruction.
  1684. if (EvenReg == BaseReg)
  1685. EvenDeadKill = false;
  1686. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1687. EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
  1688. MI);
  1689. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1690. OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1691. MI);
  1692. }
  1693. if (isLd)
  1694. ++NumLDRD2LDR;
  1695. else
  1696. ++NumSTRD2STR;
  1697. }
  1698. MBBI = MBB.erase(MBBI);
  1699. return true;
  1700. }
  1701. /// An optimization pass to turn multiple LDR / STR ops of the same base and
  1702. /// incrementing offset into LDM / STM ops.
  1703. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  1704. MemOpQueue MemOps;
  1705. unsigned CurrBase = 0;
  1706. unsigned CurrOpc = ~0u;
  1707. ARMCC::CondCodes CurrPred = ARMCC::AL;
  1708. unsigned Position = 0;
  1709. assert(Candidates.size() == 0);
  1710. assert(MergeBaseCandidates.size() == 0);
  1711. LiveRegsValid = false;
  1712. for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
  1713. I = MBBI) {
  1714. // The instruction in front of the iterator is the one we look at.
  1715. MBBI = std::prev(I);
  1716. if (FixInvalidRegPairOp(MBB, MBBI))
  1717. continue;
  1718. ++Position;
  1719. if (isMemoryOp(*MBBI)) {
  1720. unsigned Opcode = MBBI->getOpcode();
  1721. const MachineOperand &MO = MBBI->getOperand(0);
  1722. Register Reg = MO.getReg();
  1723. Register Base = getLoadStoreBaseOp(*MBBI).getReg();
  1724. Register PredReg;
  1725. ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
  1726. int Offset = getMemoryOpOffset(*MBBI);
  1727. if (CurrBase == 0) {
  1728. // Start of a new chain.
  1729. CurrBase = Base;
  1730. CurrOpc = Opcode;
  1731. CurrPred = Pred;
  1732. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1733. continue;
  1734. }
  1735. // Note: No need to match PredReg in the next if.
  1736. if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
  1737. // Watch out for:
  1738. // r4 := ldr [r0, #8]
  1739. // r4 := ldr [r0, #4]
  1740. // or
  1741. // r0 := ldr [r0]
  1742. // If a load overrides the base register or a register loaded by
  1743. // another load in our chain, we cannot take this instruction.
  1744. bool Overlap = false;
  1745. if (isLoadSingle(Opcode)) {
  1746. Overlap = (Base == Reg);
  1747. if (!Overlap) {
  1748. for (const MemOpQueueEntry &E : MemOps) {
  1749. if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
  1750. Overlap = true;
  1751. break;
  1752. }
  1753. }
  1754. }
  1755. }
  1756. if (!Overlap) {
  1757. // Check offset and sort memory operation into the current chain.
  1758. if (Offset > MemOps.back().Offset) {
  1759. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1760. continue;
  1761. } else {
  1762. MemOpQueue::iterator MI, ME;
  1763. for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
  1764. if (Offset < MI->Offset) {
  1765. // Found a place to insert.
  1766. break;
  1767. }
  1768. if (Offset == MI->Offset) {
  1769. // Collision, abort.
  1770. MI = ME;
  1771. break;
  1772. }
  1773. }
  1774. if (MI != MemOps.end()) {
  1775. MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
  1776. continue;
  1777. }
  1778. }
  1779. }
  1780. }
  1781. // Don't advance the iterator; The op will start a new chain next.
  1782. MBBI = I;
  1783. --Position;
  1784. // Fallthrough to look into existing chain.
  1785. } else if (MBBI->isDebugInstr()) {
  1786. continue;
  1787. } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
  1788. MBBI->getOpcode() == ARM::t2STRDi8) {
  1789. // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
  1790. // remember them because we may still be able to merge add/sub into them.
  1791. MergeBaseCandidates.push_back(&*MBBI);
  1792. }
  1793. // If we are here then the chain is broken; Extract candidates for a merge.
  1794. if (MemOps.size() > 0) {
  1795. FormCandidates(MemOps);
  1796. // Reset for the next chain.
  1797. CurrBase = 0;
  1798. CurrOpc = ~0u;
  1799. CurrPred = ARMCC::AL;
  1800. MemOps.clear();
  1801. }
  1802. }
  1803. if (MemOps.size() > 0)
  1804. FormCandidates(MemOps);
  1805. // Sort candidates so they get processed from end to begin of the basic
  1806. // block later; This is necessary for liveness calculation.
  1807. auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
  1808. return M0->InsertPos < M1->InsertPos;
  1809. };
  1810. llvm::sort(Candidates, LessThan);
  1811. // Go through list of candidates and merge.
  1812. bool Changed = false;
  1813. for (const MergeCandidate *Candidate : Candidates) {
  1814. if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
  1815. MachineInstr *Merged = MergeOpsUpdate(*Candidate);
  1816. // Merge preceding/trailing base inc/dec into the merged op.
  1817. if (Merged) {
  1818. Changed = true;
  1819. unsigned Opcode = Merged->getOpcode();
  1820. if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
  1821. MergeBaseUpdateLSDouble(*Merged);
  1822. else
  1823. MergeBaseUpdateLSMultiple(Merged);
  1824. } else {
  1825. for (MachineInstr *MI : Candidate->Instrs) {
  1826. if (MergeBaseUpdateLoadStore(MI))
  1827. Changed = true;
  1828. }
  1829. }
  1830. } else {
  1831. assert(Candidate->Instrs.size() == 1);
  1832. if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
  1833. Changed = true;
  1834. }
  1835. }
  1836. Candidates.clear();
  1837. // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  1838. for (MachineInstr *MI : MergeBaseCandidates)
  1839. MergeBaseUpdateLSDouble(*MI);
  1840. MergeBaseCandidates.clear();
  1841. return Changed;
  1842. }
  1843. /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
  1844. /// into the preceding stack restore so it directly restore the value of LR
  1845. /// into pc.
  1846. /// ldmfd sp!, {..., lr}
  1847. /// bx lr
  1848. /// or
  1849. /// ldmfd sp!, {..., lr}
  1850. /// mov pc, lr
  1851. /// =>
  1852. /// ldmfd sp!, {..., pc}
  1853. bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  1854. // Thumb1 LDM doesn't allow high registers.
  1855. if (isThumb1) return false;
  1856. if (MBB.empty()) return false;
  1857. MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  1858. if (MBBI != MBB.begin() && MBBI != MBB.end() &&
  1859. (MBBI->getOpcode() == ARM::BX_RET ||
  1860. MBBI->getOpcode() == ARM::tBX_RET ||
  1861. MBBI->getOpcode() == ARM::MOVPCLR)) {
  1862. MachineBasicBlock::iterator PrevI = std::prev(MBBI);
  1863. // Ignore any debug instructions.
  1864. while (PrevI->isDebugInstr() && PrevI != MBB.begin())
  1865. --PrevI;
  1866. MachineInstr &PrevMI = *PrevI;
  1867. unsigned Opcode = PrevMI.getOpcode();
  1868. if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
  1869. Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
  1870. Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
  1871. MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
  1872. if (MO.getReg() != ARM::LR)
  1873. return false;
  1874. unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
  1875. assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
  1876. Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
  1877. PrevMI.setDesc(TII->get(NewOpc));
  1878. MO.setReg(ARM::PC);
  1879. PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
  1880. MBB.erase(MBBI);
  1881. // We now restore LR into PC so it is not live-out of the return block
  1882. // anymore: Clear the CSI Restored bit.
  1883. MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
  1884. // CSI should be fixed after PrologEpilog Insertion
  1885. assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
  1886. for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
  1887. if (Info.getReg() == ARM::LR) {
  1888. Info.setRestored(false);
  1889. break;
  1890. }
  1891. }
  1892. return true;
  1893. }
  1894. }
  1895. return false;
  1896. }
  1897. bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  1898. MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  1899. if (MBBI == MBB.begin() || MBBI == MBB.end() ||
  1900. MBBI->getOpcode() != ARM::tBX_RET)
  1901. return false;
  1902. MachineBasicBlock::iterator Prev = MBBI;
  1903. --Prev;
  1904. if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
  1905. return false;
  1906. for (auto Use : Prev->uses())
  1907. if (Use.isKill()) {
  1908. assert(STI->hasV4TOps());
  1909. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
  1910. .addReg(Use.getReg(), RegState::Kill)
  1911. .add(predOps(ARMCC::AL))
  1912. .copyImplicitOps(*MBBI);
  1913. MBB.erase(MBBI);
  1914. MBB.erase(Prev);
  1915. return true;
  1916. }
  1917. llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
  1918. }
  1919. bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1920. if (skipFunction(Fn.getFunction()))
  1921. return false;
  1922. MF = &Fn;
  1923. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1924. TL = STI->getTargetLowering();
  1925. AFI = Fn.getInfo<ARMFunctionInfo>();
  1926. TII = STI->getInstrInfo();
  1927. TRI = STI->getRegisterInfo();
  1928. RegClassInfoValid = false;
  1929. isThumb2 = AFI->isThumb2Function();
  1930. isThumb1 = AFI->isThumbFunction() && !isThumb2;
  1931. bool Modified = false;
  1932. for (MachineBasicBlock &MBB : Fn) {
  1933. Modified |= LoadStoreMultipleOpti(MBB);
  1934. if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress())
  1935. Modified |= MergeReturnIntoLDM(MBB);
  1936. if (isThumb1)
  1937. Modified |= CombineMovBx(MBB);
  1938. }
  1939. Allocator.DestroyAll();
  1940. return Modified;
  1941. }
  1942. #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  1943. "ARM pre- register allocation load / store optimization pass"
  1944. namespace {
  1945. /// Pre- register allocation pass that move load / stores from consecutive
  1946. /// locations close to make it more likely they will be combined later.
  1947. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  1948. static char ID;
  1949. AliasAnalysis *AA;
  1950. const DataLayout *TD;
  1951. const TargetInstrInfo *TII;
  1952. const TargetRegisterInfo *TRI;
  1953. const ARMSubtarget *STI;
  1954. MachineRegisterInfo *MRI;
  1955. MachineDominatorTree *DT;
  1956. MachineFunction *MF;
  1957. ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
  1958. bool runOnMachineFunction(MachineFunction &Fn) override;
  1959. StringRef getPassName() const override {
  1960. return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  1961. }
  1962. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1963. AU.addRequired<AAResultsWrapperPass>();
  1964. AU.addRequired<MachineDominatorTree>();
  1965. AU.addPreserved<MachineDominatorTree>();
  1966. MachineFunctionPass::getAnalysisUsage(AU);
  1967. }
  1968. private:
  1969. bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
  1970. unsigned &NewOpc, Register &EvenReg, Register &OddReg,
  1971. Register &BaseReg, int &Offset, Register &PredReg,
  1972. ARMCC::CondCodes &Pred, bool &isT2);
  1973. bool RescheduleOps(MachineBasicBlock *MBB,
  1974. SmallVectorImpl<MachineInstr *> &Ops,
  1975. unsigned Base, bool isLd,
  1976. DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  1977. bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  1978. bool DistributeIncrements();
  1979. bool DistributeIncrements(Register Base);
  1980. };
  1981. } // end anonymous namespace
  1982. char ARMPreAllocLoadStoreOpt::ID = 0;
  1983. INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
  1984. ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  1985. INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
  1986. INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
  1987. ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  1988. // Limit the number of instructions to be rescheduled.
  1989. // FIXME: tune this limit, and/or come up with some better heuristics.
  1990. static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
  1991. cl::init(8), cl::Hidden);
  1992. bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1993. if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
  1994. return false;
  1995. TD = &Fn.getDataLayout();
  1996. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1997. TII = STI->getInstrInfo();
  1998. TRI = STI->getRegisterInfo();
  1999. MRI = &Fn.getRegInfo();
  2000. DT = &getAnalysis<MachineDominatorTree>();
  2001. MF = &Fn;
  2002. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  2003. bool Modified = DistributeIncrements();
  2004. for (MachineBasicBlock &MFI : Fn)
  2005. Modified |= RescheduleLoadStoreInstrs(&MFI);
  2006. return Modified;
  2007. }
  2008. static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
  2009. MachineBasicBlock::iterator I,
  2010. MachineBasicBlock::iterator E,
  2011. SmallPtrSetImpl<MachineInstr*> &MemOps,
  2012. SmallSet<unsigned, 4> &MemRegs,
  2013. const TargetRegisterInfo *TRI,
  2014. AliasAnalysis *AA) {
  2015. // Are there stores / loads / calls between them?
  2016. SmallSet<unsigned, 4> AddedRegPressure;
  2017. while (++I != E) {
  2018. if (I->isDebugInstr() || MemOps.count(&*I))
  2019. continue;
  2020. if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
  2021. return false;
  2022. if (I->mayStore() || (!isLd && I->mayLoad()))
  2023. for (MachineInstr *MemOp : MemOps)
  2024. if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
  2025. return false;
  2026. for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
  2027. MachineOperand &MO = I->getOperand(j);
  2028. if (!MO.isReg())
  2029. continue;
  2030. Register Reg = MO.getReg();
  2031. if (MO.isDef() && TRI->regsOverlap(Reg, Base))
  2032. return false;
  2033. if (Reg != Base && !MemRegs.count(Reg))
  2034. AddedRegPressure.insert(Reg);
  2035. }
  2036. }
  2037. // Estimate register pressure increase due to the transformation.
  2038. if (MemRegs.size() <= 4)
  2039. // Ok if we are moving small number of instructions.
  2040. return true;
  2041. return AddedRegPressure.size() <= MemRegs.size() * 2;
  2042. }
  2043. bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(
  2044. MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, unsigned &NewOpc,
  2045. Register &FirstReg, Register &SecondReg, Register &BaseReg, int &Offset,
  2046. Register &PredReg, ARMCC::CondCodes &Pred, bool &isT2) {
  2047. // Make sure we're allowed to generate LDRD/STRD.
  2048. if (!STI->hasV5TEOps())
  2049. return false;
  2050. // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  2051. unsigned Scale = 1;
  2052. unsigned Opcode = Op0->getOpcode();
  2053. if (Opcode == ARM::LDRi12) {
  2054. NewOpc = ARM::LDRD;
  2055. } else if (Opcode == ARM::STRi12) {
  2056. NewOpc = ARM::STRD;
  2057. } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
  2058. NewOpc = ARM::t2LDRDi8;
  2059. Scale = 4;
  2060. isT2 = true;
  2061. } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
  2062. NewOpc = ARM::t2STRDi8;
  2063. Scale = 4;
  2064. isT2 = true;
  2065. } else {
  2066. return false;
  2067. }
  2068. // Make sure the base address satisfies i64 ld / st alignment requirement.
  2069. // At the moment, we ignore the memoryoperand's value.
  2070. // If we want to use AliasAnalysis, we should check it accordingly.
  2071. if (!Op0->hasOneMemOperand() ||
  2072. (*Op0->memoperands_begin())->isVolatile() ||
  2073. (*Op0->memoperands_begin())->isAtomic())
  2074. return false;
  2075. Align Alignment = (*Op0->memoperands_begin())->getAlign();
  2076. const Function &Func = MF->getFunction();
  2077. Align ReqAlign =
  2078. STI->hasV6Ops() ? TD->getABITypeAlign(Type::getInt64Ty(Func.getContext()))
  2079. : Align(8); // Pre-v6 need 8-byte align
  2080. if (Alignment < ReqAlign)
  2081. return false;
  2082. // Then make sure the immediate offset fits.
  2083. int OffImm = getMemoryOpOffset(*Op0);
  2084. if (isT2) {
  2085. int Limit = (1 << 8) * Scale;
  2086. if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
  2087. return false;
  2088. Offset = OffImm;
  2089. } else {
  2090. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  2091. if (OffImm < 0) {
  2092. AddSub = ARM_AM::sub;
  2093. OffImm = - OffImm;
  2094. }
  2095. int Limit = (1 << 8) * Scale;
  2096. if (OffImm >= Limit || (OffImm & (Scale-1)))
  2097. return false;
  2098. Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  2099. }
  2100. FirstReg = Op0->getOperand(0).getReg();
  2101. SecondReg = Op1->getOperand(0).getReg();
  2102. if (FirstReg == SecondReg)
  2103. return false;
  2104. BaseReg = Op0->getOperand(1).getReg();
  2105. Pred = getInstrPredicate(*Op0, PredReg);
  2106. dl = Op0->getDebugLoc();
  2107. return true;
  2108. }
  2109. bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
  2110. SmallVectorImpl<MachineInstr *> &Ops,
  2111. unsigned Base, bool isLd,
  2112. DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  2113. bool RetVal = false;
  2114. // Sort by offset (in reverse order).
  2115. llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
  2116. int LOffset = getMemoryOpOffset(*LHS);
  2117. int ROffset = getMemoryOpOffset(*RHS);
  2118. assert(LHS == RHS || LOffset != ROffset);
  2119. return LOffset > ROffset;
  2120. });
  2121. // The loads / stores of the same base are in order. Scan them from first to
  2122. // last and check for the following:
  2123. // 1. Any def of base.
  2124. // 2. Any gaps.
  2125. while (Ops.size() > 1) {
  2126. unsigned FirstLoc = ~0U;
  2127. unsigned LastLoc = 0;
  2128. MachineInstr *FirstOp = nullptr;
  2129. MachineInstr *LastOp = nullptr;
  2130. int LastOffset = 0;
  2131. unsigned LastOpcode = 0;
  2132. unsigned LastBytes = 0;
  2133. unsigned NumMove = 0;
  2134. for (MachineInstr *Op : llvm::reverse(Ops)) {
  2135. // Make sure each operation has the same kind.
  2136. unsigned LSMOpcode
  2137. = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
  2138. if (LastOpcode && LSMOpcode != LastOpcode)
  2139. break;
  2140. // Check that we have a continuous set of offsets.
  2141. int Offset = getMemoryOpOffset(*Op);
  2142. unsigned Bytes = getLSMultipleTransferSize(Op);
  2143. if (LastBytes) {
  2144. if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
  2145. break;
  2146. }
  2147. // Don't try to reschedule too many instructions.
  2148. if (NumMove == InstReorderLimit)
  2149. break;
  2150. // Found a mergable instruction; save information about it.
  2151. ++NumMove;
  2152. LastOffset = Offset;
  2153. LastBytes = Bytes;
  2154. LastOpcode = LSMOpcode;
  2155. unsigned Loc = MI2LocMap[Op];
  2156. if (Loc <= FirstLoc) {
  2157. FirstLoc = Loc;
  2158. FirstOp = Op;
  2159. }
  2160. if (Loc >= LastLoc) {
  2161. LastLoc = Loc;
  2162. LastOp = Op;
  2163. }
  2164. }
  2165. if (NumMove <= 1)
  2166. Ops.pop_back();
  2167. else {
  2168. SmallPtrSet<MachineInstr*, 4> MemOps;
  2169. SmallSet<unsigned, 4> MemRegs;
  2170. for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
  2171. MemOps.insert(Ops[i]);
  2172. MemRegs.insert(Ops[i]->getOperand(0).getReg());
  2173. }
  2174. // Be conservative, if the instructions are too far apart, don't
  2175. // move them. We want to limit the increase of register pressure.
  2176. bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
  2177. if (DoMove)
  2178. DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
  2179. MemOps, MemRegs, TRI, AA);
  2180. if (!DoMove) {
  2181. for (unsigned i = 0; i != NumMove; ++i)
  2182. Ops.pop_back();
  2183. } else {
  2184. // This is the new location for the loads / stores.
  2185. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
  2186. while (InsertPos != MBB->end() &&
  2187. (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
  2188. ++InsertPos;
  2189. // If we are moving a pair of loads / stores, see if it makes sense
  2190. // to try to allocate a pair of registers that can form register pairs.
  2191. MachineInstr *Op0 = Ops.back();
  2192. MachineInstr *Op1 = Ops[Ops.size()-2];
  2193. Register FirstReg, SecondReg;
  2194. Register BaseReg, PredReg;
  2195. ARMCC::CondCodes Pred = ARMCC::AL;
  2196. bool isT2 = false;
  2197. unsigned NewOpc = 0;
  2198. int Offset = 0;
  2199. DebugLoc dl;
  2200. if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
  2201. FirstReg, SecondReg, BaseReg,
  2202. Offset, PredReg, Pred, isT2)) {
  2203. Ops.pop_back();
  2204. Ops.pop_back();
  2205. const MCInstrDesc &MCID = TII->get(NewOpc);
  2206. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2207. MRI->constrainRegClass(FirstReg, TRC);
  2208. MRI->constrainRegClass(SecondReg, TRC);
  2209. // Form the pair instruction.
  2210. if (isLd) {
  2211. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2212. .addReg(FirstReg, RegState::Define)
  2213. .addReg(SecondReg, RegState::Define)
  2214. .addReg(BaseReg);
  2215. // FIXME: We're converting from LDRi12 to an insn that still
  2216. // uses addrmode2, so we need an explicit offset reg. It should
  2217. // always by reg0 since we're transforming LDRi12s.
  2218. if (!isT2)
  2219. MIB.addReg(0);
  2220. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2221. MIB.cloneMergedMemRefs({Op0, Op1});
  2222. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2223. ++NumLDRDFormed;
  2224. } else {
  2225. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2226. .addReg(FirstReg)
  2227. .addReg(SecondReg)
  2228. .addReg(BaseReg);
  2229. // FIXME: We're converting from LDRi12 to an insn that still
  2230. // uses addrmode2, so we need an explicit offset reg. It should
  2231. // always by reg0 since we're transforming STRi12s.
  2232. if (!isT2)
  2233. MIB.addReg(0);
  2234. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2235. MIB.cloneMergedMemRefs({Op0, Op1});
  2236. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2237. ++NumSTRDFormed;
  2238. }
  2239. MBB->erase(Op0);
  2240. MBB->erase(Op1);
  2241. if (!isT2) {
  2242. // Add register allocation hints to form register pairs.
  2243. MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
  2244. MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
  2245. }
  2246. } else {
  2247. for (unsigned i = 0; i != NumMove; ++i) {
  2248. MachineInstr *Op = Ops.pop_back_val();
  2249. MBB->splice(InsertPos, MBB, Op);
  2250. }
  2251. }
  2252. NumLdStMoved += NumMove;
  2253. RetVal = true;
  2254. }
  2255. }
  2256. }
  2257. return RetVal;
  2258. }
  2259. bool
  2260. ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  2261. bool RetVal = false;
  2262. DenseMap<MachineInstr*, unsigned> MI2LocMap;
  2263. using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
  2264. using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
  2265. using BaseVec = SmallVector<unsigned, 4>;
  2266. Base2InstMap Base2LdsMap;
  2267. Base2InstMap Base2StsMap;
  2268. BaseVec LdBases;
  2269. BaseVec StBases;
  2270. unsigned Loc = 0;
  2271. MachineBasicBlock::iterator MBBI = MBB->begin();
  2272. MachineBasicBlock::iterator E = MBB->end();
  2273. while (MBBI != E) {
  2274. for (; MBBI != E; ++MBBI) {
  2275. MachineInstr &MI = *MBBI;
  2276. if (MI.isCall() || MI.isTerminator()) {
  2277. // Stop at barriers.
  2278. ++MBBI;
  2279. break;
  2280. }
  2281. if (!MI.isDebugInstr())
  2282. MI2LocMap[&MI] = ++Loc;
  2283. if (!isMemoryOp(MI))
  2284. continue;
  2285. Register PredReg;
  2286. if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
  2287. continue;
  2288. int Opc = MI.getOpcode();
  2289. bool isLd = isLoadSingle(Opc);
  2290. Register Base = MI.getOperand(1).getReg();
  2291. int Offset = getMemoryOpOffset(MI);
  2292. bool StopHere = false;
  2293. auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
  2294. MapIt BI = Base2Ops.find(Base);
  2295. if (BI == Base2Ops.end()) {
  2296. Base2Ops[Base].push_back(&MI);
  2297. Bases.push_back(Base);
  2298. return;
  2299. }
  2300. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  2301. if (Offset == getMemoryOpOffset(*BI->second[i])) {
  2302. StopHere = true;
  2303. break;
  2304. }
  2305. }
  2306. if (!StopHere)
  2307. BI->second.push_back(&MI);
  2308. };
  2309. if (isLd)
  2310. FindBases(Base2LdsMap, LdBases);
  2311. else
  2312. FindBases(Base2StsMap, StBases);
  2313. if (StopHere) {
  2314. // Found a duplicate (a base+offset combination that's seen earlier).
  2315. // Backtrack.
  2316. --Loc;
  2317. break;
  2318. }
  2319. }
  2320. // Re-schedule loads.
  2321. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
  2322. unsigned Base = LdBases[i];
  2323. SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
  2324. if (Lds.size() > 1)
  2325. RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
  2326. }
  2327. // Re-schedule stores.
  2328. for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
  2329. unsigned Base = StBases[i];
  2330. SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
  2331. if (Sts.size() > 1)
  2332. RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
  2333. }
  2334. if (MBBI != E) {
  2335. Base2LdsMap.clear();
  2336. Base2StsMap.clear();
  2337. LdBases.clear();
  2338. StBases.clear();
  2339. }
  2340. }
  2341. return RetVal;
  2342. }
  2343. // Get the Base register operand index from the memory access MachineInst if we
  2344. // should attempt to distribute postinc on it. Return -1 if not of a valid
  2345. // instruction type. If it returns an index, it is assumed that instruction is a
  2346. // r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
  2347. static int getBaseOperandIndex(MachineInstr &MI) {
  2348. switch (MI.getOpcode()) {
  2349. case ARM::MVE_VLDRBS16:
  2350. case ARM::MVE_VLDRBS32:
  2351. case ARM::MVE_VLDRBU16:
  2352. case ARM::MVE_VLDRBU32:
  2353. case ARM::MVE_VLDRHS32:
  2354. case ARM::MVE_VLDRHU32:
  2355. case ARM::MVE_VLDRBU8:
  2356. case ARM::MVE_VLDRHU16:
  2357. case ARM::MVE_VLDRWU32:
  2358. case ARM::MVE_VSTRB16:
  2359. case ARM::MVE_VSTRB32:
  2360. case ARM::MVE_VSTRH32:
  2361. case ARM::MVE_VSTRBU8:
  2362. case ARM::MVE_VSTRHU16:
  2363. case ARM::MVE_VSTRWU32:
  2364. case ARM::t2LDRHi8:
  2365. case ARM::t2LDRHi12:
  2366. case ARM::t2LDRSHi8:
  2367. case ARM::t2LDRSHi12:
  2368. case ARM::t2LDRBi8:
  2369. case ARM::t2LDRBi12:
  2370. case ARM::t2LDRSBi8:
  2371. case ARM::t2LDRSBi12:
  2372. case ARM::t2STRBi8:
  2373. case ARM::t2STRBi12:
  2374. case ARM::t2STRHi8:
  2375. case ARM::t2STRHi12:
  2376. return 1;
  2377. case ARM::MVE_VLDRBS16_post:
  2378. case ARM::MVE_VLDRBS32_post:
  2379. case ARM::MVE_VLDRBU16_post:
  2380. case ARM::MVE_VLDRBU32_post:
  2381. case ARM::MVE_VLDRHS32_post:
  2382. case ARM::MVE_VLDRHU32_post:
  2383. case ARM::MVE_VLDRBU8_post:
  2384. case ARM::MVE_VLDRHU16_post:
  2385. case ARM::MVE_VLDRWU32_post:
  2386. case ARM::MVE_VSTRB16_post:
  2387. case ARM::MVE_VSTRB32_post:
  2388. case ARM::MVE_VSTRH32_post:
  2389. case ARM::MVE_VSTRBU8_post:
  2390. case ARM::MVE_VSTRHU16_post:
  2391. case ARM::MVE_VSTRWU32_post:
  2392. case ARM::MVE_VLDRBS16_pre:
  2393. case ARM::MVE_VLDRBS32_pre:
  2394. case ARM::MVE_VLDRBU16_pre:
  2395. case ARM::MVE_VLDRBU32_pre:
  2396. case ARM::MVE_VLDRHS32_pre:
  2397. case ARM::MVE_VLDRHU32_pre:
  2398. case ARM::MVE_VLDRBU8_pre:
  2399. case ARM::MVE_VLDRHU16_pre:
  2400. case ARM::MVE_VLDRWU32_pre:
  2401. case ARM::MVE_VSTRB16_pre:
  2402. case ARM::MVE_VSTRB32_pre:
  2403. case ARM::MVE_VSTRH32_pre:
  2404. case ARM::MVE_VSTRBU8_pre:
  2405. case ARM::MVE_VSTRHU16_pre:
  2406. case ARM::MVE_VSTRWU32_pre:
  2407. return 2;
  2408. }
  2409. return -1;
  2410. }
  2411. static bool isPostIndex(MachineInstr &MI) {
  2412. switch (MI.getOpcode()) {
  2413. case ARM::MVE_VLDRBS16_post:
  2414. case ARM::MVE_VLDRBS32_post:
  2415. case ARM::MVE_VLDRBU16_post:
  2416. case ARM::MVE_VLDRBU32_post:
  2417. case ARM::MVE_VLDRHS32_post:
  2418. case ARM::MVE_VLDRHU32_post:
  2419. case ARM::MVE_VLDRBU8_post:
  2420. case ARM::MVE_VLDRHU16_post:
  2421. case ARM::MVE_VLDRWU32_post:
  2422. case ARM::MVE_VSTRB16_post:
  2423. case ARM::MVE_VSTRB32_post:
  2424. case ARM::MVE_VSTRH32_post:
  2425. case ARM::MVE_VSTRBU8_post:
  2426. case ARM::MVE_VSTRHU16_post:
  2427. case ARM::MVE_VSTRWU32_post:
  2428. return true;
  2429. }
  2430. return false;
  2431. }
  2432. static bool isPreIndex(MachineInstr &MI) {
  2433. switch (MI.getOpcode()) {
  2434. case ARM::MVE_VLDRBS16_pre:
  2435. case ARM::MVE_VLDRBS32_pre:
  2436. case ARM::MVE_VLDRBU16_pre:
  2437. case ARM::MVE_VLDRBU32_pre:
  2438. case ARM::MVE_VLDRHS32_pre:
  2439. case ARM::MVE_VLDRHU32_pre:
  2440. case ARM::MVE_VLDRBU8_pre:
  2441. case ARM::MVE_VLDRHU16_pre:
  2442. case ARM::MVE_VLDRWU32_pre:
  2443. case ARM::MVE_VSTRB16_pre:
  2444. case ARM::MVE_VSTRB32_pre:
  2445. case ARM::MVE_VSTRH32_pre:
  2446. case ARM::MVE_VSTRBU8_pre:
  2447. case ARM::MVE_VSTRHU16_pre:
  2448. case ARM::MVE_VSTRWU32_pre:
  2449. return true;
  2450. }
  2451. return false;
  2452. }
  2453. // Given a memory access Opcode, check that the give Imm would be a valid Offset
  2454. // for this instruction (same as isLegalAddressImm), Or if the instruction
  2455. // could be easily converted to one where that was valid. For example converting
  2456. // t2LDRi12 to t2LDRi8 for negative offsets. Works in conjunction with
  2457. // AdjustBaseAndOffset below.
  2458. static bool isLegalOrConvertableAddressImm(unsigned Opcode, int Imm,
  2459. const TargetInstrInfo *TII,
  2460. int &CodesizeEstimate) {
  2461. if (isLegalAddressImm(Opcode, Imm, TII))
  2462. return true;
  2463. // We can convert AddrModeT2_i12 to AddrModeT2_i8neg.
  2464. const MCInstrDesc &Desc = TII->get(Opcode);
  2465. unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  2466. switch (AddrMode) {
  2467. case ARMII::AddrModeT2_i12:
  2468. CodesizeEstimate += 1;
  2469. return Imm < 0 && -Imm < ((1 << 8) * 1);
  2470. }
  2471. return false;
  2472. }
  2473. // Given an MI adjust its address BaseReg to use NewBaseReg and address offset
  2474. // by -Offset. This can either happen in-place or be a replacement as MI is
  2475. // converted to another instruction type.
  2476. static void AdjustBaseAndOffset(MachineInstr *MI, Register NewBaseReg,
  2477. int Offset, const TargetInstrInfo *TII,
  2478. const TargetRegisterInfo *TRI) {
  2479. // Set the Base reg
  2480. unsigned BaseOp = getBaseOperandIndex(*MI);
  2481. MI->getOperand(BaseOp).setReg(NewBaseReg);
  2482. // and constrain the reg class to that required by the instruction.
  2483. MachineFunction *MF = MI->getMF();
  2484. MachineRegisterInfo &MRI = MF->getRegInfo();
  2485. const MCInstrDesc &MCID = TII->get(MI->getOpcode());
  2486. const TargetRegisterClass *TRC = TII->getRegClass(MCID, BaseOp, TRI, *MF);
  2487. MRI.constrainRegClass(NewBaseReg, TRC);
  2488. int OldOffset = MI->getOperand(BaseOp + 1).getImm();
  2489. if (isLegalAddressImm(MI->getOpcode(), OldOffset - Offset, TII))
  2490. MI->getOperand(BaseOp + 1).setImm(OldOffset - Offset);
  2491. else {
  2492. unsigned ConvOpcode;
  2493. switch (MI->getOpcode()) {
  2494. case ARM::t2LDRHi12:
  2495. ConvOpcode = ARM::t2LDRHi8;
  2496. break;
  2497. case ARM::t2LDRSHi12:
  2498. ConvOpcode = ARM::t2LDRSHi8;
  2499. break;
  2500. case ARM::t2LDRBi12:
  2501. ConvOpcode = ARM::t2LDRBi8;
  2502. break;
  2503. case ARM::t2LDRSBi12:
  2504. ConvOpcode = ARM::t2LDRSBi8;
  2505. break;
  2506. case ARM::t2STRHi12:
  2507. ConvOpcode = ARM::t2STRHi8;
  2508. break;
  2509. case ARM::t2STRBi12:
  2510. ConvOpcode = ARM::t2STRBi8;
  2511. break;
  2512. default:
  2513. llvm_unreachable("Unhandled convertable opcode");
  2514. }
  2515. assert(isLegalAddressImm(ConvOpcode, OldOffset - Offset, TII) &&
  2516. "Illegal Address Immediate after convert!");
  2517. const MCInstrDesc &MCID = TII->get(ConvOpcode);
  2518. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2519. .add(MI->getOperand(0))
  2520. .add(MI->getOperand(1))
  2521. .addImm(OldOffset - Offset)
  2522. .add(MI->getOperand(3))
  2523. .add(MI->getOperand(4))
  2524. .cloneMemRefs(*MI);
  2525. MI->eraseFromParent();
  2526. }
  2527. }
  2528. static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
  2529. Register NewReg,
  2530. const TargetInstrInfo *TII,
  2531. const TargetRegisterInfo *TRI) {
  2532. MachineFunction *MF = MI->getMF();
  2533. MachineRegisterInfo &MRI = MF->getRegInfo();
  2534. unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
  2535. MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
  2536. const MCInstrDesc &MCID = TII->get(NewOpcode);
  2537. // Constrain the def register class
  2538. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2539. MRI.constrainRegClass(NewReg, TRC);
  2540. // And do the same for the base operand
  2541. TRC = TII->getRegClass(MCID, 2, TRI, *MF);
  2542. MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
  2543. unsigned AddrMode = (MCID.TSFlags & ARMII::AddrModeMask);
  2544. switch (AddrMode) {
  2545. case ARMII::AddrModeT2_i7:
  2546. case ARMII::AddrModeT2_i7s2:
  2547. case ARMII::AddrModeT2_i7s4:
  2548. // Any MVE load/store
  2549. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2550. .addReg(NewReg, RegState::Define)
  2551. .add(MI->getOperand(0))
  2552. .add(MI->getOperand(1))
  2553. .addImm(Offset)
  2554. .add(MI->getOperand(3))
  2555. .add(MI->getOperand(4))
  2556. .add(MI->getOperand(5))
  2557. .cloneMemRefs(*MI);
  2558. case ARMII::AddrModeT2_i8:
  2559. if (MI->mayLoad()) {
  2560. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2561. .add(MI->getOperand(0))
  2562. .addReg(NewReg, RegState::Define)
  2563. .add(MI->getOperand(1))
  2564. .addImm(Offset)
  2565. .add(MI->getOperand(3))
  2566. .add(MI->getOperand(4))
  2567. .cloneMemRefs(*MI);
  2568. } else {
  2569. return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
  2570. .addReg(NewReg, RegState::Define)
  2571. .add(MI->getOperand(0))
  2572. .add(MI->getOperand(1))
  2573. .addImm(Offset)
  2574. .add(MI->getOperand(3))
  2575. .add(MI->getOperand(4))
  2576. .cloneMemRefs(*MI);
  2577. }
  2578. default:
  2579. llvm_unreachable("Unhandled createPostIncLoadStore");
  2580. }
  2581. }
  2582. // Given a Base Register, optimise the load/store uses to attempt to create more
  2583. // post-inc accesses and less register moves. We do this by taking zero offset
  2584. // loads/stores with an add, and convert them to a postinc load/store of the
  2585. // same type. Any subsequent accesses will be adjusted to use and account for
  2586. // the post-inc value.
  2587. // For example:
  2588. // LDR #0 LDR_POSTINC #16
  2589. // LDR #4 LDR #-12
  2590. // LDR #8 LDR #-8
  2591. // LDR #12 LDR #-4
  2592. // ADD #16
  2593. //
  2594. // At the same time if we do not find an increment but do find an existing
  2595. // pre/post inc instruction, we can still adjust the offsets of subsequent
  2596. // instructions to save the register move that would otherwise be needed for the
  2597. // in-place increment.
  2598. bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
  2599. // We are looking for:
  2600. // One zero offset load/store that can become postinc
  2601. MachineInstr *BaseAccess = nullptr;
  2602. MachineInstr *PrePostInc = nullptr;
  2603. // An increment that can be folded in
  2604. MachineInstr *Increment = nullptr;
  2605. // Other accesses after BaseAccess that will need to be updated to use the
  2606. // postinc value.
  2607. SmallPtrSet<MachineInstr *, 8> OtherAccesses;
  2608. for (auto &Use : MRI->use_nodbg_instructions(Base)) {
  2609. if (!Increment && getAddSubImmediate(Use) != 0) {
  2610. Increment = &Use;
  2611. continue;
  2612. }
  2613. int BaseOp = getBaseOperandIndex(Use);
  2614. if (BaseOp == -1)
  2615. return false;
  2616. if (!Use.getOperand(BaseOp).isReg() ||
  2617. Use.getOperand(BaseOp).getReg() != Base)
  2618. return false;
  2619. if (isPreIndex(Use) || isPostIndex(Use))
  2620. PrePostInc = &Use;
  2621. else if (Use.getOperand(BaseOp + 1).getImm() == 0)
  2622. BaseAccess = &Use;
  2623. else
  2624. OtherAccesses.insert(&Use);
  2625. }
  2626. int IncrementOffset;
  2627. Register NewBaseReg;
  2628. if (BaseAccess && Increment) {
  2629. if (PrePostInc || BaseAccess->getParent() != Increment->getParent())
  2630. return false;
  2631. Register PredReg;
  2632. if (Increment->definesRegister(ARM::CPSR) ||
  2633. getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
  2634. return false;
  2635. LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
  2636. << Base.virtRegIndex() << "\n");
  2637. // Make sure that Increment has no uses before BaseAccess.
  2638. for (MachineInstr &Use :
  2639. MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
  2640. if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
  2641. LLVM_DEBUG(dbgs() << " BaseAccess doesn't dominate use of increment\n");
  2642. return false;
  2643. }
  2644. }
  2645. // Make sure that Increment can be folded into Base
  2646. IncrementOffset = getAddSubImmediate(*Increment);
  2647. unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
  2648. BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
  2649. if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
  2650. LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on postinc\n");
  2651. return false;
  2652. }
  2653. }
  2654. else if (PrePostInc) {
  2655. // If we already have a pre/post index load/store then set BaseAccess,
  2656. // IncrementOffset and NewBaseReg to the values it already produces,
  2657. // allowing us to update and subsequent uses of BaseOp reg with the
  2658. // incremented value.
  2659. if (Increment)
  2660. return false;
  2661. LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on already "
  2662. << "indexed VirtualReg " << Base.virtRegIndex() << "\n");
  2663. int BaseOp = getBaseOperandIndex(*PrePostInc);
  2664. IncrementOffset = PrePostInc->getOperand(BaseOp+1).getImm();
  2665. BaseAccess = PrePostInc;
  2666. NewBaseReg = PrePostInc->getOperand(0).getReg();
  2667. }
  2668. else
  2669. return false;
  2670. // And make sure that the negative value of increment can be added to all
  2671. // other offsets after the BaseAccess. We rely on either
  2672. // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
  2673. // to keep things simple.
  2674. // This also adds a simple codesize metric, to detect if an instruction (like
  2675. // t2LDRBi12) which can often be shrunk to a thumb1 instruction (tLDRBi)
  2676. // cannot because it is converted to something else (t2LDRBi8). We start this
  2677. // at -1 for the gain from removing the increment.
  2678. SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
  2679. int CodesizeEstimate = -1;
  2680. for (auto *Use : OtherAccesses) {
  2681. if (DT->dominates(BaseAccess, Use)) {
  2682. SuccessorAccesses.insert(Use);
  2683. unsigned BaseOp = getBaseOperandIndex(*Use);
  2684. if (!isLegalOrConvertableAddressImm(Use->getOpcode(),
  2685. Use->getOperand(BaseOp + 1).getImm() -
  2686. IncrementOffset,
  2687. TII, CodesizeEstimate)) {
  2688. LLVM_DEBUG(dbgs() << " Illegal addressing mode immediate on use\n");
  2689. return false;
  2690. }
  2691. } else if (!DT->dominates(Use, BaseAccess)) {
  2692. LLVM_DEBUG(
  2693. dbgs() << " Unknown dominance relation between Base and Use\n");
  2694. return false;
  2695. }
  2696. }
  2697. if (STI->hasMinSize() && CodesizeEstimate > 0) {
  2698. LLVM_DEBUG(dbgs() << " Expected to grow instructions under minsize\n");
  2699. return false;
  2700. }
  2701. if (!PrePostInc) {
  2702. // Replace BaseAccess with a post inc
  2703. LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
  2704. LLVM_DEBUG(dbgs() << " And : "; Increment->dump());
  2705. NewBaseReg = Increment->getOperand(0).getReg();
  2706. MachineInstr *BaseAccessPost =
  2707. createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
  2708. BaseAccess->eraseFromParent();
  2709. Increment->eraseFromParent();
  2710. (void)BaseAccessPost;
  2711. LLVM_DEBUG(dbgs() << " To : "; BaseAccessPost->dump());
  2712. }
  2713. for (auto *Use : SuccessorAccesses) {
  2714. LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
  2715. AdjustBaseAndOffset(Use, NewBaseReg, IncrementOffset, TII, TRI);
  2716. LLVM_DEBUG(dbgs() << " To : "; Use->dump());
  2717. }
  2718. // Remove the kill flag from all uses of NewBaseReg, in case any old uses
  2719. // remain.
  2720. for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
  2721. Op.setIsKill(false);
  2722. return true;
  2723. }
  2724. bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
  2725. bool Changed = false;
  2726. SmallSetVector<Register, 4> Visited;
  2727. for (auto &MBB : *MF) {
  2728. for (auto &MI : MBB) {
  2729. int BaseOp = getBaseOperandIndex(MI);
  2730. if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
  2731. continue;
  2732. Register Base = MI.getOperand(BaseOp).getReg();
  2733. if (!Base.isVirtual() || Visited.count(Base))
  2734. continue;
  2735. Visited.insert(Base);
  2736. }
  2737. }
  2738. for (auto Base : Visited)
  2739. Changed |= DistributeIncrements(Base);
  2740. return Changed;
  2741. }
  2742. /// Returns an instance of the load / store optimization pass.
  2743. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  2744. if (PreAlloc)
  2745. return new ARMPreAllocLoadStoreOpt();
  2746. return new ARMLoadStoreOpt();
  2747. }