// ARMExpandPseudoInsts.cpp (extraction artifact: the rendered page's
// line-number gutter and size banner were removed here).
  1. //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file contains a pass that expands pseudo instructions into target
  10. // instructions to allow proper scheduling, if-conversion, and other late
  11. // optimizations. This pass should be run after register allocation but before
  12. // the post-regalloc scheduling pass.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "ARM.h"
  16. #include "ARMBaseInstrInfo.h"
  17. #include "ARMBaseRegisterInfo.h"
  18. #include "ARMConstantPoolValue.h"
  19. #include "ARMMachineFunctionInfo.h"
  20. #include "ARMSubtarget.h"
  21. #include "MCTargetDesc/ARMAddressingModes.h"
  22. #include "llvm/CodeGen/LivePhysRegs.h"
  23. #include "llvm/CodeGen/MachineFrameInfo.h"
  24. #include "llvm/CodeGen/MachineFunctionPass.h"
  25. #include "llvm/MC/MCAsmInfo.h"
  26. #include "llvm/Support/Debug.h"
  27. using namespace llvm;
  28. #define DEBUG_TYPE "arm-pseudo"
// Hidden command-line flag: when set, the machine verifier is run after the
// ARM pseudo-expansion pass (off by default).
static cl::opt<bool>
VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
                cl::desc("Verify machine code after expanding ARM pseudos"));

#define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"
namespace {
/// Late machine-function pass that expands ARM pseudo instructions into real
/// target instructions (see the file header comment). It runs after register
/// allocation, hence the NoVRegs required property below.
class ARMExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  ARMExpandPseudo() : MachineFunctionPass(ID) {}

  // Cached per-function target objects; set up in runOnMachineFunction.
  const ARMBaseInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  ARMFunctionInfo *AFI;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  // Post-RA pass: no virtual registers may remain in the function.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return ARM_EXPAND_PSEUDO_NAME;
  }

private:
  // Moves the pseudo's trailing implicit register operands onto the expanded
  // instructions: implicit uses onto UseMI, implicit defs onto DefMI.
  void TransferImpOps(MachineInstr &OldMI,
                      MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
  // Expands the single pseudo at MBBI, if it is one. NextMBBI can be updated
  // by expansions that split the block. Returns true if anything changed.
  bool ExpandMI(MachineBasicBlock &MBB,
                MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Expands every pseudo in MBB; returns true if the block was modified.
  bool ExpandMBB(MachineBasicBlock &MBB);

  // NEON multi-register load/store and lane-operation expansions.
  void ExpandVLD(MachineBasicBlock::iterator &MBBI);
  void ExpandVST(MachineBasicBlock::iterator &MBBI);
  void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
  void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                  unsigned Opc, bool IsExt);
  void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);
  // Expands a 32-bit immediate materialization pseudo into real moves.
  void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI);

  // CMSE helpers: clear, save-and-clear, and restore general-purpose and
  // floating-point register state (V8 vs. V8.1 variants).
  void CMSEClearGPRegs(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                       const SmallVectorImpl<unsigned> &ClearRegs,
                       unsigned ClobberReg);
  MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const BitVector &ClearRegs);
  MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const BitVector &ClearRegs);
  void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                           const LivePhysRegs &LiveRegs,
                           SmallVectorImpl<unsigned> &AvailableRegs);
  void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                             const LivePhysRegs &LiveRegs,
                             SmallVectorImpl<unsigned> &ScratchRegs);
  void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                              const LivePhysRegs &LiveRegs);
  void CMSERestoreFPRegs(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                         SmallVectorImpl<unsigned> &AvailableRegs);
  void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                           SmallVectorImpl<unsigned> &AvailableRegs);
  void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                            SmallVectorImpl<unsigned> &AvailableRegs);

  // Expands atomic compare-and-swap pseudos; the exclusive load/store and
  // optional zero-extend opcodes are supplied by the caller.
  bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
                      unsigned StrexOp, unsigned UxtOp,
                      MachineBasicBlock::iterator &NextMBBI);
  // 64-bit variant of the compare-and-swap expansion.
  bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         MachineBasicBlock::iterator &NextMBBI);
};
char ARMExpandPseudo::ID = 0;
}
// Register the pass with LLVM's pass infrastructure under DEBUG_TYPE.
INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false,
                false)
  109. /// TransferImpOps - Transfer implicit operands on the pseudo instruction to
  110. /// the instructions created from the expansion.
  111. void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
  112. MachineInstrBuilder &UseMI,
  113. MachineInstrBuilder &DefMI) {
  114. const MCInstrDesc &Desc = OldMI.getDesc();
  115. for (const MachineOperand &MO :
  116. llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
  117. assert(MO.isReg() && MO.getReg());
  118. if (MO.isUse())
  119. UseMI.add(MO);
  120. else
  121. DefMI.add(MO);
  122. }
  123. }
namespace {
// Constants for register spacing in NEON load/store instructions.
// For quad-register load-lane and store-lane pseudo instructions, the
// spacing is initially assumed to be EvenDblSpc, and that is changed to
// OddDblSpc depending on the lane number operand.
enum NEONRegSpacing {
  SingleSpc,      // Consecutive single-spaced registers.
  SingleLowSpc,   // Single spacing, low registers, three and four vectors.
  SingleHighQSpc, // Single spacing, high registers, four vectors.
  SingleHighTSpc, // Single spacing, high registers, three vectors.
  EvenDblSpc,     // Double spacing, even-numbered start.
  OddDblSpc       // Double spacing, odd-numbered start.
};

// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
struct NEONLdStTableEntry {
  uint16_t PseudoOpc;        // Pseudo opcode this entry describes (sort key).
  uint16_t RealOpc;          // Real target opcode it expands to.
  bool IsLoad;               // True for loads, false for stores.
  bool isUpdating;           // Instruction writes back the base register.
  bool hasWritebackOperand;  // Real opcode carries an explicit writeback operand.
  uint8_t RegSpacing;        // One of type NEONRegSpacing.
  uint8_t NumRegs;           // D registers loaded or stored.
  uint8_t RegElts;           // Elements per D register; used for lane ops.
  // FIXME: Temporary flag to denote whether the real instruction takes
  // a single register (like the encoding) or all of the registers in
  // the list (like the asm syntax and the isel DAG). When all definitions
  // are converted to take only the single encoded register, this will
  // go away.
  bool copyAllListRegs;

  // Comparison methods for binary search of the table (entry/entry and the
  // heterogeneous entry/opcode forms required by std::lower_bound et al.).
  bool operator<(const NEONLdStTableEntry &TE) const {
    return PseudoOpc < TE.PseudoOpc;
  }
  friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
    return TE.PseudoOpc < PseudoOpc;
  }
  friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
                                              const NEONLdStTableEntry &TE) {
    return PseudoOpc < TE.PseudoOpc;
  }
};
}
  167. static const NEONLdStTableEntry NEONLdStTable[] = {
  168. { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true},
  169. { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true},
  170. { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true},
  171. { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true},
  172. { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true},
  173. { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true},
  174. { ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false},
  175. { ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
  176. { ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 ,false},
  177. { ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false},
  178. { ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false},
  179. { ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false},
  180. { ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false},
  181. { ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
  182. { ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false},
  183. { ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false},
  184. { ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false},
  185. { ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false},
  186. { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false},
  187. { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false},
  188. { ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false},
  189. { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false},
  190. { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false},
  191. { ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false},
  192. { ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false},
  193. { ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
  194. { ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false},
  195. { ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false},
  196. { ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false},
  197. { ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false},
  198. { ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false},
  199. { ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false},
  200. { ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false},
  201. { ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false},
  202. { ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false},
  203. { ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false},
  204. { ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false},
  205. { ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false},
  206. { ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false},
  207. { ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false},
  208. { ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false},
  209. { ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false},
  210. { ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false},
  211. { ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false},
  212. { ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false},
  213. { ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false},
  214. { ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false},
  215. { ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false},
  216. { ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false},
  217. { ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false},
  218. { ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false},
  219. { ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false},
  220. { ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false},
  221. { ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false},
  222. { ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false},
  223. { ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false},
  224. { ARM::VLD2DUPq16OddPseudoWB_fixed, ARM::VLD2DUPd16x2wb_fixed, true, true, false, OddDblSpc, 2, 4 ,false},
  225. { ARM::VLD2DUPq16OddPseudoWB_register, ARM::VLD2DUPd16x2wb_register, true, true, true, OddDblSpc, 2, 4 ,false},
  226. { ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false},
  227. { ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false},
  228. { ARM::VLD2DUPq32OddPseudoWB_fixed, ARM::VLD2DUPd32x2wb_fixed, true, true, false, OddDblSpc, 2, 2 ,false},
  229. { ARM::VLD2DUPq32OddPseudoWB_register, ARM::VLD2DUPd32x2wb_register, true, true, true, OddDblSpc, 2, 2 ,false},
  230. { ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false},
  231. { ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false},
  232. { ARM::VLD2DUPq8OddPseudoWB_fixed, ARM::VLD2DUPd8x2wb_fixed, true, true, false, OddDblSpc, 2, 8 ,false},
  233. { ARM::VLD2DUPq8OddPseudoWB_register, ARM::VLD2DUPd8x2wb_register, true, true, true, OddDblSpc, 2, 8 ,false},
  234. { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true},
  235. { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true},
  236. { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true},
  237. { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true},
  238. { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true},
  239. { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true},
  240. { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true},
  241. { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true},
  242. { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true},
  243. { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true},
  244. { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false},
  245. { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false},
  246. { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false},
  247. { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false},
  248. { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false},
  249. { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false},
  250. { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false},
  251. { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false},
  252. { ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false},
  253. { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true},
  254. { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true},
  255. { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true},
  256. { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true},
  257. { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true},
  258. { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true},
  259. { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true},
  260. { ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true},
  261. { ARM::VLD3DUPq16OddPseudo_UPD, ARM::VLD3DUPq16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
  262. { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true},
  263. { ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true},
  264. { ARM::VLD3DUPq32OddPseudo_UPD, ARM::VLD3DUPq32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
  265. { ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true},
  266. { ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true},
  267. { ARM::VLD3DUPq8OddPseudo_UPD, ARM::VLD3DUPq8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
  268. { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true},
  269. { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
  270. { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true},
  271. { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
  272. { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true},
  273. { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
  274. { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true},
  275. { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
  276. { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true},
  277. { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
  278. { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true},
  279. { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true},
  280. { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true},
  281. { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true},
  282. { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true},
  283. { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true},
  284. { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true},
  285. { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true},
  286. { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true},
  287. { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true},
  288. { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true},
  289. { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true},
  290. { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true},
  291. { ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true},
  292. { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true},
  293. { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true},
  294. { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true},
  295. { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true},
  296. { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true},
  297. { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true},
  298. { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true},
  299. { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true},
  300. { ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true},
  301. { ARM::VLD4DUPq16OddPseudo_UPD, ARM::VLD4DUPq16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
  302. { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true},
  303. { ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true},
  304. { ARM::VLD4DUPq32OddPseudo_UPD, ARM::VLD4DUPq32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
  305. { ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true},
  306. { ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true},
  307. { ARM::VLD4DUPq8OddPseudo_UPD, ARM::VLD4DUPq8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
  308. { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true},
  309. { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
  310. { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true},
  311. { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
  312. { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true},
  313. { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
  314. { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true},
  315. { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
  316. { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true},
  317. { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
  318. { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true},
  319. { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true},
  320. { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true},
  321. { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true},
  322. { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true},
  323. { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true},
  324. { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true},
  325. { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true},
  326. { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true},
  327. { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true},
  328. { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true},
  329. { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true},
  330. { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true},
  331. { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true},
  332. { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true},
  333. { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true},
  334. { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true},
  335. { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true},
  336. { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true},
  337. { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true},
  338. { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true},
  339. { ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false},
  340. { ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
  341. { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4 ,false},
  342. { ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false},
  343. { ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4 ,false},
  344. { ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4 ,false},
  345. { ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false},
  346. { ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
  347. { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2 ,false},
  348. { ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false},
  349. { ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2 ,false},
  350. { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2 ,false},
  351. { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false},
  352. { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false},
  353. { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false},
  354. { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false},
  355. { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false},
  356. { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false},
  357. { ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false},
  358. { ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
  359. { ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8 ,false},
  360. { ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false},
  361. { ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8 ,false},
  362. { ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8 ,false},
  363. { ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false},
  364. { ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
  365. { ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false},
  366. { ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4 ,false},
  367. { ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false},
  368. { ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false},
  369. { ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false},
  370. { ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
  371. { ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false},
  372. { ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2 ,false},
  373. { ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false},
  374. { ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false},
  375. { ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false},
  376. { ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
  377. { ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false},
  378. { ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1 ,false},
  379. { ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false},
  380. { ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false},
  381. { ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false},
  382. { ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false},
  383. { ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false},
  384. { ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8 ,false},
  385. { ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false},
  386. { ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false},
  387. { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true},
  388. { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true},
  389. { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true},
  390. { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true},
  391. { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true},
  392. { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true},
  393. { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true},
  394. { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true},
  395. { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true},
  396. { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true},
  397. { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false},
  398. { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false},
  399. { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false},
  400. { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false},
  401. { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false},
  402. { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false},
  403. { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false},
  404. { ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false},
  405. { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false},
  406. { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true},
  407. { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
  408. { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true},
  409. { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
  410. { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true},
  411. { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
  412. { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true},
  413. { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true},
  414. { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true},
  415. { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true},
  416. { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true},
  417. { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true},
  418. { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true},
  419. { ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true},
  420. { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true},
  421. { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true},
  422. { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true},
  423. { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true},
  424. { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true},
  425. { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true},
  426. { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true},
  427. { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true},
  428. { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true},
  429. { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true},
  430. { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true},
  431. { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true},
  432. { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
  433. { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true},
  434. { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
  435. { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true},
  436. { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
  437. { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true},
  438. { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true},
  439. { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true},
  440. { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true},
  441. { ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true},
  442. { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true},
  443. { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true},
  444. { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true},
  445. { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true},
  446. { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true},
  447. { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true},
  448. { ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
  449. { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
  450. { ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
  451. { ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
  452. { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
  453. { ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
  454. { ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
  455. { ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
  456. };
  457. /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
  458. /// load or store pseudo instruction.
  459. static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
  460. #ifndef NDEBUG
  461. // Make sure the table is sorted.
  462. static std::atomic<bool> TableChecked(false);
  463. if (!TableChecked.load(std::memory_order_relaxed)) {
  464. assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
  465. TableChecked.store(true, std::memory_order_relaxed);
  466. }
  467. #endif
  468. auto I = llvm::lower_bound(NEONLdStTable, Opcode);
  469. if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
  470. return I;
  471. return nullptr;
  472. }
  473. /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
  474. /// corresponding to the specified register spacing. Not all of the results
  475. /// are necessarily valid, e.g., a Q register only has 2 D subregisters.
  476. static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
  477. const TargetRegisterInfo *TRI, unsigned &D0,
  478. unsigned &D1, unsigned &D2, unsigned &D3) {
  479. if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
  480. D0 = TRI->getSubReg(Reg, ARM::dsub_0);
  481. D1 = TRI->getSubReg(Reg, ARM::dsub_1);
  482. D2 = TRI->getSubReg(Reg, ARM::dsub_2);
  483. D3 = TRI->getSubReg(Reg, ARM::dsub_3);
  484. } else if (RegSpc == SingleHighQSpc) {
  485. D0 = TRI->getSubReg(Reg, ARM::dsub_4);
  486. D1 = TRI->getSubReg(Reg, ARM::dsub_5);
  487. D2 = TRI->getSubReg(Reg, ARM::dsub_6);
  488. D3 = TRI->getSubReg(Reg, ARM::dsub_7);
  489. } else if (RegSpc == SingleHighTSpc) {
  490. D0 = TRI->getSubReg(Reg, ARM::dsub_3);
  491. D1 = TRI->getSubReg(Reg, ARM::dsub_4);
  492. D2 = TRI->getSubReg(Reg, ARM::dsub_5);
  493. D3 = TRI->getSubReg(Reg, ARM::dsub_6);
  494. } else if (RegSpc == EvenDblSpc) {
  495. D0 = TRI->getSubReg(Reg, ARM::dsub_0);
  496. D1 = TRI->getSubReg(Reg, ARM::dsub_2);
  497. D2 = TRI->getSubReg(Reg, ARM::dsub_4);
  498. D3 = TRI->getSubReg(Reg, ARM::dsub_6);
  499. } else {
  500. assert(RegSpc == OddDblSpc && "unknown register spacing");
  501. D0 = TRI->getSubReg(Reg, ARM::dsub_1);
  502. D1 = TRI->getSubReg(Reg, ARM::dsub_3);
  503. D2 = TRI->getSubReg(Reg, ARM::dsub_5);
  504. D3 = TRI->getSubReg(Reg, ARM::dsub_7);
  505. }
  506. }
/// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
/// operands to real VLD instructions with D register operands.
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  // The table entry supplies the real opcode, the D-register spacing of the
  // destination tuple, and how many list registers the real instruction takes.
  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  // OpIdx is a cursor over the pseudo's operand list; every consume below
  // advances it, so the order of these statements is significant.
  unsigned OpIdx = 0;

  bool DstIsDead = MI.getOperand(OpIdx).isDead();
  Register DstReg = MI.getOperand(OpIdx++).getReg();

  // The VLD2DUP "x2" real opcodes take a spaced D-pair register operand
  // instead of a list of individual D registers.
  bool IsVLD2DUP = TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_register ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_register ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_register;

  if (IsVLD2DUP) {
    // Pick the even or odd D subregister of the pseudo's destination, then
    // form the spaced D-pair super-register rooted at it.
    unsigned SubRegIndex;
    if (RegSpc == EvenDblSpc) {
      SubRegIndex = ARM::dsub_0;
    } else {
      assert(RegSpc == OddDblSpc && "Unexpected spacing!");
      SubRegIndex = ARM::dsub_1;
    }
    Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
    unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
                                                   &ARM::DPairSpcRegClass);
    MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
  } else {
    // Emit the individual D registers of the destination tuple as explicit
    // defs. Some real opcodes only name the first list register.
    unsigned D0, D1, D2, D3;
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1 && TableEntry->copyAllListRegs)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2 && TableEntry->copyAllListRegs)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3 && TableEntry->copyAllListRegs)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }

  // Writeback form: copy the updated base-register def.
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand) {
    // TODO: The writing-back pseudo instructions we translate here are all
    // defined to take am6offset nodes that are capable to represent both fixed
    // and register forms. Some real instructions, however, do not rely on
    // am6offset and have separate definitions for such forms. When this is the
    // case, fixed forms do not take any offset nodes, so here we skip them for
    // such instructions. Once all real and pseudo writing-back instructions are
    // rewritten without use of am6offset nodes, this code will go away.
    const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d64Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed) {
      assert(AM6Offset.getReg() == 0 &&
             "A fixed writing-back pseudo instruction provides an offset "
             "register!");
    } else {
      MIB.add(AM6Offset);
    }
  }

  // For an instruction writing double-spaced subregs, the pseudo instruction
  // has an extra operand that is a use of the super-register. Record the
  // operand index and skip over it.
  unsigned SrcOpIdx = 0;
  if (!IsVLD2DUP) {
    if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
        RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
        RegSpc == SingleHighTSpc)
      SrcOpIdx = OpIdx++;
  }

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source operand used for double-spaced subregs over
  // to the new instruction as an implicit operand.
  if (SrcOpIdx != 0) {
    MachineOperand MO = MI.getOperand(SrcOpIdx);
    MO.setImplicit(true);
    MIB.add(MO);
  }
  // Add an implicit def for the super-register.
  MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  TransferImpOps(MI, MIB, MIB);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  // Table entry describes the real opcode, the source tuple's D-register
  // spacing, and the number of list registers.
  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  // Cursor over the pseudo's operands; advanced as operands are consumed, so
  // statement order below matters.
  unsigned OpIdx = 0;
  // Writeback form: copy the updated base-register def first.
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  if (TableEntry->hasWritebackOperand) {
    // TODO: The writing-back pseudo instructions we translate here are all
    // defined to take am6offset nodes that are capable to represent both fixed
    // and register forms. Some real instructions, however, do not rely on
    // am6offset and have separate definitions for such forms. When this is the
    // case, fixed forms do not take any offset nodes, so here we skip them for
    // such instructions. Once all real and pseudo writing-back instructions are
    // rewritten without use of am6offset nodes, this code will go away.
    const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
      assert(AM6Offset.getReg() == 0 &&
             "A fixed writing-back pseudo instruction provides an offset "
             "register!");
    } else {
      MIB.add(AM6Offset);
    }
  }

  // Split the Q/QQ/QQQQ source tuple into D registers and emit them as the
  // store's register list. Some real opcodes only name the first register.
  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
  Register SrcReg = MI.getOperand(OpIdx++).getReg();
  unsigned D0, D1, D2, D3;
  GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
  MIB.addReg(D0, getUndefRegState(SrcIsUndef));
  if (NumRegs > 1 && TableEntry->copyAllListRegs)
    MIB.addReg(D1, getUndefRegState(SrcIsUndef));
  if (NumRegs > 2 && TableEntry->copyAllListRegs)
    MIB.addReg(D2, getUndefRegState(SrcIsUndef));
  if (NumRegs > 3 && TableEntry->copyAllListRegs)
    MIB.addReg(D3, getUndefRegState(SrcIsUndef));

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
    MIB->addRegisterKilled(SrcReg, TRI, true);
  else if (!SrcIsUndef)
    MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
  TransferImpOps(MI, MIB, MIB);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}
/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  // The table entry covers both load- and store-lane pseudos; IsLoad selects
  // which direction this expansion handles.
  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;
  unsigned RegElts = TableEntry->RegElts;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  // Cursor over the pseudo's operands; every consume advances it.
  unsigned OpIdx = 0;
  // The lane operand is always the 3rd from last operand, before the 2
  // predicate operands.
  unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
  // Adjust the lane and spacing as needed for Q registers: a lane in the top
  // half of a Q register lives in the odd D subregister.
  assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
  if (RegSpc == EvenDblSpc && Lane >= RegElts) {
    RegSpc = OddDblSpc;
    Lane -= RegElts;
  }
  assert(Lane < RegElts && "out of range lane for VLD/VST-lane");

  unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
  unsigned DstReg = 0;
  bool DstIsDead = false;
  if (TableEntry->IsLoad) {
    // Loads define the D subregisters of the destination tuple explicitly.
    DstIsDead = MI.getOperand(OpIdx).isDead();
    DstReg = MI.getOperand(OpIdx++).getReg();
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }

  // Writeback form: copy the updated base-register def.
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand)
    MIB.add(MI.getOperand(OpIdx++));

  // Grab the super-register source.
  MachineOperand MO = MI.getOperand(OpIdx++);
  if (!TableEntry->IsLoad)
    // For stores, the D registers come from the source tuple instead.
    GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);

  // Add the subregs as sources of the new instruction.
  unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
                       getKillRegState(MO.isKill()));
  MIB.addReg(D0, SrcFlags);
  if (NumRegs > 1)
    MIB.addReg(D1, SrcFlags);
  if (NumRegs > 2)
    MIB.addReg(D2, SrcFlags);
  if (NumRegs > 3)
    MIB.addReg(D3, SrcFlags);

  // Add the lane number operand (possibly adjusted above).
  MIB.addImm(Lane);
  // Skip over the pseudo's own lane operand; Lane was already emitted.
  OpIdx += 1;

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source to be an implicit source.
  MO.setImplicit(true);
  MIB.add(MO);
  if (TableEntry->IsLoad)
    // Add an implicit def for the super-register.
    MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  TransferImpOps(MI, MIB, MIB);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}
  760. /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
  761. /// register operands to real instructions with D register operands.
  762. void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
  763. unsigned Opc, bool IsExt) {
  764. MachineInstr &MI = *MBBI;
  765. MachineBasicBlock &MBB = *MI.getParent();
  766. LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
  767. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
  768. unsigned OpIdx = 0;
  769. // Transfer the destination register operand.
  770. MIB.add(MI.getOperand(OpIdx++));
  771. if (IsExt) {
  772. MachineOperand VdSrc(MI.getOperand(OpIdx++));
  773. MIB.add(VdSrc);
  774. }
  775. bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  776. Register SrcReg = MI.getOperand(OpIdx++).getReg();
  777. unsigned D0, D1, D2, D3;
  778. GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
  779. MIB.addReg(D0);
  780. // Copy the other source register operand.
  781. MachineOperand VmSrc(MI.getOperand(OpIdx++));
  782. MIB.add(VmSrc);
  783. // Copy the predicate operands.
  784. MIB.add(MI.getOperand(OpIdx++));
  785. MIB.add(MI.getOperand(OpIdx++));
  786. // Add an implicit kill and use for the super-reg.
  787. MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
  788. TransferImpOps(MI, MIB, MIB);
  789. MI.eraseFromParent();
  790. LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
  791. }
// Expand the MQQPR/MQQQQPR load and store pseudos into a single
// VLDMDIA/VSTMDIA over the D sub-registers of the Q-register tuple.
void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  unsigned NewOpc =
      MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
          ? ARM::VSTMDIA
          : ARM::VLDMDIA;
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));

  // Operand 0 is the register tuple being loaded or stored; propagate its
  // kill/def state to every D sub-register added below.
  unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
                   getDefRegState(MI.getOperand(0).isDef());
  Register SrcReg = MI.getOperand(0).getReg();

  // Copy the address base operand (operand 1).
  MIB.add(MI.getOperand(1));
  MIB.add(predOps(ARMCC::AL));

  // QQ tuples cover dsub_0..dsub_3; QQQQ tuples additionally dsub_4..dsub_7.
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
  if (MI.getOpcode() == ARM::MQQQQPRStore ||
      MI.getOpcode() == ARM::MQQQQPRLoad) {
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
  }

  // For stores, keep the whole tuple register alive as an implicit use.
  if (NewOpc == ARM::VSTMDIA)
    MIB.addReg(SrcReg, RegState::Implicit);

  TransferImpOps(MI, MIB, MIB);
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}
  824. static bool IsAnAddressOperand(const MachineOperand &MO) {
  825. // This check is overly conservative. Unless we are certain that the machine
  826. // operand is not a symbol reference, we return that it is a symbol reference.
  827. // This is important as the load pair may not be split up Windows.
  828. switch (MO.getType()) {
  829. case MachineOperand::MO_Register:
  830. case MachineOperand::MO_Immediate:
  831. case MachineOperand::MO_CImmediate:
  832. case MachineOperand::MO_FPImmediate:
  833. case MachineOperand::MO_ShuffleMask:
  834. return false;
  835. case MachineOperand::MO_MachineBasicBlock:
  836. return true;
  837. case MachineOperand::MO_FrameIndex:
  838. return false;
  839. case MachineOperand::MO_ConstantPoolIndex:
  840. case MachineOperand::MO_TargetIndex:
  841. case MachineOperand::MO_JumpTableIndex:
  842. case MachineOperand::MO_ExternalSymbol:
  843. case MachineOperand::MO_GlobalAddress:
  844. case MachineOperand::MO_BlockAddress:
  845. return true;
  846. case MachineOperand::MO_RegisterMask:
  847. case MachineOperand::MO_RegisterLiveOut:
  848. return false;
  849. case MachineOperand::MO_Metadata:
  850. case MachineOperand::MO_MCSymbol:
  851. return true;
  852. case MachineOperand::MO_DbgInstrRef:
  853. case MachineOperand::MO_CFIIndex:
  854. return false;
  855. case MachineOperand::MO_IntrinsicID:
  856. case MachineOperand::MO_Predicate:
  857. llvm_unreachable("should not exist post-isel");
  858. }
  859. llvm_unreachable("unhandled machine operand type");
  860. }
  861. static MachineOperand makeImplicit(const MachineOperand &MO) {
  862. MachineOperand NewMO = MO;
  863. NewMO.setImplicit();
  864. return NewMO;
  865. }
// Expand a 32-bit immediate (or symbolic) move pseudo into a pair of real
// instructions: MOVW/MOVT on v6T2+, or a two-instruction shifter-operand
// sequence (MOVi+ORRri or MVNi+SUBri) on older ARM-mode cores.
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  // The conditional variants carry an extra "previous value" operand at
  // index 1, so their immediate/symbol source lives at index 2.
  bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
  const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
  // On Windows, a pair referencing a symbol must be kept together in a
  // bundle so later passes cannot separate the two halves.
  bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
  MachineInstrBuilder LO16, HI16;
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  if (!STI->hasV6T2Ops() &&
      (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
    // No MOVW/MOVT available: synthesize the constant from two
    // shifter-operand immediates.
    // FIXME Windows CE supports older ARM CPUs
    assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");

    assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    unsigned ImmVal = (unsigned)MO.getImm();
    unsigned SOImmValV1 = 0, SOImmValV2 = 0;

    if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
                 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                 .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
    } else { // Expand into a mvn + sub.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
                 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                 .addReg(DstReg);
      // Split the negated value instead; MVN inverts its operand, which the
      // ~(-SOImmValV1) below accounts for on the first half.
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
      SOImmValV1 = ~(-SOImmValV1);
    }

    unsigned MIFlags = MI.getFlags();
    LO16 = LO16.addImm(SOImmValV1);
    HI16 = HI16.addImm(SOImmValV2);
    LO16.cloneMemRefs(MI);
    HI16.cloneMemRefs(MI);
    LO16.setMIFlags(MIFlags);
    HI16.setMIFlags(MIFlags);
    LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    // For the conditional form, keep the "previous value" operand as an
    // implicit use so liveness stays correct when the move is not executed.
    if (isCC)
      LO16.add(makeImplicit(MI.getOperand(1)));
    TransferImpOps(MI, LO16, HI16);
    MI.eraseFromParent();
    return;
  }

  // v6T2 and later: expand into a MOVW (low half) / MOVT (high half) pair.
  unsigned LO16Opc = 0;
  unsigned HI16Opc = 0;
  unsigned MIFlags = MI.getFlags();
  if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
    LO16Opc = ARM::t2MOVi16;
    HI16Opc = ARM::t2MOVTi16;
  } else {
    LO16Opc = ARM::MOVi16;
    HI16Opc = ARM::MOVTi16;
  }
  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
             .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
             .addReg(DstReg);
  LO16.setMIFlags(MIFlags);
  HI16.setMIFlags(MIFlags);

  // The source can be an immediate, an external symbol, or (default case)
  // a global address; symbols get :lower16:/:upper16: target flags.
  switch (MO.getType()) {
  case MachineOperand::MO_Immediate: {
    unsigned Imm = MO.getImm();
    unsigned Lo16 = Imm & 0xffff;
    unsigned Hi16 = (Imm >> 16) & 0xffff;
    LO16 = LO16.addImm(Lo16);
    HI16 = HI16.addImm(Hi16);
    break;
  }
  case MachineOperand::MO_ExternalSymbol: {
    const char *ES = MO.getSymbolName();
    unsigned TF = MO.getTargetFlags();
    LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
    HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
    break;
  }
  default: {
    const GlobalValue *GV = MO.getGlobal();
    unsigned TF = MO.getTargetFlags();
    LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
    HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
    break;
  }
  }

  LO16.cloneMemRefs(MI);
  HI16.cloneMemRefs(MI);
  LO16.addImm(Pred).addReg(PredReg);
  HI16.addImm(Pred).addReg(PredReg);

  if (RequiresBundling)
    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());

  if (isCC)
    LO16.add(makeImplicit(MI.getOperand(1)));
  TransferImpOps(MI, LO16, HI16);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););
  LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}
  970. // The size of the area, accessed by that VLSTM/VLLDM
  971. // S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
  972. static const int CMSE_FP_SAVE_SIZE = 136;
  973. static void determineGPRegsToClear(const MachineInstr &MI,
  974. const std::initializer_list<unsigned> &Regs,
  975. SmallVectorImpl<unsigned> &ClearRegs) {
  976. SmallVector<unsigned, 4> OpRegs;
  977. for (const MachineOperand &Op : MI.operands()) {
  978. if (!Op.isReg() || !Op.isUse())
  979. continue;
  980. OpRegs.push_back(Op.getReg());
  981. }
  982. llvm::sort(OpRegs);
  983. std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
  984. std::back_inserter(ClearRegs));
  985. }
// Zero the general-purpose registers in ClearRegs (and the condition flags)
// before leaving the secure state. ClobberReg is a register that already
// holds a safe (non-secret) value and may be copied into the others.
void ARMExpandPseudo::CMSEClearGPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
    unsigned ClobberReg) {

  if (STI->hasV8_1MMainlineOps()) {
    // Clear the registers using the CLRM instruction.
    MachineInstrBuilder CLRM =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
    for (unsigned R : ClearRegs)
      CLRM.addReg(R, RegState::Define);
    // Model CLRM's effect on the flags with explicit APSR/CPSR defs.
    CLRM.addReg(ARM::APSR, RegState::Define);
    CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
  } else {
    // Clear the registers and flags by copying ClobberReg into them.
    // (Baseline can't do a high register clear in one instruction).
    for (unsigned Reg : ClearRegs) {
      if (Reg == ClobberReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
          .addReg(ClobberReg)
          .add(predOps(ARMCC::AL));
    }

    // Overwrite the flags via MSR. The immediate selects the special
    // register: 0xc00 when the DSP extension (GE bits) is present,
    // 0x800 otherwise.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
        .addImm(STI->hasDSP() ? 0xc00 : 0x800)
        .addReg(ClobberReg)
        .add(predOps(ARMCC::AL));
  }
}
  1014. // Find which FP registers need to be cleared. The parameter `ClearRegs` is
  1015. // initialised with all elements set to true, and this function resets all the
  1016. // bits, which correspond to register uses. Returns true if any floating point
  1017. // register is defined, false otherwise.
  1018. static bool determineFPRegsToClear(const MachineInstr &MI,
  1019. BitVector &ClearRegs) {
  1020. bool DefFP = false;
  1021. for (const MachineOperand &Op : MI.operands()) {
  1022. if (!Op.isReg())
  1023. continue;
  1024. Register Reg = Op.getReg();
  1025. if (Op.isDef()) {
  1026. if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
  1027. (Reg >= ARM::D0 && Reg <= ARM::D15) ||
  1028. (Reg >= ARM::S0 && Reg <= ARM::S31))
  1029. DefFP = true;
  1030. continue;
  1031. }
  1032. if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
  1033. int R = Reg - ARM::Q0;
  1034. ClearRegs.reset(R * 4, (R + 1) * 4);
  1035. } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
  1036. int R = Reg - ARM::D0;
  1037. ClearRegs.reset(R * 2, (R + 1) * 2);
  1038. } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
  1039. ClearRegs[Reg - ARM::S0] = false;
  1040. }
  1041. }
  1042. return DefFP;
  1043. }
  1044. MachineBasicBlock &
  1045. ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
  1046. MachineBasicBlock::iterator MBBI) {
  1047. BitVector ClearRegs(16, true);
  1048. (void)determineFPRegsToClear(*MBBI, ClearRegs);
  1049. if (STI->hasV8_1MMainlineOps())
  1050. return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
  1051. else
  1052. return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
  1053. }
// Clear the FP registers for v8.0-M, by copying over the content
// of LR. Uses R12 as a scratch register.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   const BitVector &ClearRegs) {
  if (!STI->hasFPRegs())
    return MBB;

  auto &RetI = *MBBI;
  const DebugLoc &DL = RetI.getDebugLoc();

  // If optimising for minimum size, clear FP registers unconditionally.
  // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
  // don't clear them if they belong to the non-secure state.
  MachineBasicBlock *ClearBB, *DoneBB;
  if (STI->hasMinSize()) {
    ClearBB = DoneBB = &MBB;
  } else {
    // Split the block: MBB ends with the SFPA test and branch, ClearBB holds
    // the clearing sequence, and DoneBB receives the original return.
    MachineFunction *MF = MBB.getParent();
    ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
    DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

    MF->insert(++MBB.getIterator(), ClearBB);
    MF->insert(++ClearBB->getIterator(), DoneBB);

    DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
    DoneBB->transferSuccessors(&MBB);
    MBB.addSuccessor(ClearBB);
    MBB.addSuccessor(DoneBB);
    ClearBB->addSuccessor(DoneBB);

    // At the new basic blocks we need to have live-in the registers, used
    // for the return value as well as LR, used to clear registers.
    for (const MachineOperand &Op : RetI.operands()) {
      if (!Op.isReg())
        continue;
      Register Reg = Op.getReg();
      if (Reg == ARM::NoRegister || Reg == ARM::LR)
        continue;
      assert(Reg.isPhysical() && "Unallocated register");
      ClearBB->addLiveIn(Reg);
      DoneBB->addLiveIn(Reg);
    }
    ClearBB->addLiveIn(ARM::LR);
    DoneBB->addLiveIn(ARM::LR);

    // Read the CONTROL register.
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
        .addImm(20)
        .add(predOps(ARMCC::AL));
    // Check bit 3 (SFPA).
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
        .addReg(ARM::R12)
        .addImm(8)
        .add(predOps(ARMCC::AL));
    // If SFPA is clear, jump over ClearBB to DoneBB.
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
        .addMBB(DoneBB)
        .addImm(ARMCC::EQ)
        .addReg(ARM::CPSR, RegState::Kill);
  }

  // Emit the clearing sequence: one VMOVDRR per fully-cleared D-register
  // pair, falling back to single-precision VMOVSR for half-covered pairs.
  for (unsigned D = 0; D < 8; D++) {
    // Attempt to clear as double
    if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
      unsigned Reg = ARM::D0 + D;
      BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(ARM::LR)
          .addReg(ARM::LR)
          .add(predOps(ARMCC::AL));
    } else {
      // Clear first part as single
      if (ClearRegs[D * 2 + 0]) {
        unsigned Reg = ARM::S0 + D * 2;
        BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
            .addReg(ARM::LR)
            .add(predOps(ARMCC::AL));
      }
      // Clear second part as single
      if (ClearRegs[D * 2 + 1]) {
        unsigned Reg = ARM::S0 + D * 2 + 1;
        BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
            .addReg(ARM::LR)
            .add(predOps(ARMCC::AL));
      }
    }
  }

  // Clear FPSCR bits 0-4, 7, 28-31
  // The other bits are program global according to the AAPCS
  BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
      .add(predOps(ARMCC::AL));
  BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
      .addReg(ARM::R12)
      .addImm(0x0000009F)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
      .addReg(ARM::R12)
      .addImm(0xF0000000)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
      .addReg(ARM::R12)
      .add(predOps(ARMCC::AL));

  return *DoneBB;
}
  1155. MachineBasicBlock &
  1156. ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
  1157. MachineBasicBlock::iterator MBBI,
  1158. const BitVector &ClearRegs) {
  1159. auto &RetI = *MBBI;
  1160. // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
  1161. // each contiguous sequence of S-registers.
  1162. int Start = -1, End = -1;
  1163. for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
  1164. if (ClearRegs[S] && S == End + 1) {
  1165. End = S; // extend range
  1166. continue;
  1167. }
  1168. // Emit current range.
  1169. if (Start < End) {
  1170. MachineInstrBuilder VSCCLRM =
  1171. BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
  1172. .add(predOps(ARMCC::AL));
  1173. while (++Start <= End)
  1174. VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
  1175. VSCCLRM.addReg(ARM::VPR, RegState::Define);
  1176. }
  1177. Start = End = S;
  1178. }
  1179. // Emit last range.
  1180. if (Start < End) {
  1181. MachineInstrBuilder VSCCLRM =
  1182. BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
  1183. .add(predOps(ARMCC::AL));
  1184. while (++Start <= End)
  1185. VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
  1186. VSCCLRM.addReg(ARM::VPR, RegState::Define);
  1187. }
  1188. return MBB;
  1189. }
  1190. void ARMExpandPseudo::CMSESaveClearFPRegs(
  1191. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
  1192. const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
  1193. if (STI->hasV8_1MMainlineOps())
  1194. CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
  1195. else if (STI->hasV8MMainlineOps())
  1196. CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
  1197. }
// Save and clear FP registers if present
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {

  // Store an available register for FPSCR clearing
  assert(!ScratchRegs.empty());
  unsigned SpareReg = ScratchRegs.front();

  // save space on stack for VLSTM
  BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
      .addReg(ARM::SP)
      .addImm(CMSE_FP_SAVE_SIZE >> 2)
      .add(predOps(ARMCC::AL));

  // Use ScratchRegs to store the fp regs. FP register uses of the instruction
  // at MBBI are copied into scratch GPRs (a D register needs two, an S
  // register one); those that don't fit are recorded in NonclearedFPRegs and
  // reloaded from the VLSTM save area after the lazy store.
  std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
  std::vector<unsigned> NonclearedFPRegs;
  for (const MachineOperand &Op : MBBI->operands()) {
    if (Op.isReg() && Op.isUse()) {
      Register Reg = Op.getReg();
      assert(!ARM::DPRRegClass.contains(Reg) ||
             ARM::DPR_VFP2RegClass.contains(Reg));
      assert(!ARM::QPRRegClass.contains(Reg));
      if (ARM::DPR_VFP2RegClass.contains(Reg)) {
        if (ScratchRegs.size() >= 2) {
          unsigned SaveReg2 = ScratchRegs.pop_back_val();
          unsigned SaveReg1 = ScratchRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
              .addReg(SaveReg1, RegState::Define)
              .addReg(SaveReg2, RegState::Define)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      } else if (ARM::SPRRegClass.contains(Reg)) {
        if (ScratchRegs.size() >= 1) {
          unsigned SaveReg = ScratchRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg, 0);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      }
    }
  }

  bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());

  if (passesFPReg)
    assert(STI->hasFPRegs() && "Subtarget needs fpregs");

  // Lazy store all fp registers to the stack.
  // This executes as NOP in the absence of floating-point support.
  MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
                                  .addReg(ARM::SP)
                                  .add(predOps(ARMCC::AL));
  // Model the FP state read by the lazy store; registers that are not live
  // here are added as undef.
  for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
                 ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
    VLSTM.addReg(R, RegState::Implicit |
                        (LiveRegs.contains(R) ? 0 : RegState::Undef));

  // Restore all arguments
  for (const auto &Regs : ClearedFPRegs) {
    unsigned Reg, SaveReg1, SaveReg2;
    std::tie(Reg, SaveReg1, SaveReg2) = Regs;
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(SaveReg1)
          .addReg(SaveReg2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
          .addReg(SaveReg1)
          .add(predOps(ARMCC::AL));
  }

  // Reload the arguments that did not fit in scratch GPRs from the save area
  // (word offsets into the S0-S31 image laid down by VLSTM).
  for (unsigned Reg : NonclearedFPRegs) {
    if (ARM::DPR_VFP2RegClass.contains(Reg)) {
      if (STI->isLittle()) {
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
            .addReg(ARM::SP)
            .addImm((Reg - ARM::D0) * 2)
            .add(predOps(ARMCC::AL));
      } else {
        // For big-endian targets we need to load the two subregisters of Reg
        // manually because VLDRD would load them in wrong order
        unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
            .addReg(ARM::SP)
            .addImm((Reg - ARM::D0) * 2)
            .add(predOps(ARMCC::AL));
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
            .addReg(ARM::SP)
            .addImm((Reg - ARM::D0) * 2 + 1)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::SPRRegClass.contains(Reg)) {
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
          .addReg(ARM::SP)
          .addImm(Reg - ARM::S0)
          .add(predOps(ARMCC::AL));
    }
  }
  // restore FPSCR from stack and clear bits 0-4, 7, 28-31
  // The other bits are program global according to the AAPCS
  if (passesFPReg) {
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg)
        .addReg(ARM::SP)
        .addImm(0x10)
        .add(predOps(ARMCC::AL));
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
        .addReg(SpareReg)
        .addImm(0x0000009F)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
        .addReg(SpareReg)
        .addImm(0xF0000000)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
        .addReg(SpareReg)
        .add(predOps(ARMCC::AL));
    // The ldr must happen after a floating point instruction. To prevent the
    // post-ra scheduler to mess with the order, we create a bundle.
    finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
  }
}
// Save and clear FP registers for v8.1-M: either a lazy VLSTM (when no FP
// register passes arguments or returns) or an eager VPUSH + VSCCLRM +
// FP-context save.
void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc &DL,
                                             const LivePhysRegs &LiveRegs) {
  BitVector ClearRegs(32, true);
  bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);

  // If the instruction does not write to a FP register and no elements were
  // removed from the set, then no FP registers were used to pass
  // arguments/returns.
  if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
    // save space on stack for VLSTM
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
        .addReg(ARM::SP)
        .addImm(CMSE_FP_SAVE_SIZE >> 2)
        .add(predOps(ARMCC::AL));

    // Lazy store all FP registers to the stack
    MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL));
    // Model the FP state read by the lazy store; registers that are not
    // live here are added as undef.
    for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
                   ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
      VLSTM.addReg(R, RegState::Implicit |
                          (LiveRegs.contains(R) ? 0 : RegState::Undef));
  } else {
    // Push all the callee-saved registers (s16-s31).
    MachineInstrBuilder VPUSH =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
      VPUSH.addReg(Reg);

    // Clear FP registers with a VSCCLRM.
    (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);

    // Save floating-point context.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
        .addReg(ARM::SP)
        .addImm(-8)
        .add(predOps(ARMCC::AL));
  }
}
  1365. // Restore FP registers if present
  1366. void ARMExpandPseudo::CMSERestoreFPRegs(
  1367. MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
  1368. SmallVectorImpl<unsigned> &AvailableRegs) {
  1369. if (STI->hasV8_1MMainlineOps())
  1370. CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
  1371. else if (STI->hasV8MMainlineOps())
  1372. CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
  1373. }
// Restore the FP registers for v8.0-M after a non-secure call: preserve the
// call's FP results across the lazy VLLDM reload, then pop the save area.
void ARMExpandPseudo::CMSERestoreFPRegsV8(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {

  // Keep a scratch register for the mitigation sequence.
  unsigned ScratchReg = ARM::NoRegister;
  if (STI->fixCMSE_CVE_2021_35465())
    ScratchReg = AvailableRegs.pop_back_val();

  // Use AvailableRegs to store the fp regs. FP defs of the instruction at
  // MBBI are copied into spare GPRs (a D register needs two, an S register
  // one); those that don't fit are spilled to the save area instead.
  std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
  std::vector<unsigned> NonclearedFPRegs;
  for (const MachineOperand &Op : MBBI->operands()) {
    if (Op.isReg() && Op.isDef()) {
      Register Reg = Op.getReg();
      assert(!ARM::DPRRegClass.contains(Reg) ||
             ARM::DPR_VFP2RegClass.contains(Reg));
      assert(!ARM::QPRRegClass.contains(Reg));
      if (ARM::DPR_VFP2RegClass.contains(Reg)) {
        if (AvailableRegs.size() >= 2) {
          unsigned SaveReg2 = AvailableRegs.pop_back_val();
          unsigned SaveReg1 = AvailableRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
              .addReg(SaveReg1, RegState::Define)
              .addReg(SaveReg2, RegState::Define)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      } else if (ARM::SPRRegClass.contains(Reg)) {
        if (AvailableRegs.size() >= 1) {
          unsigned SaveReg = AvailableRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg, 0);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      }
    }
  }

  bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());

  if (returnsFPReg)
    assert(STI->hasFPRegs() && "Subtarget needs fpregs");

  // Push FP regs that cannot be restored via normal registers on the stack
  for (unsigned Reg : NonclearedFPRegs) {
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD))
          .addReg(Reg)
          .addReg(ARM::SP)
          .addImm((Reg - ARM::D0) * 2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS))
          .addReg(Reg)
          .addReg(ARM::SP)
          .addImm(Reg - ARM::S0)
          .add(predOps(ARMCC::AL));
  }

  // Lazy load fp regs from stack.
  // This executes as NOP in the absence of floating-point support.
  MachineInstrBuilder VLLDM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
                                  .addReg(ARM::SP)
                                  .add(predOps(ARMCC::AL));

  if (STI->fixCMSE_CVE_2021_35465()) {
    // Bundle a CONTROL read, SFPA test, IT block and conditional VMOV ahead
    // of the VLLDM so the sequence cannot be reordered or split.
    auto Bundler = MIBundleBuilder(MBB, VLLDM);
    // Read the CONTROL register.
    Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M))
                       .addReg(ScratchReg, RegState::Define)
                       .addImm(20)
                       .add(predOps(ARMCC::AL)));
    // Check bit 3 (SFPA).
    Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri))
                       .addReg(ScratchReg)
                       .addImm(8)
                       .add(predOps(ARMCC::AL)));
    // Emit the IT block.
    Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT))
                       .addImm(ARMCC::NE)
                       .addImm(8));
    // If SFPA is clear jump over to VLLDM, otherwise execute an instruction
    // which has no functional effect apart from causing context creation:
    // vmovne s0, s0. In the absence of FPU we emit .inst.w 0xeeb00a40,
    // which is defined as NOP if not executed.
    if (STI->hasFPRegs())
      Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS))
                         .addReg(ARM::S0, RegState::Define)
                         .addReg(ARM::S0, RegState::Undef)
                         .add(predOps(ARMCC::NE)));
    else
      Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM))
                         .addExternalSymbol(".inst.w 0xeeb00a40")
                         .addImm(InlineAsm::Extra_HasSideEffects));
    finalizeBundle(MBB, Bundler.begin(), Bundler.end());
  }

  // Restore all FP registers via normal registers
  for (const auto &Regs : ClearedFPRegs) {
    unsigned Reg, SaveReg1, SaveReg2;
    std::tie(Reg, SaveReg1, SaveReg2) = Regs;
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(SaveReg1)
          .addReg(SaveReg2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
          .addReg(SaveReg1)
          .add(predOps(ARMCC::AL));
  }

  // Pop the stack space
  BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
      .addReg(ARM::SP)
      .addImm(CMSE_FP_SAVE_SIZE >> 2)
      .add(predOps(ARMCC::AL));
}
  1492. static bool definesOrUsesFPReg(const MachineInstr &MI) {
  1493. for (const MachineOperand &Op : MI.operands()) {
  1494. if (!Op.isReg())
  1495. continue;
  1496. Register Reg = Op.getReg();
  1497. if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
  1498. (Reg >= ARM::D0 && Reg <= ARM::D15) ||
  1499. (Reg >= ARM::S0 && Reg <= ARM::S31))
  1500. return true;
  1501. }
  1502. return false;
  1503. }
// Restore the FP registers for v8.1-M: a lazy VLLDM when the instruction at
// MBBI touches no FP register (mirroring the VLSTM save path), otherwise an
// FP-context reload plus VPOP of the callee-saved S-registers.
void ARMExpandPseudo::CMSERestoreFPRegsV81(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (!definesOrUsesFPReg(*MBBI)) {
    if (STI->fixCMSE_CVE_2021_35465()) {
      // Mitigation for CVE-2021-35465: clear VPR (via VSCCLRM) before the
      // lazy load below.
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::VPR, RegState::Define);
    }

    // Load FP registers from stack.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));

    // Pop the stack space
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
        .addReg(ARM::SP)
        .addImm(CMSE_FP_SAVE_SIZE >> 2)
        .add(predOps(ARMCC::AL));
  } else {
    // Restore the floating point context.
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(8)
        .add(predOps(ARMCC::AL));

    // Pop all the callee-saved registers (s16-s31).
    MachineInstrBuilder VPOP =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
      VPOP.addReg(Reg, RegState::Define);
  }
}
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
/// possible. This only gets used at -O0 so we don't care about efficiency of
/// the generated code.
///
/// Pseudo operand layout: 0 = Dest (receives the loaded value), 1 = TempReg
/// (receives the strex success/failure status), 2 = Addr, 3 = Desired (value
/// compared against the load), 4 = New (value stored on a match). \p UxtOp,
/// when non-zero, is a zero-extend opcode applied to Desired in place before
/// the loop (used by the sub-word variants). Returns true and sets
/// \p NextMBBI to resume scanning after the expansion.
bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned LdrexOp, unsigned StrexOp,
                                     unsigned UxtOp,
                                     MachineBasicBlock::iterator &NextMBBI) {
  bool IsThumb = STI->isThumb();
  bool IsThumb1Only = STI->isThumb1Only();
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register TempReg = MI.getOperand(1).getReg();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both, so an undef address operand is not supported here.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  if (IsThumb) {
    assert(STI->hasV8MBaselineOps() &&
           "CMP_SWAP not expected to be custom expanded for Thumb1");
    assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
           "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
    assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
           "DesiredReg used for UXT op must be tGPR");
  }

  // The expansion splits control flow: MBB falls through to LoadCmpBB, which
  // exits to DoneBB on a compare mismatch or falls through to StoreBB on a
  // match; StoreBB loops back to LoadCmpBB when the store-exclusive fails.
  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // Zero-extend Desired in place so it compares equal against the
  // (zero-extended) result of a sub-word load-exclusive.
  if (UxtOp) {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
            .addReg(DesiredReg, RegState::Kill);
    if (!IsThumb)
      MIB.addImm(0); // ARM-mode UXT takes a rotate amount.
    MIB.add(predOps(ARMCC::AL));
  }

  // .Lloadcmp:
  //     ldrex rDest, [rAddr]
  //     cmp rDest, rDesired
  //     bne .Ldone
  MachineInstrBuilder MIB;
  MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
  MIB.addReg(AddrReg);
  if (LdrexOp == ARM::t2LDREX)
    MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
  MIB.add(predOps(ARMCC::AL));

  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .add(predOps(ARMCC::AL));
  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
      .addMBB(DoneBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     strex rTempReg, rNew, [rAddr]
  //     cmp rTempReg, #0
  //     bne .Lloadcmp
  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
            .addReg(NewReg)
            .addReg(AddrReg);
  if (StrexOp == ARM::t2STREX)
    MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
  MIB.add(predOps(ARMCC::AL));

  unsigned CMPri =
      IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri;
  BuildMI(StoreBB, DL, TII->get(CMPri))
      .addReg(TempReg, RegState::Kill)
      .addImm(0)
      .add(predOps(ARMCC::AL));
  BuildMI(StoreBB, DL, TII->get(Bcc))
      .addMBB(LoadCmpBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Everything after the pseudo moves into DoneBB, which also inherits MBB's
  // original successors.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
  1642. /// ARM's ldrexd/strexd take a consecutive register pair (represented as a
  1643. /// single GPRPair register), Thumb's take two separate registers so we need to
  1644. /// extract the subregs from the pair.
  1645. static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
  1646. unsigned Flags, bool IsThumb,
  1647. const TargetRegisterInfo *TRI) {
  1648. if (IsThumb) {
  1649. Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
  1650. Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
  1651. MIB.addReg(RegLo, Flags);
  1652. MIB.addReg(RegHi, Flags);
  1653. } else
  1654. MIB.addReg(Reg.getReg(), Flags);
  1655. }
/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
///
/// Operand layout matches ExpandCMP_SWAP, except Dest, Desired and New are
/// GPRPair registers carrying 64-bit values.
bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        MachineBasicBlock::iterator &NextMBBI) {
  bool IsThumb = STI->isThumb();
  assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!");
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Dest = MI.getOperand(0);
  Register TempReg = MI.getOperand(1).getReg();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both, so an undef address operand is not supported here.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  // The stored value is read on every trip around the loop, so drop any kill
  // flag the original pseudo carried on it.
  MachineOperand New = MI.getOperand(4);
  New.setIsKill(false);

  // Split the 64-bit pairs into their 32-bit halves for the compares.
  Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
  Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
  Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
  Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldrexd rDestLo, rDestHi, [rAddr]
  //     cmp rDestLo, rDesiredLo
  //     cmpeq rDestHi, rDesiredHi   ; executed only if the low halves matched
  //     bne .Ldone
  unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
  MachineInstrBuilder MIB;
  MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
  addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));

  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(DestLo, getKillRegState(Dest.isDead()))
      .addReg(DesiredLo)
      .add(predOps(ARMCC::AL));
  // Predicating the second compare on EQ preserves the NE result of a
  // low-half mismatch: Z is set afterwards iff both halves matched.
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(DestHi, getKillRegState(Dest.isDead()))
      .addReg(DesiredHi)
      .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
      .addMBB(DoneBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     strexd rTempReg, rNewLo, rNewHi, [rAddr]
  //     cmp rTempReg, #0
  //     bne .Lloadcmp
  unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
  unsigned Flags = getKillRegState(New.isDead());
  addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));

  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
  BuildMI(StoreBB, DL, TII->get(CMPri))
      .addReg(TempReg, RegState::Kill)
      .addImm(0)
      .add(predOps(ARMCC::AL));
  BuildMI(StoreBB, DL, TII->get(Bcc))
      .addMBB(LoadCmpBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Everything after the pseudo moves into DoneBB, which also inherits MBB's
  // original successors.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
/// Push the GPR callee-saved registers r4-r11 before a CMSE non-secure call
/// (used by the tBLXNS_CALL expansion, which subsequently clears them).
/// Registers that are neither live (per \p LiveRegs) nor \p JumpReg are
/// pushed with an undef use so the verifier does not require a prior def.
/// \p Thumb1Only selects the Thumb1 expansion, which cannot push high
/// registers directly.
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
                                MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI, int JumpReg,
                                const LivePhysRegs &LiveRegs, bool Thumb1Only) {
  const DebugLoc &DL = MBBI->getDebugLoc();
  if (Thumb1Only) { // push Lo and Hi regs separately
    // First push: r4-r7 (their own values).
    MachineInstrBuilder PushMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
      PushMIB.addReg(
          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
    }

    // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
    // regs that we just saved and push the low regs again, taking care to
    // not clobber JumpReg. If JumpReg is one of the low registers, push first
    // the values of r9-r11, and then r8. That would leave them ordered in
    // memory, and allow us to later pop them with a single instructions.
    // FIXME: Could also use any of r0-r3 that are free (including in the
    // first PUSH above).
    // Copy r11->r7, r10->r6, ... skipping JumpReg; a skipped LoReg also
    // leaves HiReg undecremented, shifting the remaining mapping down.
    for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
      if (JumpReg == LoReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
          .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
          .add(predOps(ARMCC::AL));
      --HiReg;
    }
    // Second push: the low regs now holding high-reg values (minus JumpReg).
    MachineInstrBuilder PushMIB2 =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
      if (Reg == JumpReg)
        continue;
      PushMIB2.addReg(Reg, RegState::Kill);
    }

    // If we couldn't use a low register for temporary storage (because it was
    // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
    // saved.
    if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
      int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
          .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
          .add(predOps(ARMCC::AL))
          .addReg(LoReg, RegState::Kill);
    }
  } else { // push Lo and Hi registers with a single instruction
    // Thumb2: stmdb sp!, {r4-r11}.
    MachineInstrBuilder PushMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
      PushMIB.addReg(
          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
    }
  }
}
  1804. static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
  1805. MachineBasicBlock &MBB,
  1806. MachineBasicBlock::iterator MBBI, int JumpReg,
  1807. bool Thumb1Only) {
  1808. const DebugLoc &DL = MBBI->getDebugLoc();
  1809. if (Thumb1Only) {
  1810. MachineInstrBuilder PopMIB =
  1811. BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
  1812. for (int R = 0; R < 4; ++R) {
  1813. PopMIB.addReg(ARM::R4 + R, RegState::Define);
  1814. BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
  1815. .addReg(ARM::R4 + R, RegState::Kill)
  1816. .add(predOps(ARMCC::AL));
  1817. }
  1818. MachineInstrBuilder PopMIB2 =
  1819. BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
  1820. for (int R = 0; R < 4; ++R)
  1821. PopMIB2.addReg(ARM::R4 + R, RegState::Define);
  1822. } else { // pop Lo and Hi registers with a single instruction
  1823. MachineInstrBuilder PopMIB =
  1824. BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
  1825. .addReg(ARM::SP)
  1826. .add(predOps(ARMCC::AL));
  1827. for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
  1828. PopMIB.addReg(Reg, RegState::Define);
  1829. }
  1830. }
  1831. bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
  1832. MachineBasicBlock::iterator MBBI,
  1833. MachineBasicBlock::iterator &NextMBBI) {
  1834. MachineInstr &MI = *MBBI;
  1835. unsigned Opcode = MI.getOpcode();
  1836. switch (Opcode) {
  1837. default:
  1838. return false;
  1839. case ARM::VBSPd:
  1840. case ARM::VBSPq: {
  1841. Register DstReg = MI.getOperand(0).getReg();
  1842. if (DstReg == MI.getOperand(3).getReg()) {
  1843. // Expand to VBIT
  1844. unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
  1845. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
  1846. .add(MI.getOperand(0))
  1847. .add(MI.getOperand(3))
  1848. .add(MI.getOperand(2))
  1849. .add(MI.getOperand(1))
  1850. .addImm(MI.getOperand(4).getImm())
  1851. .add(MI.getOperand(5));
  1852. } else if (DstReg == MI.getOperand(2).getReg()) {
  1853. // Expand to VBIF
  1854. unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
  1855. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
  1856. .add(MI.getOperand(0))
  1857. .add(MI.getOperand(2))
  1858. .add(MI.getOperand(3))
  1859. .add(MI.getOperand(1))
  1860. .addImm(MI.getOperand(4).getImm())
  1861. .add(MI.getOperand(5));
  1862. } else {
  1863. // Expand to VBSL
  1864. unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
  1865. if (DstReg == MI.getOperand(1).getReg()) {
  1866. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
  1867. .add(MI.getOperand(0))
  1868. .add(MI.getOperand(1))
  1869. .add(MI.getOperand(2))
  1870. .add(MI.getOperand(3))
  1871. .addImm(MI.getOperand(4).getImm())
  1872. .add(MI.getOperand(5));
  1873. } else {
  1874. // Use move to satisfy constraints
  1875. unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
  1876. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
  1877. .addReg(DstReg,
  1878. RegState::Define |
  1879. getRenamableRegState(MI.getOperand(0).isRenamable()))
  1880. .add(MI.getOperand(1))
  1881. .add(MI.getOperand(1))
  1882. .addImm(MI.getOperand(4).getImm())
  1883. .add(MI.getOperand(5));
  1884. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
  1885. .add(MI.getOperand(0))
  1886. .addReg(DstReg,
  1887. RegState::Kill |
  1888. getRenamableRegState(MI.getOperand(0).isRenamable()))
  1889. .add(MI.getOperand(2))
  1890. .add(MI.getOperand(3))
  1891. .addImm(MI.getOperand(4).getImm())
  1892. .add(MI.getOperand(5));
  1893. }
  1894. }
  1895. MI.eraseFromParent();
  1896. return true;
  1897. }
  1898. case ARM::TCRETURNdi:
  1899. case ARM::TCRETURNri: {
  1900. MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  1901. if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
  1902. MBBI--;
  1903. if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
  1904. MBBI--;
  1905. assert(MBBI->isReturn() &&
  1906. "Can only insert epilog into returning blocks");
  1907. unsigned RetOpcode = MBBI->getOpcode();
  1908. DebugLoc dl = MBBI->getDebugLoc();
  1909. const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
  1910. MBB.getParent()->getSubtarget().getInstrInfo());
  1911. // Tail call return: adjust the stack pointer and jump to callee.
  1912. MBBI = MBB.getLastNonDebugInstr();
  1913. if (MBBI->getOpcode() == ARM::SEH_EpilogEnd)
  1914. MBBI--;
  1915. if (MBBI->getOpcode() == ARM::SEH_Nop_Ret)
  1916. MBBI--;
  1917. MachineOperand &JumpTarget = MBBI->getOperand(0);
  1918. // Jump to label or value in register.
  1919. if (RetOpcode == ARM::TCRETURNdi) {
  1920. MachineFunction *MF = MBB.getParent();
  1921. bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() &&
  1922. MF->getFunction().needsUnwindTableEntry();
  1923. unsigned TCOpcode =
  1924. STI->isThumb()
  1925. ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd
  1926. : ARM::tTAILJMPdND)
  1927. : ARM::TAILJMPd;
  1928. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
  1929. if (JumpTarget.isGlobal())
  1930. MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
  1931. JumpTarget.getTargetFlags());
  1932. else {
  1933. assert(JumpTarget.isSymbol());
  1934. MIB.addExternalSymbol(JumpTarget.getSymbolName(),
  1935. JumpTarget.getTargetFlags());
  1936. }
  1937. // Add the default predicate in Thumb mode.
  1938. if (STI->isThumb())
  1939. MIB.add(predOps(ARMCC::AL));
  1940. } else if (RetOpcode == ARM::TCRETURNri) {
  1941. unsigned Opcode =
  1942. STI->isThumb() ? ARM::tTAILJMPr
  1943. : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
  1944. BuildMI(MBB, MBBI, dl,
  1945. TII.get(Opcode))
  1946. .addReg(JumpTarget.getReg(), RegState::Kill);
  1947. }
  1948. auto NewMI = std::prev(MBBI);
  1949. for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
  1950. NewMI->addOperand(MBBI->getOperand(i));
  1951. // Update call site info and delete the pseudo instruction TCRETURN.
  1952. if (MI.isCandidateForCallSiteEntry())
  1953. MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
  1954. MBB.erase(MBBI);
  1955. MBBI = NewMI;
  1956. return true;
  1957. }
  1958. case ARM::tBXNS_RET: {
  1959. // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which
  1960. // uses R12 as a scratch register.
  1961. if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress())
  1962. BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT));
  1963. MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
  1964. if (STI->hasV8_1MMainlineOps()) {
  1965. // Restore the non-secure floating point context.
  1966. BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1967. TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
  1968. .addReg(ARM::SP)
  1969. .addImm(4)
  1970. .add(predOps(ARMCC::AL));
  1971. if (AFI->shouldSignReturnAddress())
  1972. BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT));
  1973. }
  1974. // Clear all GPR that are not a use of the return instruction.
  1975. assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
  1976. return !Op.isReg() || Op.getReg() != ARM::R12;
  1977. }));
  1978. SmallVector<unsigned, 5> ClearRegs;
  1979. determineGPRegsToClear(
  1980. *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
  1981. CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
  1982. ARM::LR);
  1983. MachineInstrBuilder NewMI =
  1984. BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
  1985. TII->get(ARM::tBXNS))
  1986. .addReg(ARM::LR)
  1987. .add(predOps(ARMCC::AL));
  1988. for (const MachineOperand &Op : MI.operands())
  1989. NewMI->addOperand(Op);
  1990. MI.eraseFromParent();
  1991. return true;
  1992. }
  1993. case ARM::tBLXNS_CALL: {
  1994. DebugLoc DL = MBBI->getDebugLoc();
  1995. Register JumpReg = MBBI->getOperand(0).getReg();
  1996. // Figure out which registers are live at the point immediately before the
  1997. // call. When we indiscriminately push a set of registers, the live
  1998. // registers are added as ordinary use operands, whereas dead registers
  1999. // are "undef".
  2000. LivePhysRegs LiveRegs(*TRI);
  2001. LiveRegs.addLiveOuts(MBB);
  2002. for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
  2003. LiveRegs.stepBackward(MI);
  2004. LiveRegs.stepBackward(*MBBI);
  2005. CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
  2006. AFI->isThumb1OnlyFunction());
  2007. SmallVector<unsigned, 16> ClearRegs;
  2008. determineGPRegsToClear(*MBBI,
  2009. {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
  2010. ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
  2011. ARM::R10, ARM::R11, ARM::R12},
  2012. ClearRegs);
  2013. auto OriginalClearRegs = ClearRegs;
  2014. // Get the first cleared register as a scratch (to use later with tBIC).
  2015. // We need to use the first so we can ensure it is a low register.
  2016. unsigned ScratchReg = ClearRegs.front();
  2017. // Clear LSB of JumpReg
  2018. if (AFI->isThumb2Function()) {
  2019. BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
  2020. .addReg(JumpReg)
  2021. .addImm(1)
  2022. .add(predOps(ARMCC::AL))
  2023. .add(condCodeOp());
  2024. } else {
  2025. // We need to use an extra register to cope with 8M Baseline,
  2026. // since we have saved all of the registers we are ok to trash a non
  2027. // argument register here.
  2028. BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
  2029. .add(condCodeOp())
  2030. .addImm(1)
  2031. .add(predOps(ARMCC::AL));
  2032. BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
  2033. .addReg(ARM::CPSR, RegState::Define)
  2034. .addReg(JumpReg)
  2035. .addReg(ScratchReg)
  2036. .add(predOps(ARMCC::AL));
  2037. }
  2038. CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
  2039. ClearRegs); // save+clear FP regs with ClearRegs
  2040. CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
  2041. const MachineInstrBuilder NewCall =
  2042. BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
  2043. .add(predOps(ARMCC::AL))
  2044. .addReg(JumpReg, RegState::Kill);
  2045. for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
  2046. NewCall->addOperand(MO);
  2047. if (MI.isCandidateForCallSiteEntry())
  2048. MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
  2049. CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
  2050. CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());
  2051. MI.eraseFromParent();
  2052. return true;
  2053. }
  2054. case ARM::VMOVHcc:
  2055. case ARM::VMOVScc:
  2056. case ARM::VMOVDcc: {
  2057. unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
  2058. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
  2059. MI.getOperand(1).getReg())
  2060. .add(MI.getOperand(2))
  2061. .addImm(MI.getOperand(3).getImm()) // 'pred'
  2062. .add(MI.getOperand(4))
  2063. .add(makeImplicit(MI.getOperand(1)));
  2064. MI.eraseFromParent();
  2065. return true;
  2066. }
  2067. case ARM::t2MOVCCr:
  2068. case ARM::MOVCCr: {
  2069. unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
  2070. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
  2071. MI.getOperand(1).getReg())
  2072. .add(MI.getOperand(2))
  2073. .addImm(MI.getOperand(3).getImm()) // 'pred'
  2074. .add(MI.getOperand(4))
  2075. .add(condCodeOp()) // 's' bit
  2076. .add(makeImplicit(MI.getOperand(1)));
  2077. MI.eraseFromParent();
  2078. return true;
  2079. }
  2080. case ARM::MOVCCsi: {
  2081. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
  2082. (MI.getOperand(1).getReg()))
  2083. .add(MI.getOperand(2))
  2084. .addImm(MI.getOperand(3).getImm())
  2085. .addImm(MI.getOperand(4).getImm()) // 'pred'
  2086. .add(MI.getOperand(5))
  2087. .add(condCodeOp()) // 's' bit
  2088. .add(makeImplicit(MI.getOperand(1)));
  2089. MI.eraseFromParent();
  2090. return true;
  2091. }
  2092. case ARM::MOVCCsr: {
  2093. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
  2094. (MI.getOperand(1).getReg()))
  2095. .add(MI.getOperand(2))
  2096. .add(MI.getOperand(3))
  2097. .addImm(MI.getOperand(4).getImm())
  2098. .addImm(MI.getOperand(5).getImm()) // 'pred'
  2099. .add(MI.getOperand(6))
  2100. .add(condCodeOp()) // 's' bit
  2101. .add(makeImplicit(MI.getOperand(1)));
  2102. MI.eraseFromParent();
  2103. return true;
  2104. }
  2105. case ARM::t2MOVCCi16:
  2106. case ARM::MOVCCi16: {
  2107. unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
  2108. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
  2109. MI.getOperand(1).getReg())
  2110. .addImm(MI.getOperand(2).getImm())
  2111. .addImm(MI.getOperand(3).getImm()) // 'pred'
  2112. .add(MI.getOperand(4))
  2113. .add(makeImplicit(MI.getOperand(1)));
  2114. MI.eraseFromParent();
  2115. return true;
  2116. }
  2117. case ARM::t2MOVCCi:
  2118. case ARM::MOVCCi: {
  2119. unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
  2120. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
  2121. MI.getOperand(1).getReg())
  2122. .addImm(MI.getOperand(2).getImm())
  2123. .addImm(MI.getOperand(3).getImm()) // 'pred'
  2124. .add(MI.getOperand(4))
  2125. .add(condCodeOp()) // 's' bit
  2126. .add(makeImplicit(MI.getOperand(1)));
  2127. MI.eraseFromParent();
  2128. return true;
  2129. }
  2130. case ARM::t2MVNCCi:
  2131. case ARM::MVNCCi: {
  2132. unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
  2133. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
  2134. MI.getOperand(1).getReg())
  2135. .addImm(MI.getOperand(2).getImm())
  2136. .addImm(MI.getOperand(3).getImm()) // 'pred'
  2137. .add(MI.getOperand(4))
  2138. .add(condCodeOp()) // 's' bit
  2139. .add(makeImplicit(MI.getOperand(1)));
  2140. MI.eraseFromParent();
  2141. return true;
  2142. }
  2143. case ARM::t2MOVCClsl:
  2144. case ARM::t2MOVCClsr:
  2145. case ARM::t2MOVCCasr:
  2146. case ARM::t2MOVCCror: {
  2147. unsigned NewOpc;
  2148. switch (Opcode) {
  2149. case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
  2150. case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
  2151. case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
  2152. case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
  2153. default: llvm_unreachable("unexpeced conditional move");
  2154. }
  2155. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
  2156. MI.getOperand(1).getReg())
  2157. .add(MI.getOperand(2))
  2158. .addImm(MI.getOperand(3).getImm())
  2159. .addImm(MI.getOperand(4).getImm()) // 'pred'
  2160. .add(MI.getOperand(5))
  2161. .add(condCodeOp()) // 's' bit
  2162. .add(makeImplicit(MI.getOperand(1)));
  2163. MI.eraseFromParent();
  2164. return true;
  2165. }
  2166. case ARM::Int_eh_sjlj_dispatchsetup: {
  2167. MachineFunction &MF = *MI.getParent()->getParent();
  2168. const ARMBaseInstrInfo *AII =
  2169. static_cast<const ARMBaseInstrInfo*>(TII);
  2170. const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
  2171. // For functions using a base pointer, we rematerialize it (via the frame
  2172. // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
  2173. // for us. Otherwise, expand to nothing.
  2174. if (RI.hasBasePointer(MF)) {
  2175. int32_t NumBytes = AFI->getFramePtrSpillOffset();
  2176. Register FramePtr = RI.getFrameRegister(MF);
  2177. assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
  2178. "base pointer without frame pointer?");
  2179. if (AFI->isThumb2Function()) {
  2180. emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
  2181. FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
  2182. } else if (AFI->isThumbFunction()) {
  2183. emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
  2184. FramePtr, -NumBytes, *TII, RI);
  2185. } else {
  2186. emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
  2187. FramePtr, -NumBytes, ARMCC::AL, 0,
  2188. *TII);
  2189. }
  2190. // If there's dynamic realignment, adjust for it.
  2191. if (RI.hasStackRealignment(MF)) {
  2192. MachineFrameInfo &MFI = MF.getFrameInfo();
  2193. Align MaxAlign = MFI.getMaxAlign();
  2194. assert (!AFI->isThumb1OnlyFunction());
  2195. // Emit bic r6, r6, MaxAlign
  2196. assert(MaxAlign <= Align(256) &&
  2197. "The BIC instruction cannot encode "
  2198. "immediates larger than 256 with all lower "
  2199. "bits set.");
  2200. unsigned bicOpc = AFI->isThumbFunction() ?
  2201. ARM::t2BICri : ARM::BICri;
  2202. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
  2203. .addReg(ARM::R6, RegState::Kill)
  2204. .addImm(MaxAlign.value() - 1)
  2205. .add(predOps(ARMCC::AL))
  2206. .add(condCodeOp());
  2207. }
  2208. }
  2209. MI.eraseFromParent();
  2210. return true;
  2211. }
  2212. case ARM::MOVsrl_flag:
  2213. case ARM::MOVsra_flag: {
  2214. // These are just fancy MOVs instructions.
  2215. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
  2216. MI.getOperand(0).getReg())
  2217. .add(MI.getOperand(1))
  2218. .addImm(ARM_AM::getSORegOpc(
  2219. (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
  2220. .add(predOps(ARMCC::AL))
  2221. .addReg(ARM::CPSR, RegState::Define);
  2222. MI.eraseFromParent();
  2223. return true;
  2224. }
  2225. case ARM::RRX: {
  2226. // This encodes as "MOVs Rd, Rm, rrx
  2227. MachineInstrBuilder MIB =
  2228. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
  2229. MI.getOperand(0).getReg())
  2230. .add(MI.getOperand(1))
  2231. .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
  2232. .add(predOps(ARMCC::AL))
  2233. .add(condCodeOp());
  2234. TransferImpOps(MI, MIB, MIB);
  2235. MI.eraseFromParent();
  2236. return true;
  2237. }
  2238. case ARM::tTPsoft:
  2239. case ARM::TPsoft: {
  2240. const bool Thumb = Opcode == ARM::tTPsoft;
  2241. MachineInstrBuilder MIB;
  2242. MachineFunction *MF = MBB.getParent();
  2243. if (STI->genLongCalls()) {
  2244. MachineConstantPool *MCP = MF->getConstantPool();
  2245. unsigned PCLabelID = AFI->createPICLabelUId();
  2246. MachineConstantPoolValue *CPV =
  2247. ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
  2248. "__aeabi_read_tp", PCLabelID, 0);
  2249. Register Reg = MI.getOperand(0).getReg();
  2250. MIB =
  2251. BuildMI(MBB, MBBI, MI.getDebugLoc(),
  2252. TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
  2253. .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
  2254. if (!Thumb)
  2255. MIB.addImm(0);
  2256. MIB.add(predOps(ARMCC::AL));
  2257. MIB =
  2258. BuildMI(MBB, MBBI, MI.getDebugLoc(),
  2259. TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
  2260. if (Thumb)
  2261. MIB.add(predOps(ARMCC::AL));
  2262. MIB.addReg(Reg, RegState::Kill);
  2263. } else {
  2264. MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
  2265. TII->get(Thumb ? ARM::tBL : ARM::BL));
  2266. if (Thumb)
  2267. MIB.add(predOps(ARMCC::AL));
  2268. MIB.addExternalSymbol("__aeabi_read_tp", 0);
  2269. }
  2270. MIB.cloneMemRefs(MI);
  2271. TransferImpOps(MI, MIB, MIB);
  2272. // Update the call site info.
  2273. if (MI.isCandidateForCallSiteEntry())
  2274. MF->moveCallSiteInfo(&MI, &*MIB);
  2275. MI.eraseFromParent();
  2276. return true;
  2277. }
  2278. case ARM::tLDRpci_pic:
  2279. case ARM::t2LDRpci_pic: {
  2280. unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
  2281. ? ARM::tLDRpci : ARM::t2LDRpci;
  2282. Register DstReg = MI.getOperand(0).getReg();
  2283. bool DstIsDead = MI.getOperand(0).isDead();
  2284. MachineInstrBuilder MIB1 =
  2285. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
  2286. .add(MI.getOperand(1))
  2287. .add(predOps(ARMCC::AL));
  2288. MIB1.cloneMemRefs(MI);
  2289. MachineInstrBuilder MIB2 =
  2290. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
  2291. .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
  2292. .addReg(DstReg)
  2293. .add(MI.getOperand(2));
  2294. TransferImpOps(MI, MIB1, MIB2);
  2295. MI.eraseFromParent();
  2296. return true;
  2297. }
  2298. case ARM::LDRLIT_ga_abs:
  2299. case ARM::LDRLIT_ga_pcrel:
  2300. case ARM::LDRLIT_ga_pcrel_ldr:
  2301. case ARM::tLDRLIT_ga_abs:
  2302. case ARM::t2LDRLIT_ga_pcrel:
  2303. case ARM::tLDRLIT_ga_pcrel: {
  2304. Register DstReg = MI.getOperand(0).getReg();
  2305. bool DstIsDead = MI.getOperand(0).isDead();
  2306. const MachineOperand &MO1 = MI.getOperand(1);
  2307. auto Flags = MO1.getTargetFlags();
  2308. const GlobalValue *GV = MO1.getGlobal();
  2309. bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel &&
  2310. Opcode != ARM::tLDRLIT_ga_abs &&
  2311. Opcode != ARM::t2LDRLIT_ga_pcrel;
  2312. bool IsPIC =
  2313. Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
  2314. unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
  2315. if (Opcode == ARM::t2LDRLIT_ga_pcrel)
  2316. LDRLITOpc = ARM::t2LDRpci;
  2317. unsigned PICAddOpc =
  2318. IsARM
  2319. ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
  2320. : ARM::tPICADD;
  2321. // We need a new const-pool entry to load from.
  2322. MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
  2323. unsigned ARMPCLabelIndex = 0;
  2324. MachineConstantPoolValue *CPV;
  2325. if (IsPIC) {
  2326. unsigned PCAdj = IsARM ? 8 : 4;
  2327. auto Modifier = (Flags & ARMII::MO_GOT)
  2328. ? ARMCP::GOT_PREL
  2329. : ARMCP::no_modifier;
  2330. ARMPCLabelIndex = AFI->createPICLabelUId();
  2331. CPV = ARMConstantPoolConstant::Create(
  2332. GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
  2333. /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL);
  2334. } else
  2335. CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
  2336. MachineInstrBuilder MIB =
  2337. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
  2338. .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
  2339. if (IsARM)
  2340. MIB.addImm(0);
  2341. MIB.add(predOps(ARMCC::AL));
  2342. if (IsPIC) {
  2343. MachineInstrBuilder MIB =
  2344. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
  2345. .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
  2346. .addReg(DstReg)
  2347. .addImm(ARMPCLabelIndex);
  2348. if (IsARM)
  2349. MIB.add(predOps(ARMCC::AL));
  2350. }
  2351. MI.eraseFromParent();
  2352. return true;
  2353. }
  2354. case ARM::MOV_ga_pcrel:
  2355. case ARM::MOV_ga_pcrel_ldr:
  2356. case ARM::t2MOV_ga_pcrel: {
  2357. // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
  2358. unsigned LabelId = AFI->createPICLabelUId();
  2359. Register DstReg = MI.getOperand(0).getReg();
  2360. bool DstIsDead = MI.getOperand(0).isDead();
  2361. const MachineOperand &MO1 = MI.getOperand(1);
  2362. const GlobalValue *GV = MO1.getGlobal();
  2363. unsigned TF = MO1.getTargetFlags();
  2364. bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
  2365. unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
  2366. unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
  2367. unsigned LO16TF = TF | ARMII::MO_LO16;
  2368. unsigned HI16TF = TF | ARMII::MO_HI16;
  2369. unsigned PICAddOpc = isARM
  2370. ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
  2371. : ARM::tPICADD;
  2372. MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
  2373. TII->get(LO16Opc), DstReg)
  2374. .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
  2375. .addImm(LabelId);
  2376. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
  2377. .addReg(DstReg)
  2378. .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
  2379. .addImm(LabelId);
  2380. MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
  2381. TII->get(PICAddOpc))
  2382. .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
  2383. .addReg(DstReg).addImm(LabelId);
  2384. if (isARM) {
  2385. MIB3.add(predOps(ARMCC::AL));
  2386. if (Opcode == ARM::MOV_ga_pcrel_ldr)
  2387. MIB3.cloneMemRefs(MI);
  2388. }
  2389. TransferImpOps(MI, MIB1, MIB3);
  2390. MI.eraseFromParent();
  2391. return true;
  2392. }
  2393. case ARM::MOVi32imm:
  2394. case ARM::MOVCCi32imm:
  2395. case ARM::t2MOVi32imm:
  2396. case ARM::t2MOVCCi32imm:
  2397. ExpandMOV32BitImm(MBB, MBBI);
  2398. return true;
  2399. case ARM::SUBS_PC_LR: {
  2400. MachineInstrBuilder MIB =
  2401. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
  2402. .addReg(ARM::LR)
  2403. .add(MI.getOperand(0))
  2404. .add(MI.getOperand(1))
  2405. .add(MI.getOperand(2))
  2406. .addReg(ARM::CPSR, RegState::Undef);
  2407. TransferImpOps(MI, MIB, MIB);
  2408. MI.eraseFromParent();
  2409. return true;
  2410. }
  2411. case ARM::VLDMQIA: {
  2412. unsigned NewOpc = ARM::VLDMDIA;
  2413. MachineInstrBuilder MIB =
  2414. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
  2415. unsigned OpIdx = 0;
  2416. // Grab the Q register destination.
  2417. bool DstIsDead = MI.getOperand(OpIdx).isDead();
  2418. Register DstReg = MI.getOperand(OpIdx++).getReg();
  2419. // Copy the source register.
  2420. MIB.add(MI.getOperand(OpIdx++));
  2421. // Copy the predicate operands.
  2422. MIB.add(MI.getOperand(OpIdx++));
  2423. MIB.add(MI.getOperand(OpIdx++));
  2424. // Add the destination operands (D subregs).
  2425. Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
  2426. Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
  2427. MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
  2428. .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
  2429. // Add an implicit def for the super-register.
  2430. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  2431. TransferImpOps(MI, MIB, MIB);
  2432. MIB.cloneMemRefs(MI);
  2433. MI.eraseFromParent();
  2434. return true;
  2435. }
  2436. case ARM::VSTMQIA: {
  2437. unsigned NewOpc = ARM::VSTMDIA;
  2438. MachineInstrBuilder MIB =
  2439. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
  2440. unsigned OpIdx = 0;
  2441. // Grab the Q register source.
  2442. bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  2443. Register SrcReg = MI.getOperand(OpIdx++).getReg();
  2444. // Copy the destination register.
  2445. MachineOperand Dst(MI.getOperand(OpIdx++));
  2446. MIB.add(Dst);
  2447. // Copy the predicate operands.
  2448. MIB.add(MI.getOperand(OpIdx++));
  2449. MIB.add(MI.getOperand(OpIdx++));
  2450. // Add the source operands (D subregs).
  2451. Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
  2452. Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
  2453. MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
  2454. .addReg(D1, SrcIsKill ? RegState::Kill : 0);
  2455. if (SrcIsKill) // Add an implicit kill for the Q register.
  2456. MIB->addRegisterKilled(SrcReg, TRI, true);
  2457. TransferImpOps(MI, MIB, MIB);
  2458. MIB.cloneMemRefs(MI);
  2459. MI.eraseFromParent();
  2460. return true;
  2461. }
  2462. case ARM::VLD2q8Pseudo:
  2463. case ARM::VLD2q16Pseudo:
  2464. case ARM::VLD2q32Pseudo:
  2465. case ARM::VLD2q8PseudoWB_fixed:
  2466. case ARM::VLD2q16PseudoWB_fixed:
  2467. case ARM::VLD2q32PseudoWB_fixed:
  2468. case ARM::VLD2q8PseudoWB_register:
  2469. case ARM::VLD2q16PseudoWB_register:
  2470. case ARM::VLD2q32PseudoWB_register:
  2471. case ARM::VLD3d8Pseudo:
  2472. case ARM::VLD3d16Pseudo:
  2473. case ARM::VLD3d32Pseudo:
  2474. case ARM::VLD1d8TPseudo:
  2475. case ARM::VLD1d8TPseudoWB_fixed:
  2476. case ARM::VLD1d8TPseudoWB_register:
  2477. case ARM::VLD1d16TPseudo:
  2478. case ARM::VLD1d16TPseudoWB_fixed:
  2479. case ARM::VLD1d16TPseudoWB_register:
  2480. case ARM::VLD1d32TPseudo:
  2481. case ARM::VLD1d32TPseudoWB_fixed:
  2482. case ARM::VLD1d32TPseudoWB_register:
  2483. case ARM::VLD1d64TPseudo:
  2484. case ARM::VLD1d64TPseudoWB_fixed:
  2485. case ARM::VLD1d64TPseudoWB_register:
  2486. case ARM::VLD3d8Pseudo_UPD:
  2487. case ARM::VLD3d16Pseudo_UPD:
  2488. case ARM::VLD3d32Pseudo_UPD:
  2489. case ARM::VLD3q8Pseudo_UPD:
  2490. case ARM::VLD3q16Pseudo_UPD:
  2491. case ARM::VLD3q32Pseudo_UPD:
  2492. case ARM::VLD3q8oddPseudo:
  2493. case ARM::VLD3q16oddPseudo:
  2494. case ARM::VLD3q32oddPseudo:
  2495. case ARM::VLD3q8oddPseudo_UPD:
  2496. case ARM::VLD3q16oddPseudo_UPD:
  2497. case ARM::VLD3q32oddPseudo_UPD:
  2498. case ARM::VLD4d8Pseudo:
  2499. case ARM::VLD4d16Pseudo:
  2500. case ARM::VLD4d32Pseudo:
  2501. case ARM::VLD1d8QPseudo:
  2502. case ARM::VLD1d8QPseudoWB_fixed:
  2503. case ARM::VLD1d8QPseudoWB_register:
  2504. case ARM::VLD1d16QPseudo:
  2505. case ARM::VLD1d16QPseudoWB_fixed:
  2506. case ARM::VLD1d16QPseudoWB_register:
  2507. case ARM::VLD1d32QPseudo:
  2508. case ARM::VLD1d32QPseudoWB_fixed:
  2509. case ARM::VLD1d32QPseudoWB_register:
  2510. case ARM::VLD1d64QPseudo:
  2511. case ARM::VLD1d64QPseudoWB_fixed:
  2512. case ARM::VLD1d64QPseudoWB_register:
  2513. case ARM::VLD1q8HighQPseudo:
  2514. case ARM::VLD1q8HighQPseudo_UPD:
  2515. case ARM::VLD1q8LowQPseudo_UPD:
  2516. case ARM::VLD1q8HighTPseudo:
  2517. case ARM::VLD1q8HighTPseudo_UPD:
  2518. case ARM::VLD1q8LowTPseudo_UPD:
  2519. case ARM::VLD1q16HighQPseudo:
  2520. case ARM::VLD1q16HighQPseudo_UPD:
  2521. case ARM::VLD1q16LowQPseudo_UPD:
  2522. case ARM::VLD1q16HighTPseudo:
  2523. case ARM::VLD1q16HighTPseudo_UPD:
  2524. case ARM::VLD1q16LowTPseudo_UPD:
  2525. case ARM::VLD1q32HighQPseudo:
  2526. case ARM::VLD1q32HighQPseudo_UPD:
  2527. case ARM::VLD1q32LowQPseudo_UPD:
  2528. case ARM::VLD1q32HighTPseudo:
  2529. case ARM::VLD1q32HighTPseudo_UPD:
  2530. case ARM::VLD1q32LowTPseudo_UPD:
  2531. case ARM::VLD1q64HighQPseudo:
  2532. case ARM::VLD1q64HighQPseudo_UPD:
  2533. case ARM::VLD1q64LowQPseudo_UPD:
  2534. case ARM::VLD1q64HighTPseudo:
  2535. case ARM::VLD1q64HighTPseudo_UPD:
  2536. case ARM::VLD1q64LowTPseudo_UPD:
  2537. case ARM::VLD4d8Pseudo_UPD:
  2538. case ARM::VLD4d16Pseudo_UPD:
  2539. case ARM::VLD4d32Pseudo_UPD:
  2540. case ARM::VLD4q8Pseudo_UPD:
  2541. case ARM::VLD4q16Pseudo_UPD:
  2542. case ARM::VLD4q32Pseudo_UPD:
  2543. case ARM::VLD4q8oddPseudo:
  2544. case ARM::VLD4q16oddPseudo:
  2545. case ARM::VLD4q32oddPseudo:
  2546. case ARM::VLD4q8oddPseudo_UPD:
  2547. case ARM::VLD4q16oddPseudo_UPD:
  2548. case ARM::VLD4q32oddPseudo_UPD:
  2549. case ARM::VLD3DUPd8Pseudo:
  2550. case ARM::VLD3DUPd16Pseudo:
  2551. case ARM::VLD3DUPd32Pseudo:
  2552. case ARM::VLD3DUPd8Pseudo_UPD:
  2553. case ARM::VLD3DUPd16Pseudo_UPD:
  2554. case ARM::VLD3DUPd32Pseudo_UPD:
  2555. case ARM::VLD4DUPd8Pseudo:
  2556. case ARM::VLD4DUPd16Pseudo:
  2557. case ARM::VLD4DUPd32Pseudo:
  2558. case ARM::VLD4DUPd8Pseudo_UPD:
  2559. case ARM::VLD4DUPd16Pseudo_UPD:
  2560. case ARM::VLD4DUPd32Pseudo_UPD:
  2561. case ARM::VLD2DUPq8EvenPseudo:
  2562. case ARM::VLD2DUPq8OddPseudo:
  2563. case ARM::VLD2DUPq16EvenPseudo:
  2564. case ARM::VLD2DUPq16OddPseudo:
  2565. case ARM::VLD2DUPq32EvenPseudo:
  2566. case ARM::VLD2DUPq32OddPseudo:
  2567. case ARM::VLD2DUPq8OddPseudoWB_fixed:
  2568. case ARM::VLD2DUPq8OddPseudoWB_register:
  2569. case ARM::VLD2DUPq16OddPseudoWB_fixed:
  2570. case ARM::VLD2DUPq16OddPseudoWB_register:
  2571. case ARM::VLD2DUPq32OddPseudoWB_fixed:
  2572. case ARM::VLD2DUPq32OddPseudoWB_register:
  2573. case ARM::VLD3DUPq8EvenPseudo:
  2574. case ARM::VLD3DUPq8OddPseudo:
  2575. case ARM::VLD3DUPq16EvenPseudo:
  2576. case ARM::VLD3DUPq16OddPseudo:
  2577. case ARM::VLD3DUPq32EvenPseudo:
  2578. case ARM::VLD3DUPq32OddPseudo:
  2579. case ARM::VLD3DUPq8OddPseudo_UPD:
  2580. case ARM::VLD3DUPq16OddPseudo_UPD:
  2581. case ARM::VLD3DUPq32OddPseudo_UPD:
  2582. case ARM::VLD4DUPq8EvenPseudo:
  2583. case ARM::VLD4DUPq8OddPseudo:
  2584. case ARM::VLD4DUPq16EvenPseudo:
  2585. case ARM::VLD4DUPq16OddPseudo:
  2586. case ARM::VLD4DUPq32EvenPseudo:
  2587. case ARM::VLD4DUPq32OddPseudo:
  2588. case ARM::VLD4DUPq8OddPseudo_UPD:
  2589. case ARM::VLD4DUPq16OddPseudo_UPD:
  2590. case ARM::VLD4DUPq32OddPseudo_UPD:
  2591. ExpandVLD(MBBI);
  2592. return true;
  2593. case ARM::VST2q8Pseudo:
  2594. case ARM::VST2q16Pseudo:
  2595. case ARM::VST2q32Pseudo:
  2596. case ARM::VST2q8PseudoWB_fixed:
  2597. case ARM::VST2q16PseudoWB_fixed:
  2598. case ARM::VST2q32PseudoWB_fixed:
  2599. case ARM::VST2q8PseudoWB_register:
  2600. case ARM::VST2q16PseudoWB_register:
  2601. case ARM::VST2q32PseudoWB_register:
  2602. case ARM::VST3d8Pseudo:
  2603. case ARM::VST3d16Pseudo:
  2604. case ARM::VST3d32Pseudo:
  2605. case ARM::VST1d8TPseudo:
  2606. case ARM::VST1d8TPseudoWB_fixed:
  2607. case ARM::VST1d8TPseudoWB_register:
  2608. case ARM::VST1d16TPseudo:
  2609. case ARM::VST1d16TPseudoWB_fixed:
  2610. case ARM::VST1d16TPseudoWB_register:
  2611. case ARM::VST1d32TPseudo:
  2612. case ARM::VST1d32TPseudoWB_fixed:
  2613. case ARM::VST1d32TPseudoWB_register:
  2614. case ARM::VST1d64TPseudo:
  2615. case ARM::VST1d64TPseudoWB_fixed:
  2616. case ARM::VST1d64TPseudoWB_register:
  2617. case ARM::VST3d8Pseudo_UPD:
  2618. case ARM::VST3d16Pseudo_UPD:
  2619. case ARM::VST3d32Pseudo_UPD:
  2620. case ARM::VST3q8Pseudo_UPD:
  2621. case ARM::VST3q16Pseudo_UPD:
  2622. case ARM::VST3q32Pseudo_UPD:
  2623. case ARM::VST3q8oddPseudo:
  2624. case ARM::VST3q16oddPseudo:
  2625. case ARM::VST3q32oddPseudo:
  2626. case ARM::VST3q8oddPseudo_UPD:
  2627. case ARM::VST3q16oddPseudo_UPD:
  2628. case ARM::VST3q32oddPseudo_UPD:
  2629. case ARM::VST4d8Pseudo:
  2630. case ARM::VST4d16Pseudo:
  2631. case ARM::VST4d32Pseudo:
  2632. case ARM::VST1d8QPseudo:
  2633. case ARM::VST1d8QPseudoWB_fixed:
  2634. case ARM::VST1d8QPseudoWB_register:
  2635. case ARM::VST1d16QPseudo:
  2636. case ARM::VST1d16QPseudoWB_fixed:
  2637. case ARM::VST1d16QPseudoWB_register:
  2638. case ARM::VST1d32QPseudo:
  2639. case ARM::VST1d32QPseudoWB_fixed:
  2640. case ARM::VST1d32QPseudoWB_register:
  2641. case ARM::VST1d64QPseudo:
  2642. case ARM::VST1d64QPseudoWB_fixed:
  2643. case ARM::VST1d64QPseudoWB_register:
  2644. case ARM::VST4d8Pseudo_UPD:
  2645. case ARM::VST4d16Pseudo_UPD:
  2646. case ARM::VST4d32Pseudo_UPD:
  2647. case ARM::VST1q8HighQPseudo:
  2648. case ARM::VST1q8LowQPseudo_UPD:
  2649. case ARM::VST1q8HighTPseudo:
  2650. case ARM::VST1q8LowTPseudo_UPD:
  2651. case ARM::VST1q16HighQPseudo:
  2652. case ARM::VST1q16LowQPseudo_UPD:
  2653. case ARM::VST1q16HighTPseudo:
  2654. case ARM::VST1q16LowTPseudo_UPD:
  2655. case ARM::VST1q32HighQPseudo:
  2656. case ARM::VST1q32LowQPseudo_UPD:
  2657. case ARM::VST1q32HighTPseudo:
  2658. case ARM::VST1q32LowTPseudo_UPD:
  2659. case ARM::VST1q64HighQPseudo:
  2660. case ARM::VST1q64LowQPseudo_UPD:
  2661. case ARM::VST1q64HighTPseudo:
  2662. case ARM::VST1q64LowTPseudo_UPD:
  2663. case ARM::VST1q8HighTPseudo_UPD:
  2664. case ARM::VST1q16HighTPseudo_UPD:
  2665. case ARM::VST1q32HighTPseudo_UPD:
  2666. case ARM::VST1q64HighTPseudo_UPD:
  2667. case ARM::VST1q8HighQPseudo_UPD:
  2668. case ARM::VST1q16HighQPseudo_UPD:
  2669. case ARM::VST1q32HighQPseudo_UPD:
  2670. case ARM::VST1q64HighQPseudo_UPD:
  2671. case ARM::VST4q8Pseudo_UPD:
  2672. case ARM::VST4q16Pseudo_UPD:
  2673. case ARM::VST4q32Pseudo_UPD:
  2674. case ARM::VST4q8oddPseudo:
  2675. case ARM::VST4q16oddPseudo:
  2676. case ARM::VST4q32oddPseudo:
  2677. case ARM::VST4q8oddPseudo_UPD:
  2678. case ARM::VST4q16oddPseudo_UPD:
  2679. case ARM::VST4q32oddPseudo_UPD:
  2680. ExpandVST(MBBI);
  2681. return true;
  2682. case ARM::VLD1LNq8Pseudo:
  2683. case ARM::VLD1LNq16Pseudo:
  2684. case ARM::VLD1LNq32Pseudo:
  2685. case ARM::VLD1LNq8Pseudo_UPD:
  2686. case ARM::VLD1LNq16Pseudo_UPD:
  2687. case ARM::VLD1LNq32Pseudo_UPD:
  2688. case ARM::VLD2LNd8Pseudo:
  2689. case ARM::VLD2LNd16Pseudo:
  2690. case ARM::VLD2LNd32Pseudo:
  2691. case ARM::VLD2LNq16Pseudo:
  2692. case ARM::VLD2LNq32Pseudo:
  2693. case ARM::VLD2LNd8Pseudo_UPD:
  2694. case ARM::VLD2LNd16Pseudo_UPD:
  2695. case ARM::VLD2LNd32Pseudo_UPD:
  2696. case ARM::VLD2LNq16Pseudo_UPD:
  2697. case ARM::VLD2LNq32Pseudo_UPD:
  2698. case ARM::VLD3LNd8Pseudo:
  2699. case ARM::VLD3LNd16Pseudo:
  2700. case ARM::VLD3LNd32Pseudo:
  2701. case ARM::VLD3LNq16Pseudo:
  2702. case ARM::VLD3LNq32Pseudo:
  2703. case ARM::VLD3LNd8Pseudo_UPD:
  2704. case ARM::VLD3LNd16Pseudo_UPD:
  2705. case ARM::VLD3LNd32Pseudo_UPD:
  2706. case ARM::VLD3LNq16Pseudo_UPD:
  2707. case ARM::VLD3LNq32Pseudo_UPD:
  2708. case ARM::VLD4LNd8Pseudo:
  2709. case ARM::VLD4LNd16Pseudo:
  2710. case ARM::VLD4LNd32Pseudo:
  2711. case ARM::VLD4LNq16Pseudo:
  2712. case ARM::VLD4LNq32Pseudo:
  2713. case ARM::VLD4LNd8Pseudo_UPD:
  2714. case ARM::VLD4LNd16Pseudo_UPD:
  2715. case ARM::VLD4LNd32Pseudo_UPD:
  2716. case ARM::VLD4LNq16Pseudo_UPD:
  2717. case ARM::VLD4LNq32Pseudo_UPD:
  2718. case ARM::VST1LNq8Pseudo:
  2719. case ARM::VST1LNq16Pseudo:
  2720. case ARM::VST1LNq32Pseudo:
  2721. case ARM::VST1LNq8Pseudo_UPD:
  2722. case ARM::VST1LNq16Pseudo_UPD:
  2723. case ARM::VST1LNq32Pseudo_UPD:
  2724. case ARM::VST2LNd8Pseudo:
  2725. case ARM::VST2LNd16Pseudo:
  2726. case ARM::VST2LNd32Pseudo:
  2727. case ARM::VST2LNq16Pseudo:
  2728. case ARM::VST2LNq32Pseudo:
  2729. case ARM::VST2LNd8Pseudo_UPD:
  2730. case ARM::VST2LNd16Pseudo_UPD:
  2731. case ARM::VST2LNd32Pseudo_UPD:
  2732. case ARM::VST2LNq16Pseudo_UPD:
  2733. case ARM::VST2LNq32Pseudo_UPD:
  2734. case ARM::VST3LNd8Pseudo:
  2735. case ARM::VST3LNd16Pseudo:
  2736. case ARM::VST3LNd32Pseudo:
  2737. case ARM::VST3LNq16Pseudo:
  2738. case ARM::VST3LNq32Pseudo:
  2739. case ARM::VST3LNd8Pseudo_UPD:
  2740. case ARM::VST3LNd16Pseudo_UPD:
  2741. case ARM::VST3LNd32Pseudo_UPD:
  2742. case ARM::VST3LNq16Pseudo_UPD:
  2743. case ARM::VST3LNq32Pseudo_UPD:
  2744. case ARM::VST4LNd8Pseudo:
  2745. case ARM::VST4LNd16Pseudo:
  2746. case ARM::VST4LNd32Pseudo:
  2747. case ARM::VST4LNq16Pseudo:
  2748. case ARM::VST4LNq32Pseudo:
  2749. case ARM::VST4LNd8Pseudo_UPD:
  2750. case ARM::VST4LNd16Pseudo_UPD:
  2751. case ARM::VST4LNd32Pseudo_UPD:
  2752. case ARM::VST4LNq16Pseudo_UPD:
  2753. case ARM::VST4LNq32Pseudo_UPD:
  2754. ExpandLaneOp(MBBI);
  2755. return true;
  2756. case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
  2757. case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
  2758. case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
  2759. case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
  2760. case ARM::MQQPRLoad:
  2761. case ARM::MQQPRStore:
  2762. case ARM::MQQQQPRLoad:
  2763. case ARM::MQQQQPRStore:
  2764. ExpandMQQPRLoadStore(MBBI);
  2765. return true;
  2766. case ARM::tCMP_SWAP_8:
  2767. assert(STI->isThumb());
  2768. return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
  2769. NextMBBI);
  2770. case ARM::tCMP_SWAP_16:
  2771. assert(STI->isThumb());
  2772. return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
  2773. NextMBBI);
  2774. case ARM::tCMP_SWAP_32:
  2775. assert(STI->isThumb());
  2776. return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, NextMBBI);
  2777. case ARM::CMP_SWAP_8:
  2778. assert(!STI->isThumb());
  2779. return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
  2780. NextMBBI);
  2781. case ARM::CMP_SWAP_16:
  2782. assert(!STI->isThumb());
  2783. return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
  2784. NextMBBI);
  2785. case ARM::CMP_SWAP_32:
  2786. assert(!STI->isThumb());
  2787. return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
  2788. case ARM::CMP_SWAP_64:
  2789. return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
  2790. case ARM::tBL_PUSHLR:
  2791. case ARM::BL_PUSHLR: {
  2792. const bool Thumb = Opcode == ARM::tBL_PUSHLR;
  2793. Register Reg = MI.getOperand(0).getReg();
  2794. assert(Reg == ARM::LR && "expect LR register!");
  2795. MachineInstrBuilder MIB;
  2796. if (Thumb) {
  2797. // push {lr}
  2798. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
  2799. .add(predOps(ARMCC::AL))
  2800. .addReg(Reg);
  2801. // bl __gnu_mcount_nc
  2802. MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
  2803. } else {
  2804. // stmdb sp!, {lr}
  2805. BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
  2806. .addReg(ARM::SP, RegState::Define)
  2807. .addReg(ARM::SP)
  2808. .add(predOps(ARMCC::AL))
  2809. .addReg(Reg);
  2810. // bl __gnu_mcount_nc
  2811. MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
  2812. }
  2813. MIB.cloneMemRefs(MI);
  2814. for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
  2815. MIB.add(MO);
  2816. MI.eraseFromParent();
  2817. return true;
  2818. }
  2819. case ARM::t2CALL_BTI: {
  2820. MachineFunction &MF = *MI.getMF();
  2821. MachineInstrBuilder MIB =
  2822. BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::tBL));
  2823. MIB.cloneMemRefs(MI);
  2824. for (unsigned i = 0; i < MI.getNumOperands(); ++i)
  2825. MIB.add(MI.getOperand(i));
  2826. if (MI.isCandidateForCallSiteEntry())
  2827. MF.moveCallSiteInfo(&MI, MIB.getInstr());
  2828. MIBundleBuilder Bundler(MBB, MI);
  2829. Bundler.append(MIB);
  2830. Bundler.append(BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::t2BTI)));
  2831. finalizeBundle(MBB, Bundler.begin(), Bundler.end());
  2832. MI.eraseFromParent();
  2833. return true;
  2834. }
  2835. case ARM::LOADDUAL:
  2836. case ARM::STOREDUAL: {
  2837. Register PairReg = MI.getOperand(0).getReg();
  2838. MachineInstrBuilder MIB =
  2839. BuildMI(MBB, MBBI, MI.getDebugLoc(),
  2840. TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
  2841. .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
  2842. Opcode == ARM::LOADDUAL ? RegState::Define : 0)
  2843. .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
  2844. Opcode == ARM::LOADDUAL ? RegState::Define : 0);
  2845. for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
  2846. MIB.add(MO);
  2847. MIB.add(predOps(ARMCC::AL));
  2848. MIB.cloneMemRefs(MI);
  2849. MI.eraseFromParent();
  2850. return true;
  2851. }
  2852. }
  2853. }
  2854. bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  2855. bool Modified = false;
  2856. MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  2857. while (MBBI != E) {
  2858. MachineBasicBlock::iterator NMBBI = std::next(MBBI);
  2859. Modified |= ExpandMI(MBB, MBBI, NMBBI);
  2860. MBBI = NMBBI;
  2861. }
  2862. return Modified;
  2863. }
  2864. bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  2865. STI = &MF.getSubtarget<ARMSubtarget>();
  2866. TII = STI->getInstrInfo();
  2867. TRI = STI->getRegisterInfo();
  2868. AFI = MF.getInfo<ARMFunctionInfo>();
  2869. LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
  2870. << "********** Function: " << MF.getName() << '\n');
  2871. bool Modified = false;
  2872. for (MachineBasicBlock &MBB : MF)
  2873. Modified |= ExpandMBB(MBB);
  2874. if (VerifyARMPseudo)
  2875. MF.verify(this, "After expanding ARM pseudo instructions.");
  2876. LLVM_DEBUG(dbgs() << "***************************************************\n");
  2877. return Modified;
  2878. }
  2879. /// createARMExpandPseudoPass - returns an instance of the pseudo instruction
  2880. /// expansion pass.
  2881. FunctionPass *llvm::createARMExpandPseudoPass() {
  2882. return new ARMExpandPseudo();
  2883. }