//===- RISCVInsertVSETVLI.cpp - Insert VSETVLI instructions ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a function pass that inserts VSETVLI instructions where
// needed and expands the vl outputs of VLEFF/VLSEGFF to PseudoReadVL
// instructions.
//
// This pass consists of 3 phases:
//
// Phase 1 collects how each basic block affects VL/VTYPE.
//
// Phase 2 uses the information from phase 1 to do a data flow analysis to
// propagate the VL/VTYPE changes through the function. This gives us the
// VL/VTYPE at the start of each basic block.
//
// Phase 3 inserts VSETVLI instructions in each basic block. Information from
// phase 2 is used to prevent inserting a VSETVLI before the first vector
// instruction in the block if possible.
//
//===----------------------------------------------------------------------===//
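
// For illustration only (simplified, hypothetical MIR): a vector pseudo that
// carries its AVL and SEW as operands, e.g.
//
//   %v = PseudoVADD_VV_M1 %a, %b, %avl, 5 /* log2(SEW=32) */
//
// gets a configuration instruction materialized in front of it by this pass:
//
//   dead $x0 = PseudoVSETVLI %avl, <vtype imm for e32,m1>,
//              implicit-def $vl, implicit-def $vtype
//   %v = PseudoVADD_VV_M1 %a, %b, $noreg, 5, implicit $vl, implicit $vtype
//
// The phases below exist to avoid emitting that vsetvli whenever the VL/VTYPE
// state reaching the instruction already satisfies its demands.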
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include <queue>
using namespace llvm;

#define DEBUG_TYPE "riscv-insert-vsetvli"
#define RISCV_INSERT_VSETVLI_NAME "RISCV Insert VSETVLI pass"

static cl::opt<bool> DisableInsertVSETVLPHIOpt(
    "riscv-disable-insert-vsetvl-phi-opt", cl::init(false), cl::Hidden,
    cl::desc("Disable looking through phis when inserting vsetvlis."));

static cl::opt<bool> UseStrictAsserts(
    "riscv-insert-vsetvl-strict-asserts", cl::init(true), cl::Hidden,
    cl::desc("Enable strict assertion checking for the dataflow algorithm"));

namespace {

static unsigned getVLOpNum(const MachineInstr &MI) {
  return RISCVII::getVLOpNum(MI.getDesc());
}

static unsigned getSEWOpNum(const MachineInstr &MI) {
  return RISCVII::getSEWOpNum(MI.getDesc());
}

static bool isVectorConfigInstr(const MachineInstr &MI) {
  return MI.getOpcode() == RISCV::PseudoVSETVLI ||
         MI.getOpcode() == RISCV::PseudoVSETVLIX0 ||
         MI.getOpcode() == RISCV::PseudoVSETIVLI;
}

/// Return true if this is 'vsetvli x0, x0, vtype' which preserves
/// VL and only sets VTYPE.
static bool isVLPreservingConfig(const MachineInstr &MI) {
  if (MI.getOpcode() != RISCV::PseudoVSETVLIX0)
    return false;
  assert(RISCV::X0 == MI.getOperand(1).getReg());
  return RISCV::X0 == MI.getOperand(0).getReg();
}

static uint16_t getRVVMCOpcode(uint16_t RVVPseudoOpcode) {
  const RISCVVPseudosTable::PseudoInfo *RVV =
      RISCVVPseudosTable::getPseudoInfo(RVVPseudoOpcode);
  if (!RVV)
    return 0;
  return RVV->BaseInstr;
}

static bool isScalarMoveInstr(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return false;
  case RISCV::VMV_S_X:
  case RISCV::VFMV_S_F:
    return true;
  }
}

/// Get the EEW for a load or store instruction. Return std::nullopt if MI is
/// not a load or store which ignores SEW.
static std::optional<unsigned> getEEWForLoadStore(const MachineInstr &MI) {
  switch (getRVVMCOpcode(MI.getOpcode())) {
  default:
    return std::nullopt;
  case RISCV::VLE8_V:
  case RISCV::VLSE8_V:
  case RISCV::VSE8_V:
  case RISCV::VSSE8_V:
    return 8;
  case RISCV::VLE16_V:
  case RISCV::VLSE16_V:
  case RISCV::VSE16_V:
  case RISCV::VSSE16_V:
    return 16;
  case RISCV::VLE32_V:
  case RISCV::VLSE32_V:
  case RISCV::VSE32_V:
  case RISCV::VSSE32_V:
    return 32;
  case RISCV::VLE64_V:
  case RISCV::VLSE64_V:
  case RISCV::VSE64_V:
  case RISCV::VSSE64_V:
    return 64;
  }
}

/// Return true if this is an operation on mask registers. Note that
/// this includes both arithmetic/logical ops and load/store (vlm/vsm).
static bool isMaskRegOp(const MachineInstr &MI) {
  if (!RISCVII::hasSEWOp(MI.getDesc().TSFlags))
    return false;
  const unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  return Log2SEW == 0;
}

/// Which subfields of VL or VTYPE have values we need to preserve?
struct DemandedFields {
  // Some unknown property of VL is used. If demanded, must preserve entire
  // value.
  bool VLAny = false;
  // Only zero vs non-zero is used. If demanded, can change non-zero values.
  bool VLZeroness = false;
  bool SEW = false;
  bool LMUL = false;
  bool SEWLMULRatio = false;
  bool TailPolicy = false;
  bool MaskPolicy = false;

  // Return true if any part of VTYPE was used
  bool usedVTYPE() const {
    return SEW || LMUL || SEWLMULRatio || TailPolicy || MaskPolicy;
  }

  // Return true if any property of VL was used
  bool usedVL() {
    return VLAny || VLZeroness;
  }

  // Mark all VTYPE subfields and properties as demanded
  void demandVTYPE() {
    SEW = true;
    LMUL = true;
    SEWLMULRatio = true;
    TailPolicy = true;
    MaskPolicy = true;
  }

  // Mark all VL properties as demanded
  void demandVL() {
    VLAny = true;
    VLZeroness = true;
  }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  void print(raw_ostream &OS) const {
    OS << "{";
    OS << "VLAny=" << VLAny << ", ";
    OS << "VLZeroness=" << VLZeroness << ", ";
    OS << "SEW=" << SEW << ", ";
    OS << "LMUL=" << LMUL << ", ";
    OS << "SEWLMULRatio=" << SEWLMULRatio << ", ";
    OS << "TailPolicy=" << TailPolicy << ", ";
    OS << "MaskPolicy=" << MaskPolicy;
    OS << "}";
  }
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const DemandedFields &DF) {
  DF.print(OS);
  return OS;
}
#endif

/// Return true if the two values of the VTYPE register provided are
/// indistinguishable from the perspective of an instruction (or set of
/// instructions) which use only the Used subfields and properties.
static bool areCompatibleVTYPEs(uint64_t VType1, uint64_t VType2,
                                const DemandedFields &Used) {
  if (Used.SEW &&
      RISCVVType::getSEW(VType1) != RISCVVType::getSEW(VType2))
    return false;

  if (Used.LMUL &&
      RISCVVType::getVLMUL(VType1) != RISCVVType::getVLMUL(VType2))
    return false;

  if (Used.SEWLMULRatio) {
    auto Ratio1 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType1),
                                              RISCVVType::getVLMUL(VType1));
    auto Ratio2 = RISCVVType::getSEWLMULRatio(RISCVVType::getSEW(VType2),
                                              RISCVVType::getVLMUL(VType2));
    if (Ratio1 != Ratio2)
      return false;
  }

  if (Used.TailPolicy &&
      RISCVVType::isTailAgnostic(VType1) != RISCVVType::isTailAgnostic(VType2))
    return false;
  if (Used.MaskPolicy &&
      RISCVVType::isMaskAgnostic(VType1) != RISCVVType::isMaskAgnostic(VType2))
    return false;
  return true;
}

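// Illustrative compatibility check (values hypothetical): e8/mf2 and e16/m1
// differ in both SEW and LMUL but share the SEW:LMUL ratio of 16, and hence
// the same VLMAX. An instruction demanding only that ratio (e.g. an
// EEW-encoded load or store, see getDemanded below) treats the two VTYPE
// values as interchangeable:
//
//   DemandedFields Used;      // nothing demanded by default
//   Used.SEWLMULRatio = true;
//   // areCompatibleVTYPEs(<e8,mf2 encoding>, <e16,m1 encoding>, Used) is true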
/// Return the fields and properties demanded by the provided instruction.
static DemandedFields getDemanded(const MachineInstr &MI) {
  // Warning: This function has to work on both the lowered (i.e. post
  // emitVSETVLIs) and pre-lowering forms. The main implication of this is
  // that it can't use the value of a SEW, VL, or Policy operand as they might
  // be stale after lowering.

  // Most instructions don't use any of these subfields.
  DemandedFields Res;
  // Start conservative if registers are used
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VL))
    Res.demandVL();
  if (MI.isCall() || MI.isInlineAsm() || MI.readsRegister(RISCV::VTYPE))
    Res.demandVTYPE();
  // Start conservative on the unlowered form too
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (RISCVII::hasSEWOp(TSFlags)) {
    Res.demandVTYPE();
    if (RISCVII::hasVLOp(TSFlags))
      Res.demandVL();

    // Behavior is independent of mask policy.
    if (!RISCVII::usesMaskPolicy(TSFlags))
      Res.MaskPolicy = false;
  }

  // Loads and stores with implicit EEW do not demand SEW or LMUL directly.
  // They instead demand the ratio of the two which is used in computing
  // EMUL, but which allows us the flexibility to change SEW and LMUL
  // provided we don't change the ratio.
  // Note: We assume that the instruction's initial SEW is the EEW encoded
  // in the opcode. This is asserted when constructing the VSETVLIInfo.
  if (getEEWForLoadStore(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // Store instructions don't use the policy fields.
  if (RISCVII::hasSEWOp(TSFlags) && MI.getNumExplicitDefs() == 0) {
    Res.TailPolicy = false;
    Res.MaskPolicy = false;
  }

  // If this is a mask reg operation, it only cares about VLMAX.
  // TODO: Possible extensions to this logic
  // * Probably ok if available VLMax is larger than demanded
  // * The policy bits can probably be ignored.
  if (isMaskRegOp(MI)) {
    Res.SEW = false;
    Res.LMUL = false;
  }

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and VL > 0.
  if (isScalarMoveInstr(MI)) {
    Res.LMUL = false;
    Res.SEWLMULRatio = false;
    Res.VLAny = false;
  }

  return Res;
}

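// Worked example of the relaxations above (hypothetical): for a unit-stride
// store such as vse32.v, the function starts by demanding full VL and VTYPE
// (the pseudo has SEW and VL operands), then relaxes. The EEW is encoded in
// the opcode, so SEW and LMUL collapse to their ratio, and a store writes no
// tail or masked-off destination lanes, so both policy bits drop out. The net
// demand is {VLAny, VLZeroness, SEWLMULRatio}: any earlier configuration with
// the same AVL and VLMAX satisfies it.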
/// Defines the abstract state with which the forward dataflow models the
/// values of the VL and VTYPE registers after insertion.
class VSETVLIInfo {
  union {
    Register AVLReg;
    unsigned AVLImm;
  };

  enum : uint8_t {
    Uninitialized,
    AVLIsReg,
    AVLIsImm,
    Unknown,
  } State = Uninitialized;

  // Fields from VTYPE.
  RISCVII::VLMUL VLMul = RISCVII::LMUL_1;
  uint8_t SEW = 0;
  uint8_t TailAgnostic : 1;
  uint8_t MaskAgnostic : 1;
  uint8_t SEWLMULRatioOnly : 1;

public:
  VSETVLIInfo()
      : AVLImm(0), TailAgnostic(false), MaskAgnostic(false),
        SEWLMULRatioOnly(false) {}

  static VSETVLIInfo getUnknown() {
    VSETVLIInfo Info;
    Info.setUnknown();
    return Info;
  }

  bool isValid() const { return State != Uninitialized; }
  void setUnknown() { State = Unknown; }
  bool isUnknown() const { return State == Unknown; }

  void setAVLReg(Register Reg) {
    AVLReg = Reg;
    State = AVLIsReg;
  }

  void setAVLImm(unsigned Imm) {
    AVLImm = Imm;
    State = AVLIsImm;
  }

  bool hasAVLImm() const { return State == AVLIsImm; }
  bool hasAVLReg() const { return State == AVLIsReg; }
  Register getAVLReg() const {
    assert(hasAVLReg());
    return AVLReg;
  }
  unsigned getAVLImm() const {
    assert(hasAVLImm());
    return AVLImm;
  }

  unsigned getSEW() const { return SEW; }
  RISCVII::VLMUL getVLMUL() const { return VLMul; }

  bool hasNonZeroAVL() const {
    if (hasAVLImm())
      return getAVLImm() > 0;
    if (hasAVLReg())
      // An AVL of x0 requests VLMAX, which is never zero.
      return getAVLReg() == RISCV::X0;
    return false;
  }

  bool hasEquallyZeroAVL(const VSETVLIInfo &Other) const {
    if (hasSameAVL(Other))
      return true;
    return (hasNonZeroAVL() && Other.hasNonZeroAVL());
  }

  bool hasSameAVL(const VSETVLIInfo &Other) const {
    if (hasAVLReg() && Other.hasAVLReg())
      return getAVLReg() == Other.getAVLReg();

    if (hasAVLImm() && Other.hasAVLImm())
      return getAVLImm() == Other.getAVLImm();

    return false;
  }

  void setVTYPE(unsigned VType) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = RISCVVType::getVLMUL(VType);
    SEW = RISCVVType::getSEW(VType);
    TailAgnostic = RISCVVType::isTailAgnostic(VType);
    MaskAgnostic = RISCVVType::isMaskAgnostic(VType);
  }
  void setVTYPE(RISCVII::VLMUL L, unsigned S, bool TA, bool MA) {
    assert(isValid() && !isUnknown() &&
           "Can't set VTYPE for uninitialized or unknown");
    VLMul = L;
    SEW = S;
    TailAgnostic = TA;
    MaskAgnostic = MA;
  }

  unsigned encodeVTYPE() const {
    assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
           "Can't encode VTYPE for uninitialized or unknown");
    return RISCVVType::encodeVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);
  }

  bool hasSEWLMULRatioOnly() const { return SEWLMULRatioOnly; }

  bool hasSameVTYPE(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    assert(!SEWLMULRatioOnly && !Other.SEWLMULRatioOnly &&
           "Can't compare when only LMUL/SEW ratio is valid.");
    return std::tie(VLMul, SEW, TailAgnostic, MaskAgnostic) ==
           std::tie(Other.VLMul, Other.SEW, Other.TailAgnostic,
                    Other.MaskAgnostic);
  }

  unsigned getSEWLMULRatio() const {
    assert(isValid() && !isUnknown() &&
           "Can't use VTYPE for uninitialized or unknown");
    return RISCVVType::getSEWLMULRatio(SEW, VLMul);
  }

  // Check if the VTYPEs of these two VSETVLIInfos produce the same VLMAX.
  // Note that having the same VLMAX ensures that both share the same
  // function from AVL to VL; that is, they must produce the same VL value
  // for any given AVL value.
  bool hasSameVLMAX(const VSETVLIInfo &Other) const {
    assert(isValid() && Other.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!isUnknown() && !Other.isUnknown() &&
           "Can't compare VTYPE in unknown state");
    return getSEWLMULRatio() == Other.getSEWLMULRatio();
  }

  bool hasCompatibleVTYPE(const DemandedFields &Used,
                          const VSETVLIInfo &Require) const {
    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  // Determine whether the vector instruction's requirements (represented by
  // Require, with the demanded subset in Used) are compatible with the
  // previous vsetvli instruction represented by this.
  bool isCompatible(const DemandedFields &Used, const VSETVLIInfo &Require) const {
    assert(isValid() && Require.isValid() &&
           "Can't compare invalid VSETVLIInfos");
    assert(!Require.SEWLMULRatioOnly &&
           "Expected a valid VTYPE for instruction!");
    // Nothing is compatible with Unknown.
    if (isUnknown() || Require.isUnknown())
      return false;

    // If only our VLMAX ratio is valid, then this isn't compatible.
    if (SEWLMULRatioOnly)
      return false;

    // If the instruction doesn't need an AVLReg and the SEW matches, consider
    // it compatible.
    if (Require.hasAVLReg() && Require.AVLReg == RISCV::NoRegister)
      if (SEW == Require.SEW)
        return true;

    if (Used.VLAny && !hasSameAVL(Require))
      return false;

    if (Used.VLZeroness && !hasEquallyZeroAVL(Require))
      return false;

    return areCompatibleVTYPEs(encodeVTYPE(), Require.encodeVTYPE(), Used);
  }

  bool operator==(const VSETVLIInfo &Other) const {
    // Uninitialized is only equal to another Uninitialized.
    if (!isValid())
      return !Other.isValid();
    if (!Other.isValid())
      return !isValid();

    // Unknown is only equal to another Unknown.
    if (isUnknown())
      return Other.isUnknown();
    if (Other.isUnknown())
      return isUnknown();

    if (!hasSameAVL(Other))
      return false;

    // If the SEWLMULRatioOnly bits are different, then they aren't equal.
    if (SEWLMULRatioOnly != Other.SEWLMULRatioOnly)
      return false;

    // If only the VLMAX is valid, check that it is the same.
    if (SEWLMULRatioOnly)
      return hasSameVLMAX(Other);

    // If the full VTYPE is valid, check that it is the same.
    return hasSameVTYPE(Other);
  }

  bool operator!=(const VSETVLIInfo &Other) const {
    return !(*this == Other);
  }

  // Calculate the VSETVLIInfo visible to a block assuming this and Other are
  // both predecessors.
  VSETVLIInfo intersect(const VSETVLIInfo &Other) const {
    // If the new value isn't valid, ignore it.
    if (!Other.isValid())
      return *this;

    // If this value isn't valid, this must be the first predecessor, use it.
    if (!isValid())
      return Other;

    // If either is unknown, the result is unknown.
    if (isUnknown() || Other.isUnknown())
      return VSETVLIInfo::getUnknown();

    // If we have an exact match, return this.
    if (*this == Other)
      return *this;

    // Not an exact match, but maybe the AVL and VLMAX are the same. If so,
    // return an SEW/LMUL ratio only value.
    if (hasSameAVL(Other) && hasSameVLMAX(Other)) {
      VSETVLIInfo MergeInfo = *this;
      MergeInfo.SEWLMULRatioOnly = true;
      return MergeInfo;
    }

    // Otherwise the result is unknown.
    return VSETVLIInfo::getUnknown();
  }

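  // Illustrative meet (values hypothetical): if one predecessor exits with
  // {AVL=%n, e32, m1} and another with {AVL=%n, e16, mf2}, both the AVL and
  // the SEW:LMUL ratio (32) agree, so the merged state keeps the AVL but is
  // marked SEWLMULRatioOnly: successors may rely on VL and VLMAX, but not on
  // the exact SEW, LMUL, or policy bits.
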
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  /// Support for debugging, callable in GDB: V->dump()
  LLVM_DUMP_METHOD void dump() const {
    print(dbgs());
    dbgs() << "\n";
  }

  /// Implement operator<<.
  /// @{
  void print(raw_ostream &OS) const {
    OS << "{";
    if (!isValid())
      OS << "Uninitialized";
    if (isUnknown())
      OS << "unknown";
    if (hasAVLReg())
      OS << "AVLReg=" << (unsigned)AVLReg;
    if (hasAVLImm())
      OS << "AVLImm=" << (unsigned)AVLImm;
    OS << ", "
       << "VLMul=" << (unsigned)VLMul << ", "
       << "SEW=" << (unsigned)SEW << ", "
       << "TailAgnostic=" << (bool)TailAgnostic << ", "
       << "MaskAgnostic=" << (bool)MaskAgnostic << ", "
       << "SEWLMULRatioOnly=" << (bool)SEWLMULRatioOnly << "}";
  }
  /// @}
#endif
};

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_ATTRIBUTE_USED
inline raw_ostream &operator<<(raw_ostream &OS, const VSETVLIInfo &V) {
  V.print(OS);
  return OS;
}
#endif

struct BlockData {
  // The VSETVLIInfo that represents the net changes to the VL/VTYPE registers
  // made by this block. Calculated in Phase 1.
  VSETVLIInfo Change;

  // The VSETVLIInfo that represents the VL/VTYPE settings on exit from this
  // block. Calculated in Phase 2.
  VSETVLIInfo Exit;

  // The VSETVLIInfo that represents the VL/VTYPE settings from all predecessor
  // blocks. Calculated in Phase 2, and used by Phase 3.
  VSETVLIInfo Pred;

  // Keeps track of whether the block is already in the queue.
  bool InQueue = false;

  BlockData() = default;
};

class RISCVInsertVSETVLI : public MachineFunctionPass {
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;

  std::vector<BlockData> BlockInfo;
  std::queue<const MachineBasicBlock *> WorkList;

public:
  static char ID;

  RISCVInsertVSETVLI() : MachineFunctionPass(ID) {
    initializeRISCVInsertVSETVLIPass(*PassRegistry::getPassRegistry());
  }
  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override { return RISCV_INSERT_VSETVLI_NAME; }

private:
  bool needVSETVLI(const MachineInstr &MI, const VSETVLIInfo &Require,
                   const VSETVLIInfo &CurInfo) const;
  bool needVSETVLIPHI(const VSETVLIInfo &Require,
                      const MachineBasicBlock &MBB) const;
  void insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);
  void insertVSETVLI(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc DL,
                     const VSETVLIInfo &Info, const VSETVLIInfo &PrevInfo);

  void transferBefore(VSETVLIInfo &Info, const MachineInstr &MI);
  void transferAfter(VSETVLIInfo &Info, const MachineInstr &MI);
  bool computeVLVTYPEChanges(const MachineBasicBlock &MBB);
  void computeIncomingVLVTYPE(const MachineBasicBlock &MBB);
  void emitVSETVLIs(MachineBasicBlock &MBB);
  void doLocalPostpass(MachineBasicBlock &MBB);
  void doPRE(MachineBasicBlock &MBB);
  void insertReadVL(MachineBasicBlock &MBB);
};

} // end anonymous namespace

char RISCVInsertVSETVLI::ID = 0;

INITIALIZE_PASS(RISCVInsertVSETVLI, DEBUG_TYPE, RISCV_INSERT_VSETVLI_NAME,
                false, false)

static VSETVLIInfo computeInfoForInstr(const MachineInstr &MI, uint64_t TSFlags,
                                       const MachineRegisterInfo *MRI) {
  VSETVLIInfo InstrInfo;

  bool TailAgnostic, MaskAgnostic;
  unsigned UseOpIdx;
  if (MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
    // Start with undisturbed.
    TailAgnostic = false;
    MaskAgnostic = false;

    // If there is a policy operand, use it.
    if (RISCVII::hasVecPolicyOp(TSFlags)) {
      const MachineOperand &Op = MI.getOperand(MI.getNumExplicitOperands() - 1);
      uint64_t Policy = Op.getImm();
      assert(Policy <= (RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC) &&
             "Invalid Policy Value");
      TailAgnostic = Policy & RISCVII::TAIL_AGNOSTIC;
      MaskAgnostic = Policy & RISCVII::MASK_AGNOSTIC;
    }

    // If the tied operand is an IMPLICIT_DEF we can use TailAgnostic and
    // MaskAgnostic.
    const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
    MachineInstr *UseMI = MRI->getVRegDef(UseMO.getReg());
    if (UseMI && UseMI->isImplicitDef()) {
      TailAgnostic = true;
      MaskAgnostic = true;
    }
    // Some pseudo instructions force a tail agnostic policy despite having a
    // tied def.
    if (RISCVII::doesForceTailAgnostic(TSFlags))
      TailAgnostic = true;

    if (!RISCVII::usesMaskPolicy(TSFlags))
      MaskAgnostic = true;
  } else {
    // If there is no tied operand, there shouldn't be a policy operand.
    assert(!RISCVII::hasVecPolicyOp(TSFlags) && "Unexpected policy operand");
    // With no tied operand, use agnostic policies.
    TailAgnostic = true;
    MaskAgnostic = true;
  }

  RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);

  unsigned Log2SEW = MI.getOperand(getSEWOpNum(MI)).getImm();
  // A Log2SEW of 0 is an operation on mask registers only.
  unsigned SEW = Log2SEW ? 1 << Log2SEW : 8;
  assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");

  if (RISCVII::hasVLOp(TSFlags)) {
    const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
    if (VLOp.isImm()) {
      int64_t Imm = VLOp.getImm();
      // Convert the VLMax sentinel to the X0 register.
      if (Imm == RISCV::VLMaxSentinel)
        InstrInfo.setAVLReg(RISCV::X0);
      else
        InstrInfo.setAVLImm(Imm);
    } else {
      InstrInfo.setAVLReg(VLOp.getReg());
    }
  } else {
    InstrInfo.setAVLReg(RISCV::NoRegister);
  }
#ifndef NDEBUG
  if (std::optional<unsigned> EEW = getEEWForLoadStore(MI)) {
    assert(SEW == EEW && "Initial SEW doesn't match expected EEW");
  }
#endif
  InstrInfo.setVTYPE(VLMul, SEW, TailAgnostic, MaskAgnostic);

  return InstrInfo;
}

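// Sketch of the policy immediate decoded above (bit values from RISCVII):
//
//   TAIL_AGNOSTIC = 1 (bit 0), MASK_AGNOSTIC = 2 (bit 1)
//   Policy 0 -> tu,mu   Policy 1 -> ta,mu
//   Policy 2 -> tu,ma   Policy 3 -> ta,ma
//
// so a pseudo carrying a policy operand of 3 requests both tail- and
// mask-agnostic behavior, matching the assert's upper bound.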
void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, MachineInstr &MI,
                                       const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  DebugLoc DL = MI.getDebugLoc();
  insertVSETVLI(MBB, MachineBasicBlock::iterator(&MI), DL, Info, PrevInfo);
}

void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator InsertPt,
                                       DebugLoc DL, const VSETVLIInfo &Info,
                                       const VSETVLIInfo &PrevInfo) {
  // Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
  // VLMAX.
  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
      Info.hasSameAVL(PrevInfo) && Info.hasSameVLMAX(PrevInfo)) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addReg(RISCV::X0, RegState::Kill)
        .addImm(Info.encodeVTYPE())
        .addReg(RISCV::VL, RegState::Implicit);
    return;
  }

  if (Info.hasAVLImm()) {
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(Info.getAVLImm())
        .addImm(Info.encodeVTYPE());
    return;
  }

  Register AVLReg = Info.getAVLReg();
  if (AVLReg == RISCV::NoRegister) {
    // We can only use x0, x0 if there's no chance of the vtype change causing
    // the previous vl to become invalid.
    if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
        Info.hasSameVLMAX(PrevInfo)) {
      BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0))
          .addReg(RISCV::X0, RegState::Define | RegState::Dead)
          .addReg(RISCV::X0, RegState::Kill)
          .addImm(Info.encodeVTYPE())
          .addReg(RISCV::VL, RegState::Implicit);
      return;
    }
    // Otherwise use an AVL of 0 to avoid depending on previous vl.
    BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI))
        .addReg(RISCV::X0, RegState::Define | RegState::Dead)
        .addImm(0)
        .addImm(Info.encodeVTYPE());
    return;
  }

  if (AVLReg.isVirtual())
    MRI->constrainRegClass(AVLReg, &RISCV::GPRNoX0RegClass);

  // Use X0 as the DestReg unless AVLReg is X0. We also need to change the
  // opcode if the AVLReg is X0 as they have different register classes for
  // the AVL operand.
  Register DestReg = RISCV::X0;
  unsigned Opcode = RISCV::PseudoVSETVLI;
  if (AVLReg == RISCV::X0) {
    DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass);
    Opcode = RISCV::PseudoVSETVLIX0;
  }
  BuildMI(MBB, InsertPt, DL, TII->get(Opcode))
      .addReg(DestReg, RegState::Define | RegState::Dead)
      .addReg(AVLReg)
      .addImm(Info.encodeVTYPE());
}

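// The three forms emitted above, in assembly syntax (for illustration):
//
//   vsetvli x0, x0, vtype    - changes vtype while keeping the current vl;
//                              only legal when VLMAX is provably unchanged.
//   vsetivli x0, uimm, vtype - AVL is a small immediate (0-31).
//   vsetvli rd, rs1, vtype   - AVL in a register; rs1=x0 with rd!=x0 requests
//                              VLMAX.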
// Return a VSETVLIInfo representing the changes made by this VSETVLI or
// VSETIVLI instruction.
static VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) {
  VSETVLIInfo NewInfo;
  if (MI.getOpcode() == RISCV::PseudoVSETIVLI) {
    NewInfo.setAVLImm(MI.getOperand(1).getImm());
  } else {
    assert(MI.getOpcode() == RISCV::PseudoVSETVLI ||
           MI.getOpcode() == RISCV::PseudoVSETVLIX0);
    Register AVLReg = MI.getOperand(1).getReg();
    assert((AVLReg != RISCV::X0 || MI.getOperand(0).getReg() != RISCV::X0) &&
           "Can't handle X0, X0 vsetvli yet");
    NewInfo.setAVLReg(AVLReg);
  }
  NewInfo.setVTYPE(MI.getOperand(2).getImm());

  return NewInfo;
}

/// Return true if a VSETVLI is required to transition from CurInfo to Require
/// before MI.
bool RISCVInsertVSETVLI::needVSETVLI(const MachineInstr &MI,
                                     const VSETVLIInfo &Require,
                                     const VSETVLIInfo &CurInfo) const {
  assert(Require == computeInfoForInstr(MI, MI.getDesc().TSFlags, MRI));

  if (!CurInfo.isValid() || CurInfo.isUnknown() || CurInfo.hasSEWLMULRatioOnly())
    return true;

  DemandedFields Used = getDemanded(MI);

  if (isScalarMoveInstr(MI)) {
    // For vmv.s.x and vfmv.s.f, if writing to an implicit_def operand, we
    // don't need to preserve any other bits and are thus compatible with any
    // larger etype, and can disregard policy bits. Warning: It's tempting to
    // try doing this for any tail agnostic operation, but we can't as TA
    // requires tail lanes to either be the original value or -1. We are
    // writing unknown bits to the lanes here.
    auto *VRegDef = MRI->getVRegDef(MI.getOperand(1).getReg());
    if (VRegDef && VRegDef->isImplicitDef() &&
        CurInfo.getSEW() >= Require.getSEW()) {
      Used.SEW = false;
      Used.TailPolicy = false;
    }
  }

  if (CurInfo.isCompatible(Used, Require))
    return false;

  // We didn't find a compatible value. If our AVL is a virtual register,
  // it might be defined by a VSET(I)VLI. If it has the same VLMAX we need
  // and the last VL/VTYPE we observed is the same, we don't need a
  // VSETVLI here.
  if (Require.hasAVLReg() && Require.getAVLReg().isVirtual() &&
      CurInfo.hasCompatibleVTYPE(Used, Require)) {
    if (MachineInstr *DefMI = MRI->getVRegDef(Require.getAVLReg())) {
      if (isVectorConfigInstr(*DefMI)) {
        VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
        if (DefInfo.hasSameAVL(CurInfo) && DefInfo.hasSameVLMAX(CurInfo))
          return false;
      }
    }
  }

  return true;
}

// Given an incoming state reaching MI, modifies that state so that it is
// minimally compatible with MI. The resulting state is guaranteed to be
// semantically legal for MI, but may not be the state requested by MI.
void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info, const MachineInstr &MI) {
  uint64_t TSFlags = MI.getDesc().TSFlags;
  if (!RISCVII::hasSEWOp(TSFlags))
    return;

  const VSETVLIInfo NewInfo = computeInfoForInstr(MI, TSFlags, MRI);
  if (Info.isValid() && !needVSETVLI(MI, NewInfo, Info))
    return;

  const VSETVLIInfo PrevInfo = Info;
  Info = NewInfo;

  if (!RISCVII::hasVLOp(TSFlags))
    return;

  // For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
  // VL > 0. We can discard the user requested AVL and just use the last
  // one if we can prove it equally zero. This removes a vsetvli entirely
  // if the types match, or allows use of the cheaper AVL-preserving variant
  // if VLMAX doesn't change. If VLMAX might change, we couldn't use
  // the 'vsetvli x0, x0, vtype' variant, so we avoid the transform to
  // prevent extending the live range of an AVL register operand.
  // TODO: We can probably relax this for immediates.
  if (isScalarMoveInstr(MI) && PrevInfo.isValid() &&
      PrevInfo.hasEquallyZeroAVL(Info) &&
      Info.hasSameVLMAX(PrevInfo)) {
    if (PrevInfo.hasAVLImm())
      Info.setAVLImm(PrevInfo.getAVLImm());
    else
      Info.setAVLReg(PrevInfo.getAVLReg());
    return;
  }

  // If AVL is defined by a vsetvli with the same VLMAX, we can
  // replace the AVL operand with the AVL of the defining vsetvli.
  // We avoid general register AVLs to avoid extending live ranges
  // without being sure we can kill the original source reg entirely.
  if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
    return;
  MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
  if (!DefMI || !isVectorConfigInstr(*DefMI))
    return;

  VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
  if (DefInfo.hasSameVLMAX(Info) &&
      (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
    if (DefInfo.hasAVLImm())
      Info.setAVLImm(DefInfo.getAVLImm());
    else
      Info.setAVLReg(DefInfo.getAVLReg());
    return;
  }
}

// Given a state with which we evaluated MI (see transferBefore above for why
// this might be different than the state MI requested), modify the state to
// reflect the changes MI might make.
void RISCVInsertVSETVLI::transferAfter(VSETVLIInfo &Info, const MachineInstr &MI) {
  if (isVectorConfigInstr(MI)) {
    Info = getInfoForVSETVLI(MI);
    return;
  }

  if (RISCV::isFaultFirstLoad(MI)) {
    // Update AVL to vl-output of the fault first load.
    Info.setAVLReg(MI.getOperand(1).getReg());
    return;
  }

  // If this is something that updates VL/VTYPE that we don't know about, set
  // the state to unknown.
  if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
      MI.modifiesRegister(RISCV::VTYPE))
    Info = VSETVLIInfo::getUnknown();
}

bool RISCVInsertVSETVLI::computeVLVTYPEChanges(const MachineBasicBlock &MBB) {
  bool HadVectorOp = false;

  BlockData &BBInfo = BlockInfo[MBB.getNumber()];
  BBInfo.Change = BBInfo.Pred;
  for (const MachineInstr &MI : MBB) {
    transferBefore(BBInfo.Change, MI);

    if (isVectorConfigInstr(MI) || RISCVII::hasSEWOp(MI.getDesc().TSFlags))
      HadVectorOp = true;

    transferAfter(BBInfo.Change, MI);
  }

  return HadVectorOp;
}

void RISCVInsertVSETVLI::computeIncomingVLVTYPE(const MachineBasicBlock &MBB) {
  BlockData &BBInfo = BlockInfo[MBB.getNumber()];

  BBInfo.InQueue = false;

  // Start with the previous entry so that we keep the most conservative state
  // we have ever found.
  VSETVLIInfo InInfo = BBInfo.Pred;
  if (MBB.pred_empty()) {
    // There are no predecessors, so use the default starting status.
    InInfo.setUnknown();
  } else {
    for (MachineBasicBlock *P : MBB.predecessors())
      InInfo = InInfo.intersect(BlockInfo[P->getNumber()].Exit);
  }

  // If we don't have any valid predecessor value, wait until we do.
  if (!InInfo.isValid())
    return;

  // If no change, no need to rerun block
  if (InInfo == BBInfo.Pred)
    return;

  BBInfo.Pred = InInfo;
  LLVM_DEBUG(dbgs() << "Entry state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Pred << "\n");

  // Note: It's tempting to cache the state changes here, but due to the
  // compatibility checks performed, a block's output state can change based
  // on the input state. To cache, we'd have to add logic for finding
  // never-compatible state changes.
  computeVLVTYPEChanges(MBB);
  VSETVLIInfo TmpStatus = BBInfo.Change;

  // If the new exit value matches the old exit value, we don't need to revisit
  // any blocks.
  if (BBInfo.Exit == TmpStatus)
    return;

  BBInfo.Exit = TmpStatus;
  LLVM_DEBUG(dbgs() << "Exit state of " << printMBBReference(MBB)
                    << " changed to " << BBInfo.Exit << "\n");

  // Add the successors to the work list so we can propagate the changed exit
  // status.
  for (MachineBasicBlock *S : MBB.successors())
    if (!BlockInfo[S->getNumber()].InQueue) {
      BlockInfo[S->getNumber()].InQueue = true;
      WorkList.push(S);
    }
}

// If we weren't able to prove a vsetvli was directly unneeded, it might still
// be unneeded if the AVL is a phi node where all incoming values are VL
// outputs from the last VSETVLI in their respective basic blocks.
bool RISCVInsertVSETVLI::needVSETVLIPHI(const VSETVLIInfo &Require,
                                        const MachineBasicBlock &MBB) const {
  if (DisableInsertVSETVLPHIOpt)
    return true;

  if (!Require.hasAVLReg())
    return true;

  Register AVLReg = Require.getAVLReg();
  if (!AVLReg.isVirtual())
    return true;

  // We need the AVL to be produced by a PHI node in this basic block.
  MachineInstr *PHI = MRI->getVRegDef(AVLReg);
  if (!PHI || PHI->getOpcode() != RISCV::PHI || PHI->getParent() != &MBB)
    return true;

  for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
       PHIOp += 2) {
    Register InReg = PHI->getOperand(PHIOp).getReg();
    MachineBasicBlock *PBB = PHI->getOperand(PHIOp + 1).getMBB();
    const BlockData &PBBInfo = BlockInfo[PBB->getNumber()];
    // If the exit from the predecessor has the VTYPE we are looking for
    // we might be able to avoid a VSETVLI.
    if (PBBInfo.Exit.isUnknown() || !PBBInfo.Exit.hasSameVTYPE(Require))
      return true;

    // We need the PHI input to be the output of a VSET(I)VLI.
    MachineInstr *DefMI = MRI->getVRegDef(InReg);
    if (!DefMI || !isVectorConfigInstr(*DefMI))
      return true;

    // We found a VSET(I)VLI; make sure it matches the output of the
    // predecessor block.
    VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
    if (!DefInfo.hasSameAVL(PBBInfo.Exit) ||
        !DefInfo.hasSameVTYPE(PBBInfo.Exit))
      return true;
  }

  // If all the incoming values to the PHI checked out, we don't need
  // to insert a VSETVLI.
  return false;
}

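// Illustrative CFG for the PHI case above (hypothetical vregs):
//
//   header: %avl = PHI [ %vl0, %preheader ], [ %vl1, %latch ]
//           ... first vector op uses %avl ...
//   where   %vl0 = PseudoVSETVLI %n, vtype   ; last config in preheader
//           %vl1 = PseudoVSETVLI %m, vtype   ; last config in latch
//
// Each incoming value is the VL output of the final vsetvli on its edge and
// every predecessor exits with the required VTYPE, so VL/VTYPE are already
// correct on entry and no vsetvli is needed before the first vector op.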
void RISCVInsertVSETVLI::emitVSETVLIs(MachineBasicBlock &MBB) {
  VSETVLIInfo CurInfo = BlockInfo[MBB.getNumber()].Pred;
  // Track whether the prefix of the block we've scanned is transparent
  // (meaning has not yet changed the abstract state).
  bool PrefixTransparent = true;
  for (MachineInstr &MI : MBB) {
    const VSETVLIInfo PrevInfo = CurInfo;
    transferBefore(CurInfo, MI);

    // If this is an explicit VSETVLI or VSETIVLI, update our state.
    if (isVectorConfigInstr(MI)) {
      // Conservatively, mark the VL and VTYPE as live.
      assert(MI.getOperand(3).getReg() == RISCV::VL &&
             MI.getOperand(4).getReg() == RISCV::VTYPE &&
             "Unexpected operands where VL and VTYPE should be");
      MI.getOperand(3).setIsDead(false);
      MI.getOperand(4).setIsDead(false);
      PrefixTransparent = false;
    }

    uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (PrevInfo != CurInfo) {
        // If this is the first implicit state change, and the state change
        // requested can be proven to produce the same register contents, we
        // can skip emitting the actual state change and continue as if we
        // had, since we know the GPR result of the implicit state change
        // wouldn't be used and the VL/VTYPE registers are correct. Note that
        // we *do* need to model the state as if it changed: while the
        // register contents are unchanged, the abstract model can change.
        if (!PrefixTransparent || needVSETVLIPHI(CurInfo, MBB))
          insertVSETVLI(MBB, MI, CurInfo, PrevInfo);
        PrefixTransparent = false;
      }

      if (RISCVII::hasVLOp(TSFlags)) {
        MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI));
        if (VLOp.isReg()) {
          // Erase the AVL operand from the instruction.
          VLOp.setReg(RISCV::NoRegister);
          VLOp.setIsKill(false);
        }
        MI.addOperand(MachineOperand::CreateReg(RISCV::VL, /*isDef*/ false,
                                                /*isImp*/ true));
      }
      MI.addOperand(MachineOperand::CreateReg(RISCV::VTYPE, /*isDef*/ false,
                                              /*isImp*/ true));
    }

    if (MI.isCall() || MI.isInlineAsm() || MI.modifiesRegister(RISCV::VL) ||
        MI.modifiesRegister(RISCV::VTYPE))
      PrefixTransparent = false;

    transferAfter(CurInfo, MI);
  }

  // If we reach the end of the block and our current info doesn't match the
  // expected info, insert a vsetvli to correct.
  if (!UseStrictAsserts) {
    const VSETVLIInfo &ExitInfo = BlockInfo[MBB.getNumber()].Exit;
    if (CurInfo.isValid() && ExitInfo.isValid() && !ExitInfo.isUnknown() &&
        CurInfo != ExitInfo) {
      // Note there's an implicit assumption here that terminators never use
      // or modify VL or VTYPE. Also, fallthrough will return end().
      auto InsertPt = MBB.getFirstInstrTerminator();
      insertVSETVLI(MBB, InsertPt, MBB.findDebugLoc(InsertPt), ExitInfo,
                    CurInfo);
      CurInfo = ExitInfo;
    }
  }

  if (UseStrictAsserts && CurInfo.isValid()) {
    const auto &Info = BlockInfo[MBB.getNumber()];
    if (CurInfo != Info.Exit) {
      LLVM_DEBUG(dbgs() << "in block " << printMBBReference(MBB) << "\n");
      LLVM_DEBUG(dbgs() << "  begin        state: " << Info.Pred << "\n");
      LLVM_DEBUG(dbgs() << "  expected end state: " << Info.Exit << "\n");
      LLVM_DEBUG(dbgs() << "  actual   end state: " << CurInfo << "\n");
    }
    assert(CurInfo == Info.Exit &&
           "InsertVSETVLI dataflow invariant violated");
  }
}

/// Return true if the VL value configured must be equal to the requested one.
static bool hasFixedResult(const VSETVLIInfo &Info, const RISCVSubtarget &ST) {
  if (!Info.hasAVLImm())
    // VLMAX is always the same value.
    // TODO: Could extend to other registers by looking at the associated vreg
    // def placement.
    return RISCV::X0 == Info.getAVLReg();

  unsigned AVL = Info.getAVLImm();
  unsigned SEW = Info.getSEW();
  unsigned AVLInBits = AVL * SEW;

  unsigned LMul;
  bool Fractional;
  std::tie(LMul, Fractional) = RISCVVType::decodeVLMUL(Info.getVLMUL());

  if (Fractional)
    return ST.getRealMinVLen() / LMul >= AVLInBits;
  return ST.getRealMinVLen() * LMul >= AVLInBits;
}

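// Worked example (numbers hypothetical): with AVL=8, SEW=32, LMUL=1 the
// request covers 8 * 32 = 256 bits. On a subtarget guaranteeing VLEN >= 256,
// VLMAX = VLEN * LMUL / SEW >= 8, so this configuration always produces
// VL == 8 and the result is fixed. With only VLEN >= 128 guaranteed, VL could
// be clipped to VLMAX = 4, so the result is not fixed and doPRE below must
// leave the vsetvli where it is.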
/// Perform simple partial redundancy elimination of the VSETVLI instructions
/// we're about to insert by looking for cases where we can PRE from the
/// beginning of one block to the end of one of its predecessors. Specifically,
/// this is geared to catch the common case of a fixed length vsetvli in a
/// single block loop when it could execute once in the preheader instead.
void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) {
  const MachineFunction &MF = *MBB.getParent();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!BlockInfo[MBB.getNumber()].Pred.isUnknown())
    return;

  MachineBasicBlock *UnavailablePred = nullptr;
  VSETVLIInfo AvailableInfo;
  for (MachineBasicBlock *P : MBB.predecessors()) {
    const VSETVLIInfo &PredInfo = BlockInfo[P->getNumber()].Exit;
    if (PredInfo.isUnknown()) {
      if (UnavailablePred)
        return;
      UnavailablePred = P;
    } else if (!AvailableInfo.isValid()) {
      AvailableInfo = PredInfo;
    } else if (AvailableInfo != PredInfo) {
      return;
    }
  }

  // Unreachable, single pred, or full redundancy. Note that FRE is handled by
  // phase 3.
  if (!UnavailablePred || !AvailableInfo.isValid())
    return;

  // Critical edge - TODO: consider splitting?
  if (UnavailablePred->succ_size() != 1)
    return;

  // If VL can be less than AVL, then we can't reduce the frequency of
  // execution.
  if (!hasFixedResult(AvailableInfo, ST))
    return;

  // Does it actually let us remove an implicit transition in MBB?
  bool Found = false;
  for (auto &MI : MBB) {
    if (isVectorConfigInstr(MI))
      return;

    const uint64_t TSFlags = MI.getDesc().TSFlags;
    if (RISCVII::hasSEWOp(TSFlags)) {
      if (AvailableInfo != computeInfoForInstr(MI, TSFlags, MRI))
        return;
      Found = true;
      break;
    }
  }
  if (!Found)
    return;

  // Finally, update both data flow state and insert the actual vsetvli.
  // Doing both keeps the code in sync with the dataflow results, which
  // is critical for correctness of phase 3.
  auto OldInfo = BlockInfo[UnavailablePred->getNumber()].Exit;
  LLVM_DEBUG(dbgs() << "PRE VSETVLI from " << MBB.getName() << " to "
                    << UnavailablePred->getName() << " with state "
                    << AvailableInfo << "\n");
  BlockInfo[UnavailablePred->getNumber()].Exit = AvailableInfo;
  BlockInfo[MBB.getNumber()].Pred = AvailableInfo;

  // Note there's an implicit assumption here that terminators never use
  // or modify VL or VTYPE. Also, fallthrough will return end().
  auto InsertPt = UnavailablePred->getFirstInstrTerminator();
  insertVSETVLI(*UnavailablePred, InsertPt,
                UnavailablePred->findDebugLoc(InsertPt),
                AvailableInfo, OldInfo);
}

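// Shape of the transform above (CFG for illustration, vtype arbitrary):
//
//   entry                    entry: vsetivli x0, 8, <vtype>
//     |                        |
//     v          ==>           v
//   loop <--+                loop <--+   (no vsetvli inside the loop)
//     |_____|                  |_____|
//
// The loop header's entry state was unknown only because of the edge from
// entry; making the loop body's configuration available there means phase 3
// no longer needs to emit anything inside the loop.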
static void doUnion(DemandedFields &A, DemandedFields B) {
  A.VLAny |= B.VLAny;
  A.VLZeroness |= B.VLZeroness;
  A.SEW |= B.SEW;
  A.LMUL |= B.LMUL;
  A.SEWLMULRatio |= B.SEWLMULRatio;
  A.TailPolicy |= B.TailPolicy;
  A.MaskPolicy |= B.MaskPolicy;
}

static bool isNonZeroAVL(const MachineOperand &MO) {
  if (MO.isReg())
    // An AVL of x0 requests VLMAX, which is never zero.
    return RISCV::X0 == MO.getReg();
  assert(MO.isImm());
  return 0 != MO.getImm();
}

// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
                                 const MachineInstr &MI,
                                 const DemandedFields &Used) {
  // If the VL values aren't equal, return false if either a) the former is
  // demanded, or b) we can't rewrite the former to be the latter for
  // implementation reasons.
  if (!isVLPreservingConfig(MI)) {
    if (Used.VLAny)
      return false;

    // TODO: Requires more care in the mutation...
    if (isVLPreservingConfig(PrevMI))
      return false;

    // We don't bother to handle the equally zero case here as it's largely
    // uninteresting.
    if (Used.VLZeroness &&
        (!isNonZeroAVL(MI.getOperand(1)) ||
         !isNonZeroAVL(PrevMI.getOperand(1))))
      return false;

    // TODO: Track whether the register is defined between
    // PrevMI and MI.
    if (MI.getOperand(1).isReg() &&
        RISCV::X0 != MI.getOperand(1).getReg())
      return false;

    // TODO: We need to change the result register to allow this rewrite
    // without the result forming a vl preserving vsetvli which is not
    // a correct state merge.
    if (PrevMI.getOperand(0).getReg() == RISCV::X0 &&
        MI.getOperand(1).isReg())
      return false;
  }

  if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
    return false;

  auto PriorVType = PrevMI.getOperand(2).getImm();
  auto VType = MI.getOperand(2).getImm();
  return areCompatibleVTYPEs(PriorVType, VType, Used);
}

void RISCVInsertVSETVLI::doLocalPostpass(MachineBasicBlock &MBB) {
  MachineInstr *NextMI = nullptr;
  // We can have arbitrary code in successors, so VL and VTYPE
  // must be considered demanded.
  DemandedFields Used;
  Used.demandVL();
  Used.demandVTYPE();
  SmallVector<MachineInstr*> ToDelete;
  for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend())) {

    if (!isVectorConfigInstr(MI)) {
      doUnion(Used, getDemanded(MI));
      continue;
    }

    Register VRegDef = MI.getOperand(0).getReg();
    if (VRegDef != RISCV::X0 &&
        !(VRegDef.isVirtual() && MRI->use_nodbg_empty(VRegDef)))
      Used.demandVL();

    if (NextMI) {
      if (!Used.usedVL() && !Used.usedVTYPE()) {
        ToDelete.push_back(&MI);
        // Leave NextMI unchanged
        continue;
      } else if (canMutatePriorConfig(MI, *NextMI, Used)) {
        if (!isVLPreservingConfig(*NextMI)) {
          if (NextMI->getOperand(1).isImm())
            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
          else
            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(), false);
          MI.setDesc(NextMI->getDesc());
        }
        MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());
        ToDelete.push_back(NextMI);
        // fallthrough
      }
    }
    NextMI = &MI;
    Used = getDemanded(MI);
  }

  for (auto *MI : ToDelete)
    MI->eraseFromParent();
}

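// Illustrative rewrite performed by the backward walk above (assembly
// simplified, vtypes chosen to share a SEW:LMUL ratio): when nothing between
// two configurations demands the fields on which they differ, the earlier one
// absorbs the later one's vtype and the later one is deleted:
//
//   vsetivli x0, 4, e8, mf4, ta, ma        vsetivli x0, 4, e32, m1, ta, ma
//   vsetvli  x0, x0, e32, m1, ta, ma  ==>
//   vadd.vv  ...                           vadd.vv ...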
void RISCVInsertVSETVLI::insertReadVL(MachineBasicBlock &MBB) {
  for (auto I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineInstr &MI = *I++;
    if (RISCV::isFaultFirstLoad(MI)) {
      Register VLOutput = MI.getOperand(1).getReg();
      if (!MRI->use_nodbg_empty(VLOutput))
        BuildMI(MBB, I, MI.getDebugLoc(), TII->get(RISCV::PseudoReadVL),
                VLOutput);
      // We don't use the vl output of the VLEFF/VLSEGFF anymore.
      MI.getOperand(1).setReg(RISCV::X0);
    }
  }
}

bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  LLVM_DEBUG(dbgs() << "Entering InsertVSETVLI for " << MF.getName() << "\n");

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();

  assert(BlockInfo.empty() && "Expect empty block infos");
  BlockInfo.resize(MF.getNumBlockIDs());

  bool HaveVectorOp = false;

  // Phase 1 - determine how VL/VTYPE are affected by each block.
  for (const MachineBasicBlock &MBB : MF) {
    HaveVectorOp |= computeVLVTYPEChanges(MBB);
    // Initial exit state is whatever change we found in the block.
    BlockData &BBInfo = BlockInfo[MBB.getNumber()];
    BBInfo.Exit = BBInfo.Change;
    LLVM_DEBUG(dbgs() << "Initial exit state of " << printMBBReference(MBB)
                      << " is " << BBInfo.Exit << "\n");
  }

  // If we didn't find any instructions that need VSETVLI, we're done.
  if (!HaveVectorOp) {
    BlockInfo.clear();
    return false;
  }

  // Phase 2 - determine the exit VL/VTYPE from each block. We add all
  // blocks to the list here, but will also add any that need to be revisited
  // during Phase 2 processing.
  for (const MachineBasicBlock &MBB : MF) {
    WorkList.push(&MBB);
    BlockInfo[MBB.getNumber()].InQueue = true;
  }
  while (!WorkList.empty()) {
    const MachineBasicBlock &MBB = *WorkList.front();
    WorkList.pop();
    computeIncomingVLVTYPE(MBB);
  }

  // Perform partial redundancy elimination of vsetvli transitions.
  for (MachineBasicBlock &MBB : MF)
    doPRE(MBB);

  // Phase 3 - add any vsetvli instructions needed in the block. Use the
  // Phase 2 information to avoid adding vsetvlis before the first vector
  // instruction in the block if the VL/VTYPE is satisfied by its
  // predecessors.
  for (MachineBasicBlock &MBB : MF)
    emitVSETVLIs(MBB);

  // Now that all vsetvlis are explicit, go through and do block local
  // DSE and peephole based demanded fields based transforms. Note that
  // this *must* be done outside the main dataflow so long as we allow
  // any cross block analysis within the dataflow. We can't have both
  // demanded fields based mutation and non-local analysis in the
  // dataflow at the same time without introducing inconsistencies.
  for (MachineBasicBlock &MBB : MF)
    doLocalPostpass(MBB);

  // Once we're fully done rewriting all the instructions, do a final pass
  // through to check for VSETVLIs which write to an unused destination.
  // For the non X0, X0 variant, we can replace the destination register
  // with X0 to reduce register pressure. This is really a generic
  // optimization which can be applied to any dead def (TODO: generalize).
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.getOpcode() == RISCV::PseudoVSETVLI ||
          MI.getOpcode() == RISCV::PseudoVSETIVLI) {
        Register VRegDef = MI.getOperand(0).getReg();
        if (VRegDef != RISCV::X0 && MRI->use_nodbg_empty(VRegDef))
          MI.getOperand(0).setReg(RISCV::X0);
      }
    }
  }

  // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
  // of VLEFF/VLSEGFF.
  for (MachineBasicBlock &MBB : MF)
    insertReadVL(MBB);

  BlockInfo.clear();
  return HaveVectorOp;
}

/// Returns an instance of the Insert VSETVLI pass.
FunctionPass *llvm::createRISCVInsertVSETVLIPass() {
  return new RISCVInsertVSETVLI();
}