X86DomainReassignment.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795
  1. //===--- X86DomainReassignment.cpp - Selectively switch register classes---===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass attempts to find instruction chains (closures) in one domain,
  10. // and convert them to equivalent instructions in a different domain,
  11. // if profitable.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "X86.h"
  15. #include "X86InstrInfo.h"
  16. #include "X86Subtarget.h"
  17. #include "llvm/ADT/BitVector.h"
  18. #include "llvm/ADT/DenseMap.h"
  19. #include "llvm/ADT/DenseMapInfo.h"
  20. #include "llvm/ADT/STLExtras.h"
  21. #include "llvm/ADT/SmallVector.h"
  22. #include "llvm/ADT/Statistic.h"
  23. #include "llvm/CodeGen/MachineFunctionPass.h"
  24. #include "llvm/CodeGen/MachineInstrBuilder.h"
  25. #include "llvm/CodeGen/MachineRegisterInfo.h"
  26. #include "llvm/CodeGen/TargetRegisterInfo.h"
  27. #include "llvm/Support/Debug.h"
  28. #include "llvm/Support/Printable.h"
  29. #include <bitset>
  30. using namespace llvm;
  31. #define DEBUG_TYPE "x86-domain-reassignment"
  32. STATISTIC(NumClosuresConverted, "Number of closures converted by the pass");
  33. static cl::opt<bool> DisableX86DomainReassignment(
  34. "disable-x86-domain-reassignment", cl::Hidden,
  35. cl::desc("X86: Disable Virtual Register Reassignment."), cl::init(false));
  36. namespace {
  /// Register domains distinguished by this pass.
  ///
  /// NoDomain is a sentinel with value -1; the real domains are numbered
  /// consecutively from 0 so that NumDomains equals their count and can be
  /// used to size per-domain containers (see Closure::LegalDstDomains).
  enum RegDomain {
    NoDomain = -1,
    GPRDomain,
    MaskDomain,
    OtherDomain,
    NumDomains
  };
  38. static bool isGPR(const TargetRegisterClass *RC) {
  39. return X86::GR64RegClass.hasSubClassEq(RC) ||
  40. X86::GR32RegClass.hasSubClassEq(RC) ||
  41. X86::GR16RegClass.hasSubClassEq(RC) ||
  42. X86::GR8RegClass.hasSubClassEq(RC);
  43. }
  44. static bool isMask(const TargetRegisterClass *RC,
  45. const TargetRegisterInfo *TRI) {
  46. return X86::VK16RegClass.hasSubClassEq(RC);
  47. }
  48. static RegDomain getDomain(const TargetRegisterClass *RC,
  49. const TargetRegisterInfo *TRI) {
  50. if (isGPR(RC))
  51. return GPRDomain;
  52. if (isMask(RC, TRI))
  53. return MaskDomain;
  54. return OtherDomain;
  55. }
  56. /// Return a register class equivalent to \p SrcRC, in \p Domain.
  57. static const TargetRegisterClass *getDstRC(const TargetRegisterClass *SrcRC,
  58. RegDomain Domain) {
  59. assert(Domain == MaskDomain && "add domain");
  60. if (X86::GR8RegClass.hasSubClassEq(SrcRC))
  61. return &X86::VK8RegClass;
  62. if (X86::GR16RegClass.hasSubClassEq(SrcRC))
  63. return &X86::VK16RegClass;
  64. if (X86::GR32RegClass.hasSubClassEq(SrcRC))
  65. return &X86::VK32RegClass;
  66. if (X86::GR64RegClass.hasSubClassEq(SrcRC))
  67. return &X86::VK64RegClass;
  68. llvm_unreachable("add register class");
  69. return nullptr;
  70. }
  71. /// Abstract Instruction Converter class.
  72. class InstrConverterBase {
  73. protected:
  74. unsigned SrcOpcode;
  75. public:
  76. InstrConverterBase(unsigned SrcOpcode) : SrcOpcode(SrcOpcode) {}
  77. virtual ~InstrConverterBase() = default;
  78. /// \returns true if \p MI is legal to convert.
  79. virtual bool isLegal(const MachineInstr *MI,
  80. const TargetInstrInfo *TII) const {
  81. assert(MI->getOpcode() == SrcOpcode &&
  82. "Wrong instruction passed to converter");
  83. return true;
  84. }
  85. /// Applies conversion to \p MI.
  86. ///
  87. /// \returns true if \p MI is no longer need, and can be deleted.
  88. virtual bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
  89. MachineRegisterInfo *MRI) const = 0;
  90. /// \returns the cost increment incurred by converting \p MI.
  91. virtual double getExtraCost(const MachineInstr *MI,
  92. MachineRegisterInfo *MRI) const = 0;
  93. };
  94. /// An Instruction Converter which ignores the given instruction.
  95. /// For example, PHI instructions can be safely ignored since only the registers
  96. /// need to change.
  97. class InstrIgnore : public InstrConverterBase {
  98. public:
  99. InstrIgnore(unsigned SrcOpcode) : InstrConverterBase(SrcOpcode) {}
  100. bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
  101. MachineRegisterInfo *MRI) const override {
  102. assert(isLegal(MI, TII) && "Cannot convert instruction");
  103. return false;
  104. }
  105. double getExtraCost(const MachineInstr *MI,
  106. MachineRegisterInfo *MRI) const override {
  107. return 0;
  108. }
  109. };
  110. /// An Instruction Converter which replaces an instruction with another.
  111. class InstrReplacer : public InstrConverterBase {
  112. public:
  113. /// Opcode of the destination instruction.
  114. unsigned DstOpcode;
  115. InstrReplacer(unsigned SrcOpcode, unsigned DstOpcode)
  116. : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {}
  117. bool isLegal(const MachineInstr *MI,
  118. const TargetInstrInfo *TII) const override {
  119. if (!InstrConverterBase::isLegal(MI, TII))
  120. return false;
  121. // It's illegal to replace an instruction that implicitly defines a register
  122. // with an instruction that doesn't, unless that register dead.
  123. for (const auto &MO : MI->implicit_operands())
  124. if (MO.isReg() && MO.isDef() && !MO.isDead() &&
  125. !TII->get(DstOpcode).hasImplicitDefOfPhysReg(MO.getReg()))
  126. return false;
  127. return true;
  128. }
  129. bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
  130. MachineRegisterInfo *MRI) const override {
  131. assert(isLegal(MI, TII) && "Cannot convert instruction");
  132. MachineInstrBuilder Bld =
  133. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(DstOpcode));
  134. // Transfer explicit operands from original instruction. Implicit operands
  135. // are handled by BuildMI.
  136. for (auto &Op : MI->explicit_operands())
  137. Bld.add(Op);
  138. return true;
  139. }
  140. double getExtraCost(const MachineInstr *MI,
  141. MachineRegisterInfo *MRI) const override {
  142. // Assuming instructions have the same cost.
  143. return 0;
  144. }
  145. };
  146. /// An Instruction Converter which replaces an instruction with another, and
  147. /// adds a COPY from the new instruction's destination to the old one's.
  148. class InstrReplacerDstCOPY : public InstrConverterBase {
  149. public:
  150. unsigned DstOpcode;
  151. InstrReplacerDstCOPY(unsigned SrcOpcode, unsigned DstOpcode)
  152. : InstrConverterBase(SrcOpcode), DstOpcode(DstOpcode) {}
  153. bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
  154. MachineRegisterInfo *MRI) const override {
  155. assert(isLegal(MI, TII) && "Cannot convert instruction");
  156. MachineBasicBlock *MBB = MI->getParent();
  157. const DebugLoc &DL = MI->getDebugLoc();
  158. Register Reg = MRI->createVirtualRegister(
  159. TII->getRegClass(TII->get(DstOpcode), 0, MRI->getTargetRegisterInfo(),
  160. *MBB->getParent()));
  161. MachineInstrBuilder Bld = BuildMI(*MBB, MI, DL, TII->get(DstOpcode), Reg);
  162. for (const MachineOperand &MO : llvm::drop_begin(MI->operands()))
  163. Bld.add(MO);
  164. BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::COPY))
  165. .add(MI->getOperand(0))
  166. .addReg(Reg);
  167. return true;
  168. }
  169. double getExtraCost(const MachineInstr *MI,
  170. MachineRegisterInfo *MRI) const override {
  171. // Assuming instructions have the same cost, and that COPY is in the same
  172. // domain so it will be eliminated.
  173. return 0;
  174. }
  175. };
  176. /// An Instruction Converter for replacing COPY instructions.
  177. class InstrCOPYReplacer : public InstrReplacer {
  178. public:
  179. RegDomain DstDomain;
  180. InstrCOPYReplacer(unsigned SrcOpcode, RegDomain DstDomain, unsigned DstOpcode)
  181. : InstrReplacer(SrcOpcode, DstOpcode), DstDomain(DstDomain) {}
  182. bool isLegal(const MachineInstr *MI,
  183. const TargetInstrInfo *TII) const override {
  184. if (!InstrConverterBase::isLegal(MI, TII))
  185. return false;
  186. // Don't allow copies to/flow GR8/GR16 physical registers.
  187. // FIXME: Is there some better way to support this?
  188. Register DstReg = MI->getOperand(0).getReg();
  189. if (DstReg.isPhysical() && (X86::GR8RegClass.contains(DstReg) ||
  190. X86::GR16RegClass.contains(DstReg)))
  191. return false;
  192. Register SrcReg = MI->getOperand(1).getReg();
  193. if (SrcReg.isPhysical() && (X86::GR8RegClass.contains(SrcReg) ||
  194. X86::GR16RegClass.contains(SrcReg)))
  195. return false;
  196. return true;
  197. }
  198. double getExtraCost(const MachineInstr *MI,
  199. MachineRegisterInfo *MRI) const override {
  200. assert(MI->getOpcode() == TargetOpcode::COPY && "Expected a COPY");
  201. for (const auto &MO : MI->operands()) {
  202. // Physical registers will not be converted. Assume that converting the
  203. // COPY to the destination domain will eventually result in a actual
  204. // instruction.
  205. if (MO.getReg().isPhysical())
  206. return 1;
  207. RegDomain OpDomain = getDomain(MRI->getRegClass(MO.getReg()),
  208. MRI->getTargetRegisterInfo());
  209. // Converting a cross domain COPY to a same domain COPY should eliminate
  210. // an insturction
  211. if (OpDomain == DstDomain)
  212. return -1;
  213. }
  214. return 0;
  215. }
  216. };
  217. /// An Instruction Converter which replaces an instruction with a COPY.
  218. class InstrReplaceWithCopy : public InstrConverterBase {
  219. public:
  220. // Source instruction operand Index, to be used as the COPY source.
  221. unsigned SrcOpIdx;
  222. InstrReplaceWithCopy(unsigned SrcOpcode, unsigned SrcOpIdx)
  223. : InstrConverterBase(SrcOpcode), SrcOpIdx(SrcOpIdx) {}
  224. bool convertInstr(MachineInstr *MI, const TargetInstrInfo *TII,
  225. MachineRegisterInfo *MRI) const override {
  226. assert(isLegal(MI, TII) && "Cannot convert instruction");
  227. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
  228. TII->get(TargetOpcode::COPY))
  229. .add({MI->getOperand(0), MI->getOperand(SrcOpIdx)});
  230. return true;
  231. }
  232. double getExtraCost(const MachineInstr *MI,
  233. MachineRegisterInfo *MRI) const override {
  234. return 0;
  235. }
  236. };
  237. // Key type to be used by the Instruction Converters map.
  238. // A converter is identified by <destination domain, source opcode>
  239. typedef std::pair<int, unsigned> InstrConverterBaseKeyTy;
  240. typedef DenseMap<InstrConverterBaseKeyTy, std::unique_ptr<InstrConverterBase>>
  241. InstrConverterBaseMap;
  242. /// A closure is a set of virtual register representing all of the edges in
  243. /// the closure, as well as all of the instructions connected by those edges.
  244. ///
  245. /// A closure may encompass virtual registers in the same register bank that
  246. /// have different widths. For example, it may contain 32-bit GPRs as well as
  247. /// 64-bit GPRs.
  248. ///
  249. /// A closure that computes an address (i.e. defines a virtual register that is
  250. /// used in a memory operand) excludes the instructions that contain memory
  251. /// operands using the address. Such an instruction will be included in a
  252. /// different closure that manipulates the loaded or stored value.
  253. class Closure {
  254. private:
  255. /// Virtual registers in the closure.
  256. DenseSet<Register> Edges;
  257. /// Instructions in the closure.
  258. SmallVector<MachineInstr *, 8> Instrs;
  259. /// Domains which this closure can legally be reassigned to.
  260. std::bitset<NumDomains> LegalDstDomains;
  261. /// An ID to uniquely identify this closure, even when it gets
  262. /// moved around
  263. unsigned ID;
  264. public:
  265. Closure(unsigned ID, std::initializer_list<RegDomain> LegalDstDomainList) : ID(ID) {
  266. for (RegDomain D : LegalDstDomainList)
  267. LegalDstDomains.set(D);
  268. }
  269. /// Mark this closure as illegal for reassignment to all domains.
  270. void setAllIllegal() { LegalDstDomains.reset(); }
  271. /// \returns true if this closure has domains which are legal to reassign to.
  272. bool hasLegalDstDomain() const { return LegalDstDomains.any(); }
  273. /// \returns true if is legal to reassign this closure to domain \p RD.
  274. bool isLegal(RegDomain RD) const { return LegalDstDomains[RD]; }
  275. /// Mark this closure as illegal for reassignment to domain \p RD.
  276. void setIllegal(RegDomain RD) { LegalDstDomains[RD] = false; }
  277. bool empty() const { return Edges.empty(); }
  278. bool insertEdge(Register Reg) { return Edges.insert(Reg).second; }
  279. using const_edge_iterator = DenseSet<Register>::const_iterator;
  280. iterator_range<const_edge_iterator> edges() const {
  281. return iterator_range<const_edge_iterator>(Edges.begin(), Edges.end());
  282. }
  283. void addInstruction(MachineInstr *I) {
  284. Instrs.push_back(I);
  285. }
  286. ArrayRef<MachineInstr *> instructions() const {
  287. return Instrs;
  288. }
  289. LLVM_DUMP_METHOD void dump(const MachineRegisterInfo *MRI) const {
  290. dbgs() << "Registers: ";
  291. bool First = true;
  292. for (Register Reg : Edges) {
  293. if (!First)
  294. dbgs() << ", ";
  295. First = false;
  296. dbgs() << printReg(Reg, MRI->getTargetRegisterInfo(), 0, MRI);
  297. }
  298. dbgs() << "\n" << "Instructions:";
  299. for (MachineInstr *MI : Instrs) {
  300. dbgs() << "\n ";
  301. MI->print(dbgs());
  302. }
  303. dbgs() << "\n";
  304. }
  305. unsigned getID() const {
  306. return ID;
  307. }
  308. };
  309. class X86DomainReassignment : public MachineFunctionPass {
  310. const X86Subtarget *STI = nullptr;
  311. MachineRegisterInfo *MRI = nullptr;
  312. const X86InstrInfo *TII = nullptr;
  313. /// All edges that are included in some closure
  314. BitVector EnclosedEdges{8, false};
  315. /// All instructions that are included in some closure.
  316. DenseMap<MachineInstr *, unsigned> EnclosedInstrs;
  317. public:
  318. static char ID;
  319. X86DomainReassignment() : MachineFunctionPass(ID) { }
  320. bool runOnMachineFunction(MachineFunction &MF) override;
  321. void getAnalysisUsage(AnalysisUsage &AU) const override {
  322. AU.setPreservesCFG();
  323. MachineFunctionPass::getAnalysisUsage(AU);
  324. }
  325. StringRef getPassName() const override {
  326. return "X86 Domain Reassignment Pass";
  327. }
  328. private:
  329. /// A map of available Instruction Converters.
  330. InstrConverterBaseMap Converters;
  331. /// Initialize Converters map.
  332. void initConverters();
  333. /// Starting from \Reg, expand the closure as much as possible.
  334. void buildClosure(Closure &, Register Reg);
  335. /// Enqueue \p Reg to be considered for addition to the closure.
  336. void visitRegister(Closure &, Register Reg, RegDomain &Domain,
  337. SmallVectorImpl<unsigned> &Worklist);
  338. /// Reassign the closure to \p Domain.
  339. void reassign(const Closure &C, RegDomain Domain) const;
  340. /// Add \p MI to the closure.
  341. void encloseInstr(Closure &C, MachineInstr *MI);
  342. /// /returns true if it is profitable to reassign the closure to \p Domain.
  343. bool isReassignmentProfitable(const Closure &C, RegDomain Domain) const;
  344. /// Calculate the total cost of reassigning the closure to \p Domain.
  345. double calculateCost(const Closure &C, RegDomain Domain) const;
  346. };
  347. char X86DomainReassignment::ID = 0;
  348. } // End anonymous namespace.
  349. void X86DomainReassignment::visitRegister(Closure &C, Register Reg,
  350. RegDomain &Domain,
  351. SmallVectorImpl<unsigned> &Worklist) {
  352. if (!Reg.isVirtual())
  353. return;
  354. if (EnclosedEdges.test(Register::virtReg2Index(Reg)))
  355. return;
  356. if (!MRI->hasOneDef(Reg))
  357. return;
  358. RegDomain RD = getDomain(MRI->getRegClass(Reg), MRI->getTargetRegisterInfo());
  359. // First edge in closure sets the domain.
  360. if (Domain == NoDomain)
  361. Domain = RD;
  362. if (Domain != RD)
  363. return;
  364. Worklist.push_back(Reg);
  365. }
  366. void X86DomainReassignment::encloseInstr(Closure &C, MachineInstr *MI) {
  367. auto I = EnclosedInstrs.find(MI);
  368. if (I != EnclosedInstrs.end()) {
  369. if (I->second != C.getID())
  370. // Instruction already belongs to another closure, avoid conflicts between
  371. // closure and mark this closure as illegal.
  372. C.setAllIllegal();
  373. return;
  374. }
  375. EnclosedInstrs[MI] = C.getID();
  376. C.addInstruction(MI);
  377. // Mark closure as illegal for reassignment to domains, if there is no
  378. // converter for the instruction or if the converter cannot convert the
  379. // instruction.
  380. for (int i = 0; i != NumDomains; ++i) {
  381. if (C.isLegal((RegDomain)i)) {
  382. auto I = Converters.find({i, MI->getOpcode()});
  383. if (I == Converters.end() || !I->second->isLegal(MI, TII))
  384. C.setIllegal((RegDomain)i);
  385. }
  386. }
  387. }
  388. double X86DomainReassignment::calculateCost(const Closure &C,
  389. RegDomain DstDomain) const {
  390. assert(C.isLegal(DstDomain) && "Cannot calculate cost for illegal closure");
  391. double Cost = 0.0;
  392. for (auto *MI : C.instructions())
  393. Cost += Converters.find({DstDomain, MI->getOpcode()})
  394. ->second->getExtraCost(MI, MRI);
  395. return Cost;
  396. }
  397. bool X86DomainReassignment::isReassignmentProfitable(const Closure &C,
  398. RegDomain Domain) const {
  399. return calculateCost(C, Domain) < 0.0;
  400. }
  401. void X86DomainReassignment::reassign(const Closure &C, RegDomain Domain) const {
  402. assert(C.isLegal(Domain) && "Cannot convert illegal closure");
  403. // Iterate all instructions in the closure, convert each one using the
  404. // appropriate converter.
  405. SmallVector<MachineInstr *, 8> ToErase;
  406. for (auto *MI : C.instructions())
  407. if (Converters.find({Domain, MI->getOpcode()})
  408. ->second->convertInstr(MI, TII, MRI))
  409. ToErase.push_back(MI);
  410. // Iterate all registers in the closure, replace them with registers in the
  411. // destination domain.
  412. for (Register Reg : C.edges()) {
  413. MRI->setRegClass(Reg, getDstRC(MRI->getRegClass(Reg), Domain));
  414. for (auto &MO : MRI->use_operands(Reg)) {
  415. if (MO.isReg())
  416. // Remove all subregister references as they are not valid in the
  417. // destination domain.
  418. MO.setSubReg(0);
  419. }
  420. }
  421. for (auto *MI : ToErase)
  422. MI->eraseFromParent();
  423. }
  424. /// \returns true when \p Reg is used as part of an address calculation in \p
  425. /// MI.
  426. static bool usedAsAddr(const MachineInstr &MI, Register Reg,
  427. const TargetInstrInfo *TII) {
  428. if (!MI.mayLoadOrStore())
  429. return false;
  430. const MCInstrDesc &Desc = TII->get(MI.getOpcode());
  431. int MemOpStart = X86II::getMemoryOperandNo(Desc.TSFlags);
  432. if (MemOpStart == -1)
  433. return false;
  434. MemOpStart += X86II::getOperandBias(Desc);
  435. for (unsigned MemOpIdx = MemOpStart,
  436. MemOpEnd = MemOpStart + X86::AddrNumOperands;
  437. MemOpIdx < MemOpEnd; ++MemOpIdx) {
  438. const MachineOperand &Op = MI.getOperand(MemOpIdx);
  439. if (Op.isReg() && Op.getReg() == Reg)
  440. return true;
  441. }
  442. return false;
  443. }
  444. void X86DomainReassignment::buildClosure(Closure &C, Register Reg) {
  445. SmallVector<unsigned, 4> Worklist;
  446. RegDomain Domain = NoDomain;
  447. visitRegister(C, Reg, Domain, Worklist);
  448. while (!Worklist.empty()) {
  449. unsigned CurReg = Worklist.pop_back_val();
  450. // Register already in this closure.
  451. if (!C.insertEdge(CurReg))
  452. continue;
  453. EnclosedEdges.set(Register::virtReg2Index(Reg));
  454. MachineInstr *DefMI = MRI->getVRegDef(CurReg);
  455. encloseInstr(C, DefMI);
  456. // Add register used by the defining MI to the worklist.
  457. // Do not add registers which are used in address calculation, they will be
  458. // added to a different closure.
  459. int OpEnd = DefMI->getNumOperands();
  460. const MCInstrDesc &Desc = DefMI->getDesc();
  461. int MemOp = X86II::getMemoryOperandNo(Desc.TSFlags);
  462. if (MemOp != -1)
  463. MemOp += X86II::getOperandBias(Desc);
  464. for (int OpIdx = 0; OpIdx < OpEnd; ++OpIdx) {
  465. if (OpIdx == MemOp) {
  466. // skip address calculation.
  467. OpIdx += (X86::AddrNumOperands - 1);
  468. continue;
  469. }
  470. auto &Op = DefMI->getOperand(OpIdx);
  471. if (!Op.isReg() || !Op.isUse())
  472. continue;
  473. visitRegister(C, Op.getReg(), Domain, Worklist);
  474. }
  475. // Expand closure through register uses.
  476. for (auto &UseMI : MRI->use_nodbg_instructions(CurReg)) {
  477. // We would like to avoid converting closures which calculare addresses,
  478. // as this should remain in GPRs.
  479. if (usedAsAddr(UseMI, CurReg, TII)) {
  480. C.setAllIllegal();
  481. continue;
  482. }
  483. encloseInstr(C, &UseMI);
  484. for (auto &DefOp : UseMI.defs()) {
  485. if (!DefOp.isReg())
  486. continue;
  487. Register DefReg = DefOp.getReg();
  488. if (!DefReg.isVirtual()) {
  489. C.setAllIllegal();
  490. continue;
  491. }
  492. visitRegister(C, DefReg, Domain, Worklist);
  493. }
  494. }
  495. }
  496. }
  497. void X86DomainReassignment::initConverters() {
  498. Converters[{MaskDomain, TargetOpcode::PHI}] =
  499. std::make_unique<InstrIgnore>(TargetOpcode::PHI);
  500. Converters[{MaskDomain, TargetOpcode::IMPLICIT_DEF}] =
  501. std::make_unique<InstrIgnore>(TargetOpcode::IMPLICIT_DEF);
  502. Converters[{MaskDomain, TargetOpcode::INSERT_SUBREG}] =
  503. std::make_unique<InstrReplaceWithCopy>(TargetOpcode::INSERT_SUBREG, 2);
  504. Converters[{MaskDomain, TargetOpcode::COPY}] =
  505. std::make_unique<InstrCOPYReplacer>(TargetOpcode::COPY, MaskDomain,
  506. TargetOpcode::COPY);
  507. auto createReplacerDstCOPY = [&](unsigned From, unsigned To) {
  508. Converters[{MaskDomain, From}] =
  509. std::make_unique<InstrReplacerDstCOPY>(From, To);
  510. };
  511. createReplacerDstCOPY(X86::MOVZX32rm16, X86::KMOVWkm);
  512. createReplacerDstCOPY(X86::MOVZX64rm16, X86::KMOVWkm);
  513. createReplacerDstCOPY(X86::MOVZX32rr16, X86::KMOVWkk);
  514. createReplacerDstCOPY(X86::MOVZX64rr16, X86::KMOVWkk);
  515. if (STI->hasDQI()) {
  516. createReplacerDstCOPY(X86::MOVZX16rm8, X86::KMOVBkm);
  517. createReplacerDstCOPY(X86::MOVZX32rm8, X86::KMOVBkm);
  518. createReplacerDstCOPY(X86::MOVZX64rm8, X86::KMOVBkm);
  519. createReplacerDstCOPY(X86::MOVZX16rr8, X86::KMOVBkk);
  520. createReplacerDstCOPY(X86::MOVZX32rr8, X86::KMOVBkk);
  521. createReplacerDstCOPY(X86::MOVZX64rr8, X86::KMOVBkk);
  522. }
  523. auto createReplacer = [&](unsigned From, unsigned To) {
  524. Converters[{MaskDomain, From}] = std::make_unique<InstrReplacer>(From, To);
  525. };
  526. createReplacer(X86::MOV16rm, X86::KMOVWkm);
  527. createReplacer(X86::MOV16mr, X86::KMOVWmk);
  528. createReplacer(X86::MOV16rr, X86::KMOVWkk);
  529. createReplacer(X86::SHR16ri, X86::KSHIFTRWri);
  530. createReplacer(X86::SHL16ri, X86::KSHIFTLWri);
  531. createReplacer(X86::NOT16r, X86::KNOTWrr);
  532. createReplacer(X86::OR16rr, X86::KORWrr);
  533. createReplacer(X86::AND16rr, X86::KANDWrr);
  534. createReplacer(X86::XOR16rr, X86::KXORWrr);
  535. if (STI->hasBWI()) {
  536. createReplacer(X86::MOV32rm, X86::KMOVDkm);
  537. createReplacer(X86::MOV64rm, X86::KMOVQkm);
  538. createReplacer(X86::MOV32mr, X86::KMOVDmk);
  539. createReplacer(X86::MOV64mr, X86::KMOVQmk);
  540. createReplacer(X86::MOV32rr, X86::KMOVDkk);
  541. createReplacer(X86::MOV64rr, X86::KMOVQkk);
  542. createReplacer(X86::SHR32ri, X86::KSHIFTRDri);
  543. createReplacer(X86::SHR64ri, X86::KSHIFTRQri);
  544. createReplacer(X86::SHL32ri, X86::KSHIFTLDri);
  545. createReplacer(X86::SHL64ri, X86::KSHIFTLQri);
  546. createReplacer(X86::ADD32rr, X86::KADDDrr);
  547. createReplacer(X86::ADD64rr, X86::KADDQrr);
  548. createReplacer(X86::NOT32r, X86::KNOTDrr);
  549. createReplacer(X86::NOT64r, X86::KNOTQrr);
  550. createReplacer(X86::OR32rr, X86::KORDrr);
  551. createReplacer(X86::OR64rr, X86::KORQrr);
  552. createReplacer(X86::AND32rr, X86::KANDDrr);
  553. createReplacer(X86::AND64rr, X86::KANDQrr);
  554. createReplacer(X86::ANDN32rr, X86::KANDNDrr);
  555. createReplacer(X86::ANDN64rr, X86::KANDNQrr);
  556. createReplacer(X86::XOR32rr, X86::KXORDrr);
  557. createReplacer(X86::XOR64rr, X86::KXORQrr);
  558. // TODO: KTEST is not a replacement for TEST due to flag differences. Need
  559. // to prove only Z flag is used.
  560. //createReplacer(X86::TEST32rr, X86::KTESTDrr);
  561. //createReplacer(X86::TEST64rr, X86::KTESTQrr);
  562. }
  563. if (STI->hasDQI()) {
  564. createReplacer(X86::ADD8rr, X86::KADDBrr);
  565. createReplacer(X86::ADD16rr, X86::KADDWrr);
  566. createReplacer(X86::AND8rr, X86::KANDBrr);
  567. createReplacer(X86::MOV8rm, X86::KMOVBkm);
  568. createReplacer(X86::MOV8mr, X86::KMOVBmk);
  569. createReplacer(X86::MOV8rr, X86::KMOVBkk);
  570. createReplacer(X86::NOT8r, X86::KNOTBrr);
  571. createReplacer(X86::OR8rr, X86::KORBrr);
  572. createReplacer(X86::SHR8ri, X86::KSHIFTRBri);
  573. createReplacer(X86::SHL8ri, X86::KSHIFTLBri);
  574. // TODO: KTEST is not a replacement for TEST due to flag differences. Need
  575. // to prove only Z flag is used.
  576. //createReplacer(X86::TEST8rr, X86::KTESTBrr);
  577. //createReplacer(X86::TEST16rr, X86::KTESTWrr);
  578. createReplacer(X86::XOR8rr, X86::KXORBrr);
  579. }
  580. }
  581. bool X86DomainReassignment::runOnMachineFunction(MachineFunction &MF) {
  582. if (skipFunction(MF.getFunction()))
  583. return false;
  584. if (DisableX86DomainReassignment)
  585. return false;
  586. LLVM_DEBUG(
  587. dbgs() << "***** Machine Function before Domain Reassignment *****\n");
  588. LLVM_DEBUG(MF.print(dbgs()));
  589. STI = &MF.getSubtarget<X86Subtarget>();
  590. // GPR->K is the only transformation currently supported, bail out early if no
  591. // AVX512.
  592. // TODO: We're also bailing of AVX512BW isn't supported since we use VK32 and
  593. // VK64 for GR32/GR64, but those aren't legal classes on KNL. If the register
  594. // coalescer doesn't clean it up and we generate a spill we will crash.
  595. if (!STI->hasAVX512() || !STI->hasBWI())
  596. return false;
  597. MRI = &MF.getRegInfo();
  598. assert(MRI->isSSA() && "Expected MIR to be in SSA form");
  599. TII = STI->getInstrInfo();
  600. initConverters();
  601. bool Changed = false;
  602. EnclosedEdges.clear();
  603. EnclosedEdges.resize(MRI->getNumVirtRegs());
  604. EnclosedInstrs.clear();
  605. std::vector<Closure> Closures;
  606. // Go over all virtual registers and calculate a closure.
  607. unsigned ClosureID = 0;
  608. for (unsigned Idx = 0; Idx < MRI->getNumVirtRegs(); ++Idx) {
  609. Register Reg = Register::index2VirtReg(Idx);
  610. // GPR only current source domain supported.
  611. if (!isGPR(MRI->getRegClass(Reg)))
  612. continue;
  613. // Register already in closure.
  614. if (EnclosedEdges.test(Idx))
  615. continue;
  616. // Calculate closure starting with Reg.
  617. Closure C(ClosureID++, {MaskDomain});
  618. buildClosure(C, Reg);
  619. // Collect all closures that can potentially be converted.
  620. if (!C.empty() && C.isLegal(MaskDomain))
  621. Closures.push_back(std::move(C));
  622. }
  623. for (Closure &C : Closures) {
  624. LLVM_DEBUG(C.dump(MRI));
  625. if (isReassignmentProfitable(C, MaskDomain)) {
  626. reassign(C, MaskDomain);
  627. ++NumClosuresConverted;
  628. Changed = true;
  629. }
  630. }
  631. LLVM_DEBUG(
  632. dbgs() << "***** Machine Function after Domain Reassignment *****\n");
  633. LLVM_DEBUG(MF.print(dbgs()));
  634. return Changed;
  635. }
  636. INITIALIZE_PASS(X86DomainReassignment, "x86-domain-reassignment",
  637. "X86 Domain Reassignment Pass", false, false)
  638. /// Returns an instance of the Domain Reassignment pass.
  639. FunctionPass *llvm::createX86DomainReassignmentPass() {
  640. return new X86DomainReassignment();
  641. }