//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca"

namespace llvm {
namespace mca {

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });
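
  // For example (hypothetical masks): a unit with mask 0b0001 (popcount 1)
  // sorts before a group with mask 0b1011 (popcount 3), so the cycles spent
  // on the unit can be subtracted from every group that contains it before
  // the group itself is processed in the loop below.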

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  auto GroupIt = find_if(Worklist, [](const ResourcePlusCycles &Elt) {
    return countPopulation(Elt.first) > 1;
  });
  unsigned FirstGroupIdx = std::distance(Worklist.begin(), GroupIt);
  uint64_t ImpliedUsesOfResourceUnits = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);

      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) == 1) {
        // At simulation time, this resource group use will decay into a simple
        // use of the resource unit identified by `AvailableMask`.
        ImpliedUsesOfResourceUnits |= AvailableMask;
        UsedResourceUnits |= AvailableMask;
      }
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // Look for implicit uses of processor resource units. These are resource
  // units which are indirectly consumed by resource groups, and that must be
  // always available on instruction issue.
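  // For example (hypothetical masks): a group with mask 0b1011 covers units
  // 0b0001 and 0b0010 (the top bit identifies the group itself). If unit
  // 0b0001 is already in use, the group use decays to unit 0b0010, which is
  // then recorded here as an implied use.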
  while (ImpliedUsesOfResourceUnits) {
    ID.ImplicitlyUsedProcResUnits |= ImpliedUsesOfResourceUnits;
    ImpliedUsesOfResourceUnits = 0;
    for (unsigned I = FirstGroupIdx, E = Worklist.size(); I < E; ++I) {
      ResourcePlusCycles &A = Worklist[I];
      if (!A.second.size())
        continue;

      uint64_t NormalizedMask = A.first;
      assert(countPopulation(NormalizedMask) > 1);
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      uint64_t AvailableMask = NormalizedMask & ~UsedResourceUnits;
      if ((NormalizedMask != AvailableMask) &&
          countPopulation(AvailableMask) != 1)
        continue;

      UsedResourceUnits |= AvailableMask;
      ImpliedUsesOfResourceUnits |= AvailableMask;
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  //  On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  //  of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  //  extra delay on top of the 2 cycles latency.
  //  During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = countPopulation(Mask);
      if (RPC.second.NumUnits > countPopulation(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tImplicitly Used Units="
           << format_hex(ID.ImplicitlyUsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //   vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                              @  <MCOperand Reg:59>
  //                              @  <MCOperand Imm:0>     (!!)
  //                              @  <MCOperand Reg:67>
  //                              @  <MCOperand Imm:0>
  //                              @  <MCOperand Imm:14>
  //                              @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2. register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
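  //
  // For example, an opcode with one explicit definition, one implicit
  // definition (e.g. a status register) and an optional definition reserves
  // three entries in ID.Writes up front; writes introduced by variadic
  // register operands, if any, are appended after those.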
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
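  // For instance, with two explicit uses and one implicit use, the implicit
  // read gets UseIndex 2, so ReadAdvance information keyed on use indices
  // still lines up.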
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }
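
  // Note: descriptors for variant (and variadic) instructions depend on the
  // specific MCInst, so they are cached by MCInst pointer rather than by
  // opcode; see the end of this function.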

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
  ID->BeginGroup = SCDesc.BeginGroup;
  ID->EndGroup = SCDesc.EndGroup;
  ID->RetireOOO = SCDesc.RetireOOO;

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  std::unique_ptr<Instruction> NewIS =
      std::make_unique<Instruction>(D, MCI.getOpcode());

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }
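
  // For context: a zero idiom (e.g. "xorl %eax, %eax" on X86) always produces
  // zero regardless of its inputs, so it has no true dependency on its source
  // registers; `Mask` is consulted below to decide which reads, if any, are
  // independent of prior definitions.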

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    NewIS->getUses().emplace_back(RD, RegID);
    ReadState &RS = NewIS->getUses().back();

    if (IsDepBreaking) {
      // A mask of all zeroes means that all explicit input register operands
      // are treated as independent from prior definitions.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS.setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS.setIndependentFromDef();
        }
      }
    }
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return std::move(NewIS);

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(WD, RegID,
                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                  /* WritesZero */ IsZeroIdiom);
    ++WriteIndex;
  }

  return std::move(NewIS);
}

} // namespace mca
} // namespace llvm