//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}
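
// Populate the InstrDesc with the processor resources consumed by the given
// scheduling class (resource units, groups, and buffered resources), and
// compute flags such as MustIssueImmediately and HasPartiallyOverlappingGroups.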
static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);

      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  //  On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  //  of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  //  extra delay on top of the 2 cycles latency.
  //  During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}
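
// Compute a conservative upper bound on the instruction latency. Calls and
// unknown latencies default to a fixed 100-cycle estimate.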
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}
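
// Sanity-check that the MCInst carries the register definitions implied by
// its MCInstrDesc (the explicit defs plus, when present, the optional def).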
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}
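
// Build the list of WriteDescriptors for this instruction: explicit and
// implicit register definitions, the optional definition (if any), and
// variadic operands when they are known to be definitions.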
void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  //  1. The number of explicit and implicit register definitions in a MCInst
  //     matches the number of explicit and implicit definitions according to
  //     the opcode descriptor (MCInstrDesc).
  //  2. Uses start at index #(MCDesc.getNumDefs()).
  //  3. There can only be a single optional register definition, and it is
  //     either the last operand of the sequence (excluding extra operands
  //     contributed by variadic opcodes) or one of the explicit register
  //     definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions is
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To workaround this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2. register reads start at #(NumExplicitDefs-1).
  // That means, register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}
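
// Build the list of ReadDescriptors for this instruction: explicit uses,
// implicit uses, and variadic operands treated as uses.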
void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}
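
// Reject descriptors that consume scheduler resources or buffer entries while
// declaring zero micro opcodes; such scheduling data is inconsistent.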
Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}
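
// Create a new InstrDesc for this MCInst: resolve the scheduling class
// (including variant classes), gather resource usage and latency, populate
// writes and reads, and cache the descriptor when it can be reused.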
Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<SharedInstrument> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(&MCI, SchedClassID);
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}
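
// Return a cached descriptor for this MCInst if one exists (keyed by opcode
// and scheduling class, or by MCInst pointer for variants); otherwise create
// a new one.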
Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<SharedInstrument> &IVec) {
  // Cache lookup using SchedClassID from Instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  unsigned CPUID = STI.getSchedModel().getProcessorID();
  SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
  auto VDKey = std::make_pair(&MCI, SchedClassID);
  if (VariantDescriptors.find(VDKey) != VariantDescriptors.end())
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");
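
// Materialize an mca::Instruction from an MCInst: fetch (or build) its
// InstrDesc, then initialize the register read/write states, optionally
// reusing a recycled Instruction provided by the recycle callback.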
Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<SharedInstrument> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}

} // namespace mca
} // namespace llvm