PPCHazardRecognizers.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. //===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements hazard recognizers for scheduling on PowerPC processors.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "PPCHazardRecognizers.h"
  13. #include "PPCInstrInfo.h"
  14. #include "PPCSubtarget.h"
  15. #include "llvm/CodeGen/ScheduleDAG.h"
  16. #include "llvm/Support/Debug.h"
  17. #include "llvm/Support/ErrorHandling.h"
  18. #include "llvm/Support/raw_ostream.h"
  19. using namespace llvm;
  20. #define DEBUG_TYPE "pre-RA-sched"
  21. bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {
  22. // FIXME: Move this.
  23. if (isBCTRAfterSet(SU))
  24. return true;
  25. const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
  26. if (!MCID)
  27. return false;
  28. if (!MCID->mayLoad())
  29. return false;
  30. // SU is a load; for any predecessors in this dispatch group, that are stores,
  31. // and with which we have an ordering dependency, return true.
  32. for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
  33. const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
  34. if (!PredMCID || !PredMCID->mayStore())
  35. continue;
  36. if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())
  37. continue;
  38. for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
  39. if (SU->Preds[i].getSUnit() == CurGroup[j])
  40. return true;
  41. }
  42. return false;
  43. }
  44. bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {
  45. const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
  46. if (!MCID)
  47. return false;
  48. if (!MCID->isBranch())
  49. return false;
  50. // SU is a branch; for any predecessors in this dispatch group, with which we
  51. // have a data dependence and set the counter register, return true.
  52. for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {
  53. const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());
  54. if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)
  55. continue;
  56. if (SU->Preds[i].isCtrl())
  57. continue;
  58. for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)
  59. if (SU->Preds[i].getSUnit() == CurGroup[j])
  60. return true;
  61. }
  62. return false;
  63. }
  // FIXME: Remove this when we don't need this:
  // Forward declaration of the opcode-mapping helper consulted by
  // mustComeFirst(); its definition lives outside this file.
  namespace llvm {
  namespace PPC {
  extern int getNonRecordFormOpcode(uint16_t);
  } // end namespace PPC
  } // end namespace llvm
  66. // FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.
  67. bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,
  68. unsigned &NSlots) {
  69. // FIXME: Indirectly, this information is contained in the itinerary, and
  70. // we should derive it from there instead of separately specifying it
  71. // here.
  72. unsigned IIC = MCID->getSchedClass();
  73. switch (IIC) {
  74. default:
  75. NSlots = 1;
  76. break;
  77. case PPC::Sched::IIC_IntDivW:
  78. case PPC::Sched::IIC_IntDivD:
  79. case PPC::Sched::IIC_LdStLoadUpd:
  80. case PPC::Sched::IIC_LdStLDU:
  81. case PPC::Sched::IIC_LdStLFDU:
  82. case PPC::Sched::IIC_LdStLFDUX:
  83. case PPC::Sched::IIC_LdStLHA:
  84. case PPC::Sched::IIC_LdStLHAU:
  85. case PPC::Sched::IIC_LdStLWA:
  86. case PPC::Sched::IIC_LdStSTU:
  87. case PPC::Sched::IIC_LdStSTFDU:
  88. NSlots = 2;
  89. break;
  90. case PPC::Sched::IIC_LdStLoadUpdX:
  91. case PPC::Sched::IIC_LdStLDUX:
  92. case PPC::Sched::IIC_LdStLHAUX:
  93. case PPC::Sched::IIC_LdStLWARX:
  94. case PPC::Sched::IIC_LdStLDARX:
  95. case PPC::Sched::IIC_LdStSTUX:
  96. case PPC::Sched::IIC_LdStSTDCX:
  97. case PPC::Sched::IIC_LdStSTWCX:
  98. case PPC::Sched::IIC_BrMCRX: // mtcr
  99. // FIXME: Add sync/isync (here and in the itinerary).
  100. NSlots = 4;
  101. break;
  102. }
  103. // FIXME: record-form instructions need a different itinerary class.
  104. if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)
  105. NSlots = 2;
  106. switch (IIC) {
  107. default:
  108. // All multi-slot instructions must come first.
  109. return NSlots > 1;
  110. case PPC::Sched::IIC_BrCR: // cr logicals
  111. case PPC::Sched::IIC_SprMFCR:
  112. case PPC::Sched::IIC_SprMFCRF:
  113. case PPC::Sched::IIC_SprMTSPR:
  114. return true;
  115. }
  116. }
  117. ScheduleHazardRecognizer::HazardType
  118. PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  119. if (Stalls == 0 && isLoadAfterStore(SU))
  120. return NoopHazard;
  121. return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
  122. }
  123. bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
  124. const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
  125. unsigned NSlots;
  126. if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)
  127. return true;
  128. return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);
  129. }
  130. unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {
  131. // We only need to fill out a maximum of 5 slots here: The 6th slot could
  132. // only be a second branch, and otherwise the next instruction will start a
  133. // new group.
  134. if (isLoadAfterStore(SU) && CurSlots < 6) {
  135. unsigned Directive =
  136. DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
  137. // If we're using a special group-terminating nop, then we need only one.
  138. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  139. if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
  140. Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9)
  141. return 1;
  142. return 5 - CurSlots;
  143. }
  144. return ScoreboardHazardRecognizer::PreEmitNoops(SU);
  145. }
  146. void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {
  147. const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
  148. if (MCID) {
  149. if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {
  150. CurGroup.clear();
  151. CurSlots = CurBranches = 0;
  152. } else {
  153. LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");
  154. LLVM_DEBUG(DAG->dumpNode(*SU));
  155. unsigned NSlots;
  156. bool MustBeFirst = mustComeFirst(MCID, NSlots);
  157. // If this instruction must come first, but does not, then it starts a
  158. // new group.
  159. if (MustBeFirst && CurSlots) {
  160. CurSlots = CurBranches = 0;
  161. CurGroup.clear();
  162. }
  163. CurSlots += NSlots;
  164. CurGroup.push_back(SU);
  165. if (MCID->isBranch())
  166. ++CurBranches;
  167. }
  168. }
  169. return ScoreboardHazardRecognizer::EmitInstruction(SU);
  170. }
  171. void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {
  172. return ScoreboardHazardRecognizer::AdvanceCycle();
  173. }
  174. void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {
  175. llvm_unreachable("Bottom-up scheduling not supported");
  176. }
  177. void PPCDispatchGroupSBHazardRecognizer::Reset() {
  178. CurGroup.clear();
  179. CurSlots = CurBranches = 0;
  180. return ScoreboardHazardRecognizer::Reset();
  181. }
  182. void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
  183. unsigned Directive =
  184. DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();
  185. // If the group has now filled all of its slots, or if we're using a special
  186. // group-terminating nop, the group is complete.
  187. // FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
  188. if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
  189. Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 ||
  190. CurSlots == 6) {
  191. CurGroup.clear();
  192. CurSlots = CurBranches = 0;
  193. } else {
  194. CurGroup.push_back(nullptr);
  195. ++CurSlots;
  196. }
  197. }
  198. //===----------------------------------------------------------------------===//
  199. // PowerPC 970 Hazard Recognizer
  200. //
  201. // This models the dispatch group formation of the PPC970 processor. Dispatch
  202. // groups are bundles of up to five instructions that can contain various mixes
  203. // of instructions. The PPC970 can dispatch a peak of 4 non-branch and one
  204. // branch instruction per-cycle.
  205. //
  206. // There are a number of restrictions to dispatch group formation: some
  207. // instructions can only be issued in the first slot of a dispatch group, & some
  208. // instructions fill an entire dispatch group. Additionally, only branches can
  209. // issue in the 5th (last) slot.
  210. //
  211. // Finally, there are a number of "structural" hazards on the PPC970. These
  212. // conditions cause large performance penalties due to misprediction, recovery,
  213. // and replay logic that has to happen. These cases include setting a CTR and
  214. // branching through it in the same dispatch group, and storing to an address,
  215. // then loading from the same address within a dispatch group. To avoid these
  216. // conditions, we insert no-op instructions when appropriate.
  217. //
  218. // FIXME: This is missing some significant cases:
  219. // 1. Modeling of microcoded instructions.
  220. // 2. Handling of serialized operations.
  221. // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
  222. //
  223. PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)
  224. : DAG(DAG) {
  225. EndDispatchGroup();
  226. }
  227. void PPCHazardRecognizer970::EndDispatchGroup() {
  228. LLVM_DEBUG(errs() << "=== Start of dispatch group\n");
  229. NumIssued = 0;
  230. // Structural hazard info.
  231. HasCTRSet = false;
  232. NumStores = 0;
  233. }
  234. PPCII::PPC970_Unit
  235. PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
  236. bool &isFirst, bool &isSingle,
  237. bool &isCracked,
  238. bool &isLoad, bool &isStore) {
  239. const MCInstrDesc &MCID = DAG.TII->get(Opcode);
  240. isLoad = MCID.mayLoad();
  241. isStore = MCID.mayStore();
  242. uint64_t TSFlags = MCID.TSFlags;
  243. isFirst = TSFlags & PPCII::PPC970_First;
  244. isSingle = TSFlags & PPCII::PPC970_Single;
  245. isCracked = TSFlags & PPCII::PPC970_Cracked;
  246. return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);
  247. }
  248. /// isLoadOfStoredAddress - If we have a load from the previously stored pointer
  249. /// as indicated by StorePtr1/StorePtr2/StoreSize, return true.
  250. bool PPCHazardRecognizer970::
  251. isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,
  252. const Value *LoadValue) const {
  253. for (unsigned i = 0, e = NumStores; i != e; ++i) {
  254. // Handle exact and commuted addresses.
  255. if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])
  256. return true;
  257. // Okay, we don't have an exact match, if this is an indexed offset, see if
  258. // we have overlap (which happens during fp->int conversion for example).
  259. if (StoreValue[i] == LoadValue) {
  260. // Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check
  261. // to see if the load and store actually overlap.
  262. if (StoreOffset[i] < LoadOffset) {
  263. if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;
  264. } else {
  265. if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;
  266. }
  267. }
  268. }
  269. return false;
  270. }
  271. /// getHazardType - We return hazard for any non-branch instruction that would
  272. /// terminate the dispatch group. We turn NoopHazard for any
  273. /// instructions that wouldn't terminate the dispatch group that would cause a
  274. /// pipeline flush.
  275. ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::
  276. getHazardType(SUnit *SU, int Stalls) {
  277. assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");
  278. MachineInstr *MI = SU->getInstr();
  279. if (MI->isDebugInstr())
  280. return NoHazard;
  281. unsigned Opcode = MI->getOpcode();
  282. bool isFirst, isSingle, isCracked, isLoad, isStore;
  283. PPCII::PPC970_Unit InstrType =
  284. GetInstrType(Opcode, isFirst, isSingle, isCracked,
  285. isLoad, isStore);
  286. if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;
  287. // We can only issue a PPC970_First/PPC970_Single instruction (such as
  288. // crand/mtspr/etc) if this is the first cycle of the dispatch group.
  289. if (NumIssued != 0 && (isFirst || isSingle))
  290. return Hazard;
  291. // If this instruction is cracked into two ops by the decoder, we know that
  292. // it is not a branch and that it cannot issue if 3 other instructions are
  293. // already in the dispatch group.
  294. if (isCracked && NumIssued > 2)
  295. return Hazard;
  296. switch (InstrType) {
  297. default: llvm_unreachable("Unknown instruction type!");
  298. case PPCII::PPC970_FXU:
  299. case PPCII::PPC970_LSU:
  300. case PPCII::PPC970_FPU:
  301. case PPCII::PPC970_VALU:
  302. case PPCII::PPC970_VPERM:
  303. // We can only issue a branch as the last instruction in a group.
  304. if (NumIssued == 4) return Hazard;
  305. break;
  306. case PPCII::PPC970_CRU:
  307. // We can only issue a CR instruction in the first two slots.
  308. if (NumIssued >= 2) return Hazard;
  309. break;
  310. case PPCII::PPC970_BRU:
  311. break;
  312. }
  313. // Do not allow MTCTR and BCTRL to be in the same dispatch group.
  314. if (HasCTRSet && Opcode == PPC::BCTRL)
  315. return NoopHazard;
  316. // If this is a load following a store, make sure it's not to the same or
  317. // overlapping address.
  318. if (isLoad && NumStores && !MI->memoperands_empty()) {
  319. MachineMemOperand *MO = *MI->memoperands_begin();
  320. if (isLoadOfStoredAddress(MO->getSize(),
  321. MO->getOffset(), MO->getValue()))
  322. return NoopHazard;
  323. }
  324. return NoHazard;
  325. }
  326. void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
  327. MachineInstr *MI = SU->getInstr();
  328. if (MI->isDebugInstr())
  329. return;
  330. unsigned Opcode = MI->getOpcode();
  331. bool isFirst, isSingle, isCracked, isLoad, isStore;
  332. PPCII::PPC970_Unit InstrType =
  333. GetInstrType(Opcode, isFirst, isSingle, isCracked,
  334. isLoad, isStore);
  335. if (InstrType == PPCII::PPC970_Pseudo) return;
  336. // Update structural hazard information.
  337. if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;
  338. // Track the address stored to.
  339. if (isStore && NumStores < 4 && !MI->memoperands_empty()) {
  340. MachineMemOperand *MO = *MI->memoperands_begin();
  341. StoreSize[NumStores] = MO->getSize();
  342. StoreOffset[NumStores] = MO->getOffset();
  343. StoreValue[NumStores] = MO->getValue();
  344. ++NumStores;
  345. }
  346. if (InstrType == PPCII::PPC970_BRU || isSingle)
  347. NumIssued = 4; // Terminate a d-group.
  348. ++NumIssued;
  349. // If this instruction is cracked into two ops by the decoder, remember that
  350. // we issued two pieces.
  351. if (isCracked)
  352. ++NumIssued;
  353. if (NumIssued == 5)
  354. EndDispatchGroup();
  355. }
  356. void PPCHazardRecognizer970::AdvanceCycle() {
  357. assert(NumIssued < 5 && "Illegal dispatch group!");
  358. ++NumIssued;
  359. if (NumIssued == 5)
  360. EndDispatchGroup();
  361. }
  362. void PPCHazardRecognizer970::Reset() {
  363. EndDispatchGroup();
  364. }