  //==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
  //
  // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  // See https://llvm.org/LICENSE.txt for license information.
  // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  //
  //===----------------------------------------------------------------------===//
  //
  // This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
  //
  //===----------------------------------------------------------------------===//
  // Top-level machine model record for the Cortex-M4. The scheduling entries
  // for this core attach to it through `SchedModel = CortexM4Model`.
  def CortexM4Model : SchedMachineModel {
    // Treated as single-issue: IT is the only instruction that can dual-issue.
    let IssueWidth = 1;

    // Zero-sized micro-op buffer, i.e. the core is modelled as in-order.
    let MicroOpBufferSize = 0;

    // Load latency when the load is not pipelined and not pc-relative.
    let LoadLatency = 2;

    // Cost of a taken branch in the best case.
    let MispredictPenalty = 2;

    let PostRAScheduler = 1;

    // The model is not marked complete; the features listed below are declared
    // as unsupported by it.
    let CompleteModel = 0;
    let UnsupportedFeatures = [
      IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt,
      IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8,
      HasV8_3a, HasTrustZone, HasDFB, IsWindows
    ];
  }
  // The Cortex-M4 is in-order, so the whole CPU is represented by one pipeline
  // resource with a single unit and BufferSize = 0.
  def M4Unit : ProcResource<1> {
    let BufferSize = 0;
  }
  let SchedModel = CortexM4Model in {

    //===------------------------------------------------------------------===//
    // Latency helpers. Every helper consumes the single M4Unit resource; the
    // suffix (L1, L2, L3, L14) is the latency it assigns.
    //===------------------------------------------------------------------===//

    // WriteRes wrappers: bind a SchedWrite to M4Unit with a fixed latency.
    class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
      let Latency = 1;
    }
    class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
      let Latency = 2;
    }
    class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
      let Latency = 3;
    }
    class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
      let Latency = 14;
    }

    // SchedWriteRes records used by the per-instruction (InstRW) overrides.
    def M4UnitL1_wr : SchedWriteRes<[M4Unit]> {
      let Latency = 1;
    }
    def M4UnitL2_wr : SchedWriteRes<[M4Unit]> {
      let Latency = 2;
    }

    // InstRW wrappers: give the instructions selected by `instr` a latency of
    // 1 or 2 respectively.
    class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
    class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;

    //===------------------------------------------------------------------===//
    // Integer side.
    //===------------------------------------------------------------------===//

    // Loads, multiply-accumulates and divides use the higher latency of 2.
    foreach sw = [WriteLd, WriteMAC32, WriteMAC64Hi, WriteMAC64Lo, WriteMAC16,
                  WriteDIV] in
      def : M4UnitL2<sw>;
    def : M4UnitL2I<(instregex "(t|t2)LDM")>;
    def : M4UnitL2I<(instregex "(t|t2)LDR")>;

    // Stores produce no outputs, so they are given a latency of 1.
    def : M4UnitL1<WriteST>;
    def : M4UnitL1I<(instregex "(t|t2)STM")>;

    // All remaining integer writes have a latency of 1.
    foreach sw = [WriteALU, WriteALUsi, WriteALUsr, WriteALUSsr,
                  WriteBr, WriteBrL, WriteBrTbl,
                  WriteCMPsi, WriteCMPsr, WriteCMP,
                  WriteMUL32, WriteMUL64Hi, WriteMUL64Lo, WriteMUL16,
                  WriteNoop, WritePreLd] in
      def : M4UnitL1<sw>;

    def : M4UnitL1I<(instregex "(t|t2)MOV")>;
    def : M4UnitL1I<(instrs COPY)>;
    def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
    def : M4UnitL1I<(instregex "t2CLREX")>;
    def : M4UnitL1I<(instregex
        "t2SEL", "t2USAD8", "t2SML[AS]",
        "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", "t2USADA8", "(t|t2)REV")>;

    // The entries below matter little for scheduling: the instructions are
    // either never generated or gain nothing from being scheduled. They are
    // listed only so that the model is more complete.
    def : M4UnitL1I<(instregex
        "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>;
    def : M4UnitL1I<(instregex
        "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
    def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>;
    def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
    def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
    def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>;

    // Integer reads get a read advance of 0 cycles.
    foreach rd = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in
      def : ReadAdvance<rd, 0>;

    //===------------------------------------------------------------------===//
    // Floating point. Single-cycle latency is the norm; the exceptions are
    // multiply-accumulate, divide and square root. Loads remain at 2 cycles.
    //===------------------------------------------------------------------===//

    foreach sw = [WriteFPCVT, WriteFPMOV, WriteFPALU32, WriteFPALU64,
                  WriteFPMUL32, WriteFPMUL64] in
      def : M4UnitL1<sw>;

    def : M4UnitL2I<(instregex "VLD")>;
    def : M4UnitL1I<(instregex "VST")>;

    foreach sw = [WriteFPMAC32, WriteFPMAC64] in
      def : M4UnitL3<sw>;

    foreach sw = [WriteFPDIV32, WriteFPDIV64, WriteFPSQRT32, WriteFPSQRT64] in
      def : M4UnitL14<sw>;

    foreach sw = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
                  WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
      def : M4UnitL1<sw>;

    def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
    def : M4UnitL2I<(instregex "VMOVD")>;
    def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;

    // Floating-point reads likewise get a read advance of 0 cycles.
    foreach rd = [ReadFPMUL, ReadFPMAC] in
      def : ReadAdvance<rd, 0>;

  } // SchedModel = CortexM4Model