AArch64MacroFusion.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. //===- AArch64MacroFusion.cpp - AArch64 Macro Fusion ----------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file This file contains the AArch64 implementation of the DAG scheduling
  10. /// mutation to pair instructions back to back.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "AArch64MacroFusion.h"
  14. #include "AArch64Subtarget.h"
  15. #include "llvm/CodeGen/MacroFusion.h"
  16. #include "llvm/CodeGen/TargetInstrInfo.h"
  17. using namespace llvm;
  18. /// CMN, CMP, TST followed by Bcc
  19. static bool isArithmeticBccPair(const MachineInstr *FirstMI,
  20. const MachineInstr &SecondMI, bool CmpOnly) {
  21. if (SecondMI.getOpcode() != AArch64::Bcc)
  22. return false;
  23. // Assume the 1st instr to be a wildcard if it is unspecified.
  24. if (FirstMI == nullptr)
  25. return true;
  26. // If we're in CmpOnly mode, we only fuse arithmetic instructions that
  27. // discard their result.
  28. if (CmpOnly && !(FirstMI->getOperand(0).getReg() == AArch64::XZR ||
  29. FirstMI->getOperand(0).getReg() == AArch64::WZR)) {
  30. return false;
  31. }
  32. switch (FirstMI->getOpcode()) {
  33. case AArch64::ADDSWri:
  34. case AArch64::ADDSWrr:
  35. case AArch64::ADDSXri:
  36. case AArch64::ADDSXrr:
  37. case AArch64::ANDSWri:
  38. case AArch64::ANDSWrr:
  39. case AArch64::ANDSXri:
  40. case AArch64::ANDSXrr:
  41. case AArch64::SUBSWri:
  42. case AArch64::SUBSWrr:
  43. case AArch64::SUBSXri:
  44. case AArch64::SUBSXrr:
  45. case AArch64::BICSWrr:
  46. case AArch64::BICSXrr:
  47. return true;
  48. case AArch64::ADDSWrs:
  49. case AArch64::ADDSXrs:
  50. case AArch64::ANDSWrs:
  51. case AArch64::ANDSXrs:
  52. case AArch64::SUBSWrs:
  53. case AArch64::SUBSXrs:
  54. case AArch64::BICSWrs:
  55. case AArch64::BICSXrs:
  56. // Shift value can be 0 making these behave like the "rr" variant...
  57. return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
  58. }
  59. return false;
  60. }
  61. /// ALU operations followed by CBZ/CBNZ.
  62. static bool isArithmeticCbzPair(const MachineInstr *FirstMI,
  63. const MachineInstr &SecondMI) {
  64. if (SecondMI.getOpcode() != AArch64::CBZW &&
  65. SecondMI.getOpcode() != AArch64::CBZX &&
  66. SecondMI.getOpcode() != AArch64::CBNZW &&
  67. SecondMI.getOpcode() != AArch64::CBNZX)
  68. return false;
  69. // Assume the 1st instr to be a wildcard if it is unspecified.
  70. if (FirstMI == nullptr)
  71. return true;
  72. switch (FirstMI->getOpcode()) {
  73. case AArch64::ADDWri:
  74. case AArch64::ADDWrr:
  75. case AArch64::ADDXri:
  76. case AArch64::ADDXrr:
  77. case AArch64::ANDWri:
  78. case AArch64::ANDWrr:
  79. case AArch64::ANDXri:
  80. case AArch64::ANDXrr:
  81. case AArch64::EORWri:
  82. case AArch64::EORWrr:
  83. case AArch64::EORXri:
  84. case AArch64::EORXrr:
  85. case AArch64::ORRWri:
  86. case AArch64::ORRWrr:
  87. case AArch64::ORRXri:
  88. case AArch64::ORRXrr:
  89. case AArch64::SUBWri:
  90. case AArch64::SUBWrr:
  91. case AArch64::SUBXri:
  92. case AArch64::SUBXrr:
  93. return true;
  94. case AArch64::ADDWrs:
  95. case AArch64::ADDXrs:
  96. case AArch64::ANDWrs:
  97. case AArch64::ANDXrs:
  98. case AArch64::SUBWrs:
  99. case AArch64::SUBXrs:
  100. case AArch64::BICWrs:
  101. case AArch64::BICXrs:
  102. // Shift value can be 0 making these behave like the "rr" variant...
  103. return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
  104. }
  105. return false;
  106. }
  107. /// AES crypto encoding or decoding.
  108. static bool isAESPair(const MachineInstr *FirstMI,
  109. const MachineInstr &SecondMI) {
  110. // Assume the 1st instr to be a wildcard if it is unspecified.
  111. switch (SecondMI.getOpcode()) {
  112. // AES encode.
  113. case AArch64::AESMCrr:
  114. case AArch64::AESMCrrTied:
  115. return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESErr;
  116. // AES decode.
  117. case AArch64::AESIMCrr:
  118. case AArch64::AESIMCrrTied:
  119. return FirstMI == nullptr || FirstMI->getOpcode() == AArch64::AESDrr;
  120. }
  121. return false;
  122. }
  123. /// AESE/AESD/PMULL + EOR.
  124. static bool isCryptoEORPair(const MachineInstr *FirstMI,
  125. const MachineInstr &SecondMI) {
  126. if (SecondMI.getOpcode() != AArch64::EORv16i8)
  127. return false;
  128. // Assume the 1st instr to be a wildcard if it is unspecified.
  129. if (FirstMI == nullptr)
  130. return true;
  131. switch (FirstMI->getOpcode()) {
  132. case AArch64::AESErr:
  133. case AArch64::AESDrr:
  134. case AArch64::PMULLv16i8:
  135. case AArch64::PMULLv8i8:
  136. case AArch64::PMULLv1i64:
  137. case AArch64::PMULLv2i64:
  138. return true;
  139. }
  140. return false;
  141. }
  142. /// Literal generation.
  143. static bool isLiteralsPair(const MachineInstr *FirstMI,
  144. const MachineInstr &SecondMI) {
  145. // Assume the 1st instr to be a wildcard if it is unspecified.
  146. // PC relative address.
  147. if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::ADRP) &&
  148. SecondMI.getOpcode() == AArch64::ADDXri)
  149. return true;
  150. // 32 bit immediate.
  151. if ((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZWi) &&
  152. (SecondMI.getOpcode() == AArch64::MOVKWi &&
  153. SecondMI.getOperand(3).getImm() == 16))
  154. return true;
  155. // Lower half of 64 bit immediate.
  156. if((FirstMI == nullptr || FirstMI->getOpcode() == AArch64::MOVZXi) &&
  157. (SecondMI.getOpcode() == AArch64::MOVKXi &&
  158. SecondMI.getOperand(3).getImm() == 16))
  159. return true;
  160. // Upper half of 64 bit immediate.
  161. if ((FirstMI == nullptr ||
  162. (FirstMI->getOpcode() == AArch64::MOVKXi &&
  163. FirstMI->getOperand(3).getImm() == 32)) &&
  164. (SecondMI.getOpcode() == AArch64::MOVKXi &&
  165. SecondMI.getOperand(3).getImm() == 48))
  166. return true;
  167. return false;
  168. }
  169. /// Fuse address generation and loads or stores.
  170. static bool isAddressLdStPair(const MachineInstr *FirstMI,
  171. const MachineInstr &SecondMI) {
  172. switch (SecondMI.getOpcode()) {
  173. case AArch64::STRBBui:
  174. case AArch64::STRBui:
  175. case AArch64::STRDui:
  176. case AArch64::STRHHui:
  177. case AArch64::STRHui:
  178. case AArch64::STRQui:
  179. case AArch64::STRSui:
  180. case AArch64::STRWui:
  181. case AArch64::STRXui:
  182. case AArch64::LDRBBui:
  183. case AArch64::LDRBui:
  184. case AArch64::LDRDui:
  185. case AArch64::LDRHHui:
  186. case AArch64::LDRHui:
  187. case AArch64::LDRQui:
  188. case AArch64::LDRSui:
  189. case AArch64::LDRWui:
  190. case AArch64::LDRXui:
  191. case AArch64::LDRSBWui:
  192. case AArch64::LDRSBXui:
  193. case AArch64::LDRSHWui:
  194. case AArch64::LDRSHXui:
  195. case AArch64::LDRSWui:
  196. // Assume the 1st instr to be a wildcard if it is unspecified.
  197. if (FirstMI == nullptr)
  198. return true;
  199. switch (FirstMI->getOpcode()) {
  200. case AArch64::ADR:
  201. return SecondMI.getOperand(2).getImm() == 0;
  202. case AArch64::ADRP:
  203. return true;
  204. }
  205. }
  206. return false;
  207. }
  208. /// Compare and conditional select.
  209. static bool isCCSelectPair(const MachineInstr *FirstMI,
  210. const MachineInstr &SecondMI) {
  211. // 32 bits
  212. if (SecondMI.getOpcode() == AArch64::CSELWr) {
  213. // Assume the 1st instr to be a wildcard if it is unspecified.
  214. if (FirstMI == nullptr)
  215. return true;
  216. if (FirstMI->definesRegister(AArch64::WZR))
  217. switch (FirstMI->getOpcode()) {
  218. case AArch64::SUBSWrs:
  219. return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
  220. case AArch64::SUBSWrx:
  221. return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
  222. case AArch64::SUBSWrr:
  223. case AArch64::SUBSWri:
  224. return true;
  225. }
  226. }
  227. // 64 bits
  228. if (SecondMI.getOpcode() == AArch64::CSELXr) {
  229. // Assume the 1st instr to be a wildcard if it is unspecified.
  230. if (FirstMI == nullptr)
  231. return true;
  232. if (FirstMI->definesRegister(AArch64::XZR))
  233. switch (FirstMI->getOpcode()) {
  234. case AArch64::SUBSXrs:
  235. return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
  236. case AArch64::SUBSXrx:
  237. case AArch64::SUBSXrx64:
  238. return !AArch64InstrInfo::hasExtendedReg(*FirstMI);
  239. case AArch64::SUBSXrr:
  240. case AArch64::SUBSXri:
  241. return true;
  242. }
  243. }
  244. return false;
  245. }
  246. // Arithmetic and logic.
  247. static bool isArithmeticLogicPair(const MachineInstr *FirstMI,
  248. const MachineInstr &SecondMI) {
  249. if (AArch64InstrInfo::hasShiftedReg(SecondMI))
  250. return false;
  251. switch (SecondMI.getOpcode()) {
  252. // Arithmetic
  253. case AArch64::ADDWrr:
  254. case AArch64::ADDXrr:
  255. case AArch64::SUBWrr:
  256. case AArch64::SUBXrr:
  257. case AArch64::ADDWrs:
  258. case AArch64::ADDXrs:
  259. case AArch64::SUBWrs:
  260. case AArch64::SUBXrs:
  261. // Logic
  262. case AArch64::ANDWrr:
  263. case AArch64::ANDXrr:
  264. case AArch64::BICWrr:
  265. case AArch64::BICXrr:
  266. case AArch64::EONWrr:
  267. case AArch64::EONXrr:
  268. case AArch64::EORWrr:
  269. case AArch64::EORXrr:
  270. case AArch64::ORNWrr:
  271. case AArch64::ORNXrr:
  272. case AArch64::ORRWrr:
  273. case AArch64::ORRXrr:
  274. case AArch64::ANDWrs:
  275. case AArch64::ANDXrs:
  276. case AArch64::BICWrs:
  277. case AArch64::BICXrs:
  278. case AArch64::EONWrs:
  279. case AArch64::EONXrs:
  280. case AArch64::EORWrs:
  281. case AArch64::EORXrs:
  282. case AArch64::ORNWrs:
  283. case AArch64::ORNXrs:
  284. case AArch64::ORRWrs:
  285. case AArch64::ORRXrs:
  286. // Assume the 1st instr to be a wildcard if it is unspecified.
  287. if (FirstMI == nullptr)
  288. return true;
  289. // Arithmetic
  290. switch (FirstMI->getOpcode()) {
  291. case AArch64::ADDWrr:
  292. case AArch64::ADDXrr:
  293. case AArch64::ADDSWrr:
  294. case AArch64::ADDSXrr:
  295. case AArch64::SUBWrr:
  296. case AArch64::SUBXrr:
  297. case AArch64::SUBSWrr:
  298. case AArch64::SUBSXrr:
  299. return true;
  300. case AArch64::ADDWrs:
  301. case AArch64::ADDXrs:
  302. case AArch64::ADDSWrs:
  303. case AArch64::ADDSXrs:
  304. case AArch64::SUBWrs:
  305. case AArch64::SUBXrs:
  306. case AArch64::SUBSWrs:
  307. case AArch64::SUBSXrs:
  308. return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
  309. }
  310. break;
  311. // Arithmetic, setting flags.
  312. case AArch64::ADDSWrr:
  313. case AArch64::ADDSXrr:
  314. case AArch64::SUBSWrr:
  315. case AArch64::SUBSXrr:
  316. case AArch64::ADDSWrs:
  317. case AArch64::ADDSXrs:
  318. case AArch64::SUBSWrs:
  319. case AArch64::SUBSXrs:
  320. // Assume the 1st instr to be a wildcard if it is unspecified.
  321. if (FirstMI == nullptr)
  322. return true;
  323. // Arithmetic, not setting flags.
  324. switch (FirstMI->getOpcode()) {
  325. case AArch64::ADDWrr:
  326. case AArch64::ADDXrr:
  327. case AArch64::SUBWrr:
  328. case AArch64::SUBXrr:
  329. return true;
  330. case AArch64::ADDWrs:
  331. case AArch64::ADDXrs:
  332. case AArch64::SUBWrs:
  333. case AArch64::SUBXrs:
  334. return !AArch64InstrInfo::hasShiftedReg(*FirstMI);
  335. }
  336. break;
  337. }
  338. return false;
  339. }
  340. /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
  341. /// together. Given SecondMI, when FirstMI is unspecified, then check if
  342. /// SecondMI may be part of a fused pair at all.
  343. static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
  344. const TargetSubtargetInfo &TSI,
  345. const MachineInstr *FirstMI,
  346. const MachineInstr &SecondMI) {
  347. const AArch64Subtarget &ST = static_cast<const AArch64Subtarget&>(TSI);
  348. // All checking functions assume that the 1st instr is a wildcard if it is
  349. // unspecified.
  350. if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) {
  351. bool CmpOnly = !ST.hasArithmeticBccFusion();
  352. if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly))
  353. return true;
  354. }
  355. if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI))
  356. return true;
  357. if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
  358. return true;
  359. if (ST.hasFuseCryptoEOR() && isCryptoEORPair(FirstMI, SecondMI))
  360. return true;
  361. if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
  362. return true;
  363. if (ST.hasFuseAddress() && isAddressLdStPair(FirstMI, SecondMI))
  364. return true;
  365. if (ST.hasFuseCCSelect() && isCCSelectPair(FirstMI, SecondMI))
  366. return true;
  367. if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
  368. return true;
  369. return false;
  370. }
  371. std::unique_ptr<ScheduleDAGMutation>
  372. llvm::createAArch64MacroFusionDAGMutation() {
  373. return createMacroFusionDAGMutation(shouldScheduleAdjacent);
  374. }