//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "BPFTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
  cl::Hidden, cl::init(false),
  cl::desc("Expand memcpy into load/store pairs in order"));

static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg) {
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}

static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg,
                 SDValue Val) {
  MachineFunction &MF = DAG.getMachineFunction();
  std::string Str;
  raw_string_ostream OS(Str);
  OS << Msg;
  Val->print(OS);
  OS.flush();
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Str, DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM) {
  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Mark unsupported atomic operations as Custom so we can emit better error
  // messages than the fatal errors SelectionDAG would otherwise produce.
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }

  for (auto VT : {MVT::i32, MVT::i64}) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::CTTZ, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ, MVT::i64, Custom);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
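
  // BPF at this feature level has no sign-extending load instructions (an
  // assumption about the ISA targeted here; later ISA revisions add them),
  // so SEXTLOAD of i8/i16/i32 is expanded below and the sign extension is
  // materialized as an explicit shift pair instead.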

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes. The
    // loads and stores could therefore be moved apart from each other, which
    // causes trouble for the memcpy pattern matcher inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and can be kept in order.
    // Hence, we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // Inline memcpy() so the kernel sees explicit copies.
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
}

bool BPFTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}
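
// With ALU32, every 32-bit ALU instruction zeroes the upper 32 bits of its
// destination register, so zero-extending an i32 value to i64 costs no extra
// instruction; report it as free so the optimizer does not insert one.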
bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}
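
// Inline-asm register constraints: 'r' selects a 64-bit GPR (r0-r10), and the
// BPF-specific 'w' selects a 32-bit subregister (w0-w10) when ALU32 is
// enabled. Illustrative use only:
//   asm volatile("%0 += 1" : "+w"(x)); // 32-bit add on x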
BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1)
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *err_msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("Unhandled custom legalization");
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      err_msg = "Unsupported atomic operations, please use 32/64 bit version";
    else
      err_msg = "Unsupported atomic operations, please use 64 bit version";
    break;
  }

  SDLoc DL(N);
  fail(DL, DAG, err_msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    report_fatal_error("Unsupported dynamic stack allocation");
  default:
    llvm_unreachable("unimplemented operand");
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  for (auto &VA : ArgLocs) {
    if (VA.isRegLoc()) {
      // Argument passed in registers.
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        errs() << "LowerFormalArguments Unhandled argument type: "
               << RegVT.getEVTString() << '\n';
        llvm_unreachable(nullptr);
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert
        // an assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      fail(DL, DAG, "defined with too many args");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }

  if (IsVarArg || MF.getFunction().hasStructRetAttr()) {
    fail(DL, DAG, "functions with VarArgs or StructRet are not supported");
  }

  return Chain;
}
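
// The BPF calling convention passes the first five arguments in registers
// R1-R5 and returns the result in R0; there is no stack argument passing,
// which is why LowerCall below diagnoses calls with more than MaxArgs
// arguments.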
const unsigned BPFTargetLowering::MaxArgs = 5;

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // The BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("Unsupported calling convention");
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many args to ", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;

    fail(CLI.DL, DAG, "pass by value not supported ", Callee);
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments.
  for (unsigned i = 0,
                e = std::min(static_cast<unsigned>(ArgLocs.size()), MaxArgs);
       i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown loc info");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into the RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      llvm_unreachable("call arg pass bug");
  }

  SDValue InFlag;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InFlag
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
    fail(CLI.DL, DAG,
         Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
               "' is not supported."));
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InFlag, CLI.DL);
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_FLAG;

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "only integer returns supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Flag);

    // Guarantee that all emitted copies are stuck together,
    // so they cannot be scheduled apart.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() >= 2) {
    fail(DL, DAG, "only small returns supported");
    for (unsigned i = 0, e = Ins.size(); i != e; ++i)
      InVals.push_back(DAG.getConstant(0, DL, Ins[i].VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InFlag).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physreg.
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InFlag).getValue(1);
    InFlag = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}
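
// BPF without the JmpExt feature only has the "greater than" style
// conditional jumps (JGT/JGE/JSGT/JSGE); the "less than" forms
// (JLT/JLE/JSLT/JSLE) are missing. Lower a "less than" comparison by swapping
// the operands and using the swapped condition code, e.g. SETULT(LHS, RHS)
// becomes SETUGT(RHS, LHS).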
static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, VTs, Ops);
}

const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((BPFISD::NodeType)Opcode) {
  case BPFISD::FIRST_NUMBER:
    break;
  case BPFISD::RET_FLAG:
    return "BPFISD::RET_FLAG";
  case BPFISD::CALL:
    return "BPFISD::CALL";
  case BPFISD::SELECT_CC:
    return "BPFISD::SELECT_CC";
  case BPFISD::BR_CC:
    return "BPFISD::BR_CC";
  case BPFISD::Wrapper:
    return "BPFISD::Wrapper";
  case BPFISD::MEMCPY:
    return "BPFISD::MEMCPY";
  }
  return nullptr;
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto N = cast<GlobalAddressSDNode>(Op);
  assert(N->getOffset() == 0 && "Invalid offset for global address");

  SDLoc DL(Op);
  const GlobalValue *GV = N->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i64);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}
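
// Promote a 32-bit subregister value into a 64-bit register. The unsigned
// case is a single MOV_32_64, since ALU32 writes already zero the upper half;
// the signed case additionally shifts left and then arithmetic-shifts right
// by 32, e.g. 0x80000000 becomes 0xFFFFFFFF80000000.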
unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }

  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
  BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
      .addReg(PromotedReg0).addImm(32);
  BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
      .addReg(PromotedReg1).addImm(32);

  return PromotedReg2;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
  // third scratch register to serve as the destination register of the load
  // and the source register of the store.
  //
  // The scratch register carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an undef value isn't a
  // problem, since we load into the register from memory anyway. The Dead
  // flag is needed because the value in the scratch register isn't supposed
  // to be used by any other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  assert((isSelectRROp || isSelectRIOp || isMemcpyOp) &&
         "Unexpected instr type to insert");
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values
  // to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  //  ...
  //   TrueVal = ...
  //   jmp_XX r1, r2 goto Copy1MBB
  //   fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert Branch if Flag.
  int CC = MI.getOperand(3).getImm();
  int NewCC;
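  // Map the ISD condition code onto the matching BPF jump opcode. For
  // example, SET_NEWCC(SETGT, JSGT) expands to a case that selects
  // BPF::JSGT_rr or BPF::JSGT_ri (or their _32 variants when a 32-bit
  // compare can be used directly).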
  switch (CC) {
#define SET_NEWCC(X, Y)                                      \
  case ISD::X:                                               \
    if (is32BitCmp && HasJmp32)                              \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else                                                     \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri;      \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // eBPF at the moment only has 64-bit comparisons. Any 32-bit comparison
  // needs to be promoted; however, if the 32-bit comparison operands are
  // destination registers then they are implicitly zero-extended already,
  // and there is no need for an explicit zero-extend sequence for them.
  //
  // We simply do the extension in all situations in this method, and we will
  // try to remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check before we build the J*_ri instruction.
    assert(isInt<32>(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges.
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}
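
// BPF loads and stores only support a register-plus-immediate addressing form
// (a signed 16-bit offset in the ISA encoding), e.g. "r0 = *(u64 *)(r1 + 8)"
// in BPF assembly, so reject any mode that needs a global base, a scaled
// index, or a second register.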
bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}