|
- //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines the AArch64-specific support for the FastISel class. Some
- // of the target-specific code is generated by tablegen in the file
- // AArch64GenFastISel.inc, which is #included here.
- //
- //===----------------------------------------------------------------------===//
- #include "AArch64.h"
- #include "AArch64CallingConvention.h"
- #include "AArch64MachineFunctionInfo.h"
- #include "AArch64RegisterInfo.h"
- #include "AArch64Subtarget.h"
- #include "MCTargetDesc/AArch64AddressingModes.h"
- #include "Utils/AArch64BaseInfo.h"
- #include "llvm/ADT/APFloat.h"
- #include "llvm/ADT/APInt.h"
- #include "llvm/ADT/DenseMap.h"
- #include "llvm/ADT/SmallVector.h"
- #include "llvm/Analysis/BranchProbabilityInfo.h"
- #include "llvm/CodeGen/CallingConvLower.h"
- #include "llvm/CodeGen/FastISel.h"
- #include "llvm/CodeGen/FunctionLoweringInfo.h"
- #include "llvm/CodeGen/ISDOpcodes.h"
- #include "llvm/CodeGen/MachineBasicBlock.h"
- #include "llvm/CodeGen/MachineConstantPool.h"
- #include "llvm/CodeGen/MachineFrameInfo.h"
- #include "llvm/CodeGen/MachineInstr.h"
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineMemOperand.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/RuntimeLibcalls.h"
- #include "llvm/CodeGen/ValueTypes.h"
- #include "llvm/IR/Argument.h"
- #include "llvm/IR/Attributes.h"
- #include "llvm/IR/BasicBlock.h"
- #include "llvm/IR/CallingConv.h"
- #include "llvm/IR/Constant.h"
- #include "llvm/IR/Constants.h"
- #include "llvm/IR/DataLayout.h"
- #include "llvm/IR/DerivedTypes.h"
- #include "llvm/IR/Function.h"
- #include "llvm/IR/GetElementPtrTypeIterator.h"
- #include "llvm/IR/GlobalValue.h"
- #include "llvm/IR/InstrTypes.h"
- #include "llvm/IR/Instruction.h"
- #include "llvm/IR/Instructions.h"
- #include "llvm/IR/IntrinsicInst.h"
- #include "llvm/IR/Intrinsics.h"
- #include "llvm/IR/Operator.h"
- #include "llvm/IR/Type.h"
- #include "llvm/IR/User.h"
- #include "llvm/IR/Value.h"
- #include "llvm/MC/MCInstrDesc.h"
- #include "llvm/MC/MCRegisterInfo.h"
- #include "llvm/MC/MCSymbol.h"
- #include "llvm/Support/AtomicOrdering.h"
- #include "llvm/Support/Casting.h"
- #include "llvm/Support/CodeGen.h"
- #include "llvm/Support/Compiler.h"
- #include "llvm/Support/ErrorHandling.h"
- #include "llvm/Support/MachineValueType.h"
- #include "llvm/Support/MathExtras.h"
- #include <algorithm>
- #include <cassert>
- #include <cstdint>
- #include <iterator>
- #include <utility>
- using namespace llvm;
- namespace {
- /// AArch64-specific FastISel implementation.
- ///
- /// Declares the instruction-selection, address-computation, emit and
- /// constant-materialization routines used by fast instruction selection on
- /// AArch64. The member functions are defined out of line; the
- /// tablegen-generated part is pulled in via AArch64GenFastISel.inc at the end
- /// of the class body.
- class AArch64FastISel final : public FastISel {
-   /// Describes a memory address while it is being assembled: a base (either
-   /// a register or a frame index), an optional offset register together with
-   /// its extend type and shift amount, an immediate offset, and an optional
-   /// global value.
-   class Address {
-   public:
-     using BaseKind = enum {
-       RegBase,
-       FrameIndexBase
-     };
-   private:
-     BaseKind Kind = RegBase;
-     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
-     // Which member is meaningful is determined by Kind; the accessors below
-     // assert that the matching kind is active.
-     union {
-       unsigned Reg;
-       int FI;
-     } Base;
-     unsigned OffsetReg = 0;
-     unsigned Shift = 0;
-     int64_t Offset = 0;
-     const GlobalValue *GV = nullptr;
-   public:
-     /// Creates a register-based address with no base register set (0).
-     Address() { Base.Reg = 0; }
-     void setKind(BaseKind K) { Kind = K; }
-     BaseKind getKind() const { return Kind; }
-     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
-     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
-     bool isRegBase() const { return Kind == RegBase; }
-     bool isFIBase() const { return Kind == FrameIndexBase; }
-     void setReg(unsigned Reg) {
-       assert(isRegBase() && "Invalid base register access!");
-       Base.Reg = Reg;
-     }
-     unsigned getReg() const {
-       assert(isRegBase() && "Invalid base register access!");
-       return Base.Reg;
-     }
-     void setOffsetReg(unsigned Reg) {
-       OffsetReg = Reg;
-     }
-     unsigned getOffsetReg() const {
-       return OffsetReg;
-     }
-     void setFI(unsigned FI) {
-       assert(isFIBase() && "Invalid base frame index access!");
-       Base.FI = FI;
-     }
-     unsigned getFI() const {
-       assert(isFIBase() && "Invalid base frame index access!");
-       return Base.FI;
-     }
-     // The three getters below are const like every other read accessor of
-     // this class, so they can be used through a const Address as well.
-     void setOffset(int64_t O) { Offset = O; }
-     int64_t getOffset() const { return Offset; }
-     void setShift(unsigned S) { Shift = S; }
-     unsigned getShift() const { return Shift; }
-     void setGlobalValue(const GlobalValue *G) { GV = G; }
-     const GlobalValue *getGlobalValue() const { return GV; }
-   };
-   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
-   /// make the right decision when generating code for different targets.
-   const AArch64Subtarget *Subtarget;
-   /// LLVM context of the function being selected; set in the constructor.
-   LLVMContext *Context;
-   bool fastLowerArguments() override;
-   bool fastLowerCall(CallLoweringInfo &CLI) override;
-   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
- private:
-   // Selection routines.
-   bool selectAddSub(const Instruction *I);
-   bool selectLogicalOp(const Instruction *I);
-   bool selectLoad(const Instruction *I);
-   bool selectStore(const Instruction *I);
-   bool selectBranch(const Instruction *I);
-   bool selectIndirectBr(const Instruction *I);
-   bool selectCmp(const Instruction *I);
-   bool selectSelect(const Instruction *I);
-   bool selectFPExt(const Instruction *I);
-   bool selectFPTrunc(const Instruction *I);
-   bool selectFPToInt(const Instruction *I, bool Signed);
-   bool selectIntToFP(const Instruction *I, bool Signed);
-   bool selectRem(const Instruction *I, unsigned ISDOpcode);
-   bool selectRet(const Instruction *I);
-   bool selectTrunc(const Instruction *I);
-   bool selectIntExt(const Instruction *I);
-   bool selectMul(const Instruction *I);
-   bool selectShift(const Instruction *I);
-   bool selectBitCast(const Instruction *I);
-   bool selectFRem(const Instruction *I);
-   bool selectSDiv(const Instruction *I);
-   bool selectGetElementPtr(const Instruction *I);
-   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
-   // Utility helper routines.
-   bool isTypeLegal(Type *Ty, MVT &VT);
-   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
-   bool isValueAvailable(const Value *V) const;
-   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
-   bool computeCallAddress(const Value *V, Address &Addr);
-   bool simplifyAddress(Address &Addr, MVT VT);
-   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
-                             MachineMemOperand::Flags Flags,
-                             unsigned ScaleFactor, MachineMemOperand *MMO);
-   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
-   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
-                           unsigned Alignment);
-   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
-                          const Value *Cond);
-   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
-   bool optimizeSelect(const SelectInst *SI);
-   unsigned getRegForGEPIndex(const Value *Idx);
-   // Emit helper routines.
-   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
-                       const Value *RHS, bool SetFlags = false,
-                       bool WantResult = true, bool IsZExt = false);
-   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
-                          unsigned RHSReg, bool SetFlags = false,
-                          bool WantResult = true);
-   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
-                          uint64_t Imm, bool SetFlags = false,
-                          bool WantResult = true);
-   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
-                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
-                          uint64_t ShiftImm, bool SetFlags = false,
-                          bool WantResult = true);
-   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
-                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
-                          uint64_t ShiftImm, bool SetFlags = false,
-                          bool WantResult = true);
-   // Emit functions.
-   bool emitCompareAndBranch(const BranchInst *BI);
-   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
-   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
-   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
-   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
-   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
-                     MachineMemOperand *MMO = nullptr);
-   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
-                  MachineMemOperand *MMO = nullptr);
-   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
-                         MachineMemOperand *MMO = nullptr);
-   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
-   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
-   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
-                    bool SetFlags = false, bool WantResult = true,
-                    bool IsZExt = false);
-   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
-   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
-                    bool SetFlags = false, bool WantResult = true,
-                    bool IsZExt = false);
-   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
-                        bool WantResult = true);
-   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
-                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
-                        bool WantResult = true);
-   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
-                          const Value *RHS);
-   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
-                             uint64_t Imm);
-   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
-                             unsigned RHSReg, uint64_t ShiftImm);
-   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
-   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
-   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
-   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
-   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
-   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
-                       bool IsZExt = true);
-   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
-   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
-                       bool IsZExt = true);
-   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
-   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
-                       bool IsZExt = false);
-   unsigned materializeInt(const ConstantInt *CI, MVT VT);
-   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
-   unsigned materializeGV(const GlobalValue *GV);
-   // Call handling routines.
- private:
-   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
-   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
-                        unsigned &NumBytes);
-   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
- public:
-   // Backend specific FastISel code.
-   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
-   unsigned fastMaterializeConstant(const Constant *C) override;
-   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
-   /// Caches the AArch64 subtarget of the machine function and the LLVM
-   /// context of the IR function described by \p FuncInfo.
-   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
-                            const TargetLibraryInfo *LibInfo)
-       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
-     Subtarget =
-         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
-     Context = &FuncInfo.Fn->getContext();
-   }
-   bool fastSelectInstruction(const Instruction *I) override;
- #include "AArch64GenFastISel.inc"
- };
- } // end anonymous namespace
- /// Check if the sign-/zero-extend will be a noop.
- ///
- /// Returns true when the extended operand is a load with exactly one use, or
- /// a function argument that already carries the extension attribute matching
- /// the kind of extend (zext attribute for a zext, sext attribute for a sext).
- static bool isIntExtFree(const Instruction *I) {
-   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
-          "Unexpected integer extend instruction.");
-   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
-          "Unexpected value type.");
-   const Value *Src = I->getOperand(0);
-   // Extending the result of a single-use load.
-   const auto *Load = dyn_cast<LoadInst>(Src);
-   if (Load && Load->hasOneUse())
-     return true;
-   // Extending an argument that is already marked as extended the same way.
-   const auto *Param = dyn_cast<Argument>(Src);
-   if (!Param)
-     return false;
-   return isa<ZExtInst>(I) ? Param->hasZExtAttr() : Param->hasSExtAttr();
- }
- /// Determine the implicit scale factor that is applied by a memory
- /// operation for a given value type.
- ///
- /// \returns 1, 2, 4 or 8 for the handled integer and floating-point types,
- /// and 0 for every other type (meaning "invalid").
- static unsigned getImplicitScaleFactor(MVT VT) {
-   switch (VT.SimpleTy) {
-   case MVT::i1:
-   case MVT::i8:
-     return 1;
-   case MVT::i16:
-     return 2;
-   case MVT::i32:
-   case MVT::f32:
-     return 4;
-   case MVT::i64:
-   case MVT::f64:
-     return 8;
-   default:
-     break;
-   }
-   // Anything else has no implicit scale factor.
-   return 0;
- }
- /// Selects the calling-convention assignment function for \p CC. The
- /// WebKit_JS, GHC and CFGuard_Check conventions have dedicated functions;
- /// every other convention uses the Darwin or the AAPCS one depending on the
- /// subtarget.
- CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
-   switch (CC) {
-   case CallingConv::WebKit_JS:
-     return CC_AArch64_WebKit_JS;
-   case CallingConv::GHC:
-     return CC_AArch64_GHC;
-   case CallingConv::CFGuard_Check:
-     return CC_AArch64_Win64_CFGuard_Check;
-   default:
-     break;
-   }
-   if (Subtarget->isTargetDarwin())
-     return CC_AArch64_DarwinPCS;
-   return CC_AArch64_AAPCS;
- }
- /// Materializes the address of a static alloca into a fresh register by
- /// emitting an ADDXri of its frame index with a zero immediate.
- ///
- /// \returns the result register, or 0 if \p AI is not in the static alloca
- /// map (dynamic allocas are not handled here).
- unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
-   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
-          "Alloca should always return a pointer.");
-   // Don't handle dynamic allocas. A single lookup both answers "is this a
-   // static alloca" and yields its frame index.
-   auto SI = FuncInfo.StaticAllocaMap.find(AI);
-   if (SI == FuncInfo.StaticAllocaMap.end())
-     return 0;
-   Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
-           ResultReg)
-       .addFrameIndex(SI->second)
-       .addImm(0)
-       .addImm(0);
-   return ResultReg;
- }
- /// Materializes the integer constant \p CI as a value of type \p VT.
- ///
- /// Non-zero values are delegated to fastEmit_i; zero is produced by copying
- /// from the zero register of the matching width. Returns 0 for types that
- /// compare greater than MVT::i64.
- unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
-   if (VT > MVT::i64)
-     return 0;
-   if (CI->isZero()) {
-     // Create a copy from the zero register to materialize a "0" value.
-     const TargetRegisterClass *RC = nullptr;
-     unsigned ZeroReg = 0;
-     if (VT == MVT::i64) {
-       RC = &AArch64::GPR64RegClass;
-       ZeroReg = AArch64::XZR;
-     } else {
-       RC = &AArch64::GPR32RegClass;
-       ZeroReg = AArch64::WZR;
-     }
-     Register ResultReg = createResultReg(RC);
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-             TII.get(TargetOpcode::COPY), ResultReg)
-         .addReg(ZeroReg, getKillRegState(true));
-     return ResultReg;
-   }
-   return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
- }
- /// Materializes the floating-point constant \p CFP of type \p VT.
- ///
- /// Strategies are tried in this order: the dedicated zero path, an FMOV
- /// immediate, an integer move plus copy (large code model only), and finally
- /// a load from the constant pool. Returns 0 for types other than f32/f64.
- unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
-   // Positive zero (+0.0) has to be materialized with a fmov from the zero
-   // register, because the immediate version of fmov cannot encode zero.
-   if (CFP->isNullValue())
-     return fastMaterializeFloatZero(CFP);
-   const bool Is64Bit = (VT == MVT::f64);
-   if (!Is64Bit && VT != MVT::f32)
-     return 0;
-   const APFloat Val = CFP->getValueAPF();
-   // This checks to see if we can use FMOV instructions to materialize
-   // a constant, otherwise we have to materialize via the constant pool.
-   int Imm;
-   if (Is64Bit)
-     Imm = AArch64_AM::getFP64Imm(Val);
-   else
-     Imm = AArch64_AM::getFP32Imm(Val);
-   if (Imm != -1) {
-     const unsigned FMovOpc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
-     return fastEmitInst_i(FMovOpc, TLI.getRegClassFor(VT), Imm);
-   }
-   // For the large code model materialize the FP constant in code.
-   if (TM.getCodeModel() == CodeModel::Large) {
-     const unsigned MovOpc = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
-     const TargetRegisterClass *IntRC =
-         Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
-     // Move the raw bit pattern into an integer register first ...
-     Register TmpReg = createResultReg(IntRC);
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovOpc), TmpReg)
-         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
-     // ... then copy it into a register of the class used for VT.
-     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-             TII.get(TargetOpcode::COPY), ResultReg)
-         .addReg(TmpReg, getKillRegState(true));
-     return ResultReg;
-   }
-   // Materialize via constant pool. MachineConstantPool wants an explicit
-   // alignment.
-   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
-   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
-   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
-           ADRPReg)
-       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
-   const unsigned LoadOpc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
-   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LoadOpc),
-           ResultReg)
-       .addReg(ADRPReg)
-       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
-   return ResultReg;
- }
- /// Materializes the address of the global value \p GV into a register.
- ///
- /// Emits ADRP followed by either an ADD of the page offset or, for GOT
- /// references, a load of the GOT entry (with a 32-to-64-bit widening on
- /// ILP32). Returns 0 for thread-local globals, for non-simple pointer value
- /// types, and when neither small addressing nor MachO is in use.
- unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
-   // We can't handle thread-local variables quickly yet.
-   if (GV->isThreadLocal())
-     return 0;
-   // MachO still uses GOT for large code-model accesses, but ELF requires
-   // movz/movk sequences, which FastISel doesn't handle yet.
-   if (!(Subtarget->useSmallAddressing() || Subtarget->isTargetMachO()))
-     return 0;
-   const unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
-   if (!DestEVT.isSimple())
-     return 0;
-   // Both sequences start with the same ADRP of the global's page.
-   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
-           ADRPReg)
-       .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
-   if (!(OpFlags & AArch64II::MO_GOT)) {
-     // ADRP + ADDX
-     Register AddrReg = createResultReg(&AArch64::GPR64spRegClass);
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-             TII.get(AArch64::ADDXri), AddrReg)
-         .addReg(ADRPReg)
-         .addGlobalAddress(GV, 0,
-                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
-         .addImm(0);
-     return AddrReg;
-   }
-   // ADRP + LDRX
-   const bool IsILP32 = Subtarget->isTargetILP32();
-   Register LoadedReg = createResultReg(IsILP32 ? &AArch64::GPR32RegClass
-                                                : &AArch64::GPR64RegClass);
-   const unsigned LdrOpc = IsILP32 ? AArch64::LDRWui : AArch64::LDRXui;
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
-           LoadedReg)
-       .addReg(ADRPReg)
-       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
-                                    AArch64II::MO_NC | OpFlags);
-   if (!IsILP32)
-     return LoadedReg;
-   // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
-   // so we must extend the result on ILP32.
-   Register Result64 = createResultReg(&AArch64::GPR64RegClass);
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-           TII.get(TargetOpcode::SUBREG_TO_REG))
-       .addDef(Result64)
-       .addImm(0)
-       .addReg(LoadedReg, RegState::Kill)
-       .addImm(AArch64::sub_32);
-   return Result64;
- }
- /// Materializes the constant \p C by dispatching on its kind: null pointers
- /// and integers go to materializeInt, floating-point constants to
- /// materializeFP and global values to materializeGV. Returns 0 for
- /// non-simple value types and for any other kind of constant.
- unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
-   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
-   // Only handle simple types.
-   if (!CEVT.isSimple())
-     return 0;
-   MVT VT = CEVT.getSimpleVT();
-   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
-   // 'null' pointers need to have a somewhat special treatment.
-   if (isa<ConstantPointerNull>(C)) {
-     assert(VT == MVT::i64 && "Expected 64-bit pointers");
-     const ConstantInt *Zero64 = ConstantInt::get(Type::getInt64Ty(*Context), 0);
-     return materializeInt(Zero64, VT);
-   }
-   if (const auto *IntC = dyn_cast<ConstantInt>(C))
-     return materializeInt(IntC, VT);
-   if (const auto *FPC = dyn_cast<ConstantFP>(C))
-     return materializeFP(FPC, VT);
-   if (const auto *Global = dyn_cast<GlobalValue>(C))
-     return materializeGV(Global);
-   return 0;
- }
- /// Materializes a floating-point zero by emitting an FMOV from the integer
- /// zero register of the matching width. Returns 0 if the type is not legal
- /// or is neither f32 nor f64.
- unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
-   assert(CFP->isNullValue() &&
-          "Floating-point constant is not a positive zero.");
-   MVT VT;
-   if (!isTypeLegal(CFP->getType(), VT))
-     return 0;
-   if (VT == MVT::f64)
-     return fastEmitInst_r(AArch64::FMOVXDr, TLI.getRegClassFor(VT),
-                           AArch64::XZR);
-   if (VT == MVT::f32)
-     return fastEmitInst_r(AArch64::FMOVWSr, TLI.getRegClassFor(VT),
-                           AArch64::WZR);
-   return 0;
- }
- /// Check if the multiply is by a power-of-2 constant.
- ///
- /// Returns true if \p I is a multiply and either of its two operands is a
- /// constant integer whose value is a power of two.
- static bool isMulPowOf2(const Value *I) {
-   const auto *Mul = dyn_cast<MulOperator>(I);
-   if (!Mul)
-     return false;
-   for (unsigned OpIdx = 0; OpIdx != 2; ++OpIdx) {
-     const auto *C = dyn_cast<ConstantInt>(Mul->getOperand(OpIdx));
-     if (C && C->getValue().isPowerOf2())
-       return true;
-   }
-   return false;
- }
- /// Computes the address to get to an object.
- ///
- /// Looks through bitcasts, no-op int/pointer casts, GEPs with constant
- /// indices, static allocas, adds/subs of constants, and scaled or extended
- /// index expressions (shl, mul, and, sext, zext), folding what it can into
- /// \p Addr. Whatever cannot be folded is materialized into a register and
- /// used as the base register or, if a base is already set, as the offset
- /// register.
- ///
- /// \param Obj  the value whose address is being computed.
- /// \param Addr the address description to update; it may already carry an
- ///             offset or base from an enclosing recursive call.
- /// \param Ty   type of the accessed memory, or null. The Shl/Mul/And cases
- ///             only fold when the access size matches the scale.
- /// \returns true if \p Addr now describes \p Obj. On a false return \p Addr
- ///          may have been partially updated.
- bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
- {
-   const User *U = nullptr;
-   unsigned Opcode = Instruction::UserOp1;
-   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
-     // Don't walk into other basic blocks unless the object is an alloca from
-     // another block, otherwise it may not have a virtual register assigned.
-     // Obj is not necessarily an alloca, so use a checked cast before the
-     // static-alloca lookup instead of an unconditional downcast.
-     const auto *MaybeAlloca = dyn_cast<AllocaInst>(I);
-     if ((MaybeAlloca && FuncInfo.StaticAllocaMap.count(MaybeAlloca)) ||
-         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
-       Opcode = I->getOpcode();
-       U = I;
-     }
-   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
-     Opcode = C->getOpcode();
-     U = C;
-   }
-   if (auto *PtrTy = dyn_cast<PointerType>(Obj->getType()))
-     if (PtrTy->getAddressSpace() > 255)
-       // Fast instruction selection doesn't support the special
-       // address spaces.
-       return false;
-   switch (Opcode) {
-   default:
-     break;
-   case Instruction::BitCast:
-     // Look through bitcasts.
-     return computeAddress(U->getOperand(0), Addr, Ty);
-   case Instruction::IntToPtr:
-     // Look past no-op inttoptrs.
-     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
-         TLI.getPointerTy(DL))
-       return computeAddress(U->getOperand(0), Addr, Ty);
-     break;
-   case Instruction::PtrToInt:
-     // Look past no-op ptrtoints.
-     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
-       return computeAddress(U->getOperand(0), Addr, Ty);
-     break;
-   case Instruction::GetElementPtr: {
-     Address SavedAddr = Addr;
-     uint64_t TmpOffset = Addr.getOffset();
-     // Iterate through the GEP folding the constants into offsets where
-     // we can.
-     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
-          GTI != E; ++GTI) {
-       const Value *Op = GTI.getOperand();
-       if (StructType *STy = GTI.getStructTypeOrNull()) {
-         const StructLayout *SL = DL.getStructLayout(STy);
-         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
-         TmpOffset += SL->getElementOffset(Idx);
-       } else {
-         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
-         while (true) {
-           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
-             // Constant-offset addressing.
-             TmpOffset += CI->getSExtValue() * S;
-             break;
-           }
-           if (canFoldAddIntoGEP(U, Op)) {
-             // A compatible add with a constant operand. Fold the constant.
-             ConstantInt *CI =
-                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
-             TmpOffset += CI->getSExtValue() * S;
-             // Iterate on the other operand.
-             Op = cast<AddOperator>(Op)->getOperand(0);
-             continue;
-           }
-           // Unsupported
-           goto unsupported_gep;
-         }
-       }
-     }
-     // Try to grab the base operand now.
-     Addr.setOffset(TmpOffset);
-     if (computeAddress(U->getOperand(0), Addr, Ty))
-       return true;
-     // We failed, restore everything and try the other options.
-     Addr = SavedAddr;
-   unsupported_gep:
-     break;
-   }
-   case Instruction::Alloca: {
-     const AllocaInst *AI = cast<AllocaInst>(Obj);
-     DenseMap<const AllocaInst *, int>::iterator SI =
-         FuncInfo.StaticAllocaMap.find(AI);
-     if (SI != FuncInfo.StaticAllocaMap.end()) {
-       Addr.setKind(Address::FrameIndexBase);
-       Addr.setFI(SI->second);
-       return true;
-     }
-     break;
-   }
-   case Instruction::Add: {
-     // Adds of constants are common and easy enough.
-     const Value *LHS = U->getOperand(0);
-     const Value *RHS = U->getOperand(1);
-     if (isa<ConstantInt>(LHS))
-       std::swap(LHS, RHS);
-     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
-       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
-       return computeAddress(LHS, Addr, Ty);
-     }
-     // Otherwise try to use one operand as base and the other as offset,
-     // rolling back if either side cannot be folded.
-     Address Backup = Addr;
-     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
-       return true;
-     Addr = Backup;
-     break;
-   }
-   case Instruction::Sub: {
-     // Subs of constants are common and easy enough.
-     const Value *LHS = U->getOperand(0);
-     const Value *RHS = U->getOperand(1);
-     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
-       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
-       return computeAddress(LHS, Addr, Ty);
-     }
-     break;
-   }
-   case Instruction::Shl: {
-     if (Addr.getOffsetReg())
-       break;
-     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
-     if (!CI)
-       break;
-     unsigned Val = CI->getZExtValue();
-     if (Val < 1 || Val > 3)
-       break;
-     // Only fold the shift when it equals log2 of the access size in bytes.
-     uint64_t NumBytes = 0;
-     if (Ty && Ty->isSized()) {
-       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
-       NumBytes = NumBits / 8;
-       if (!isPowerOf2_64(NumBits))
-         NumBytes = 0;
-     }
-     if (NumBytes != (1ULL << Val))
-       break;
-     Addr.setShift(Val);
-     Addr.setExtendType(AArch64_AM::LSL);
-     const Value *Src = U->getOperand(0);
-     if (const auto *I = dyn_cast<Instruction>(Src)) {
-       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
-         // Fold the zext or sext when it won't become a noop.
-         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
-           if (!isIntExtFree(ZE) &&
-               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
-             Addr.setExtendType(AArch64_AM::UXTW);
-             Src = ZE->getOperand(0);
-           }
-         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
-           if (!isIntExtFree(SE) &&
-               SE->getOperand(0)->getType()->isIntegerTy(32)) {
-             Addr.setExtendType(AArch64_AM::SXTW);
-             Src = SE->getOperand(0);
-           }
-         }
-       }
-     }
-     // An 'and' with 0xffffffff is folded as a UXTW extend of the low 32 bits.
-     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
-       if (AI->getOpcode() == Instruction::And) {
-         const Value *LHS = AI->getOperand(0);
-         const Value *RHS = AI->getOperand(1);
-         if (const auto *C = dyn_cast<ConstantInt>(LHS))
-           if (C->getValue() == 0xffffffff)
-             std::swap(LHS, RHS);
-         if (const auto *C = dyn_cast<ConstantInt>(RHS))
-           if (C->getValue() == 0xffffffff) {
-             Addr.setExtendType(AArch64_AM::UXTW);
-             Register Reg = getRegForValue(LHS);
-             if (!Reg)
-               return false;
-             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
-             Addr.setOffsetReg(Reg);
-             return true;
-           }
-       }
-     Register Reg = getRegForValue(Src);
-     if (!Reg)
-       return false;
-     Addr.setOffsetReg(Reg);
-     return true;
-   }
-   case Instruction::Mul: {
-     if (Addr.getOffsetReg())
-       break;
-     if (!isMulPowOf2(U))
-       break;
-     const Value *LHS = U->getOperand(0);
-     const Value *RHS = U->getOperand(1);
-     // Canonicalize power-of-2 value to the RHS.
-     if (const auto *C = dyn_cast<ConstantInt>(LHS))
-       if (C->getValue().isPowerOf2())
-         std::swap(LHS, RHS);
-     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
-     const auto *C = cast<ConstantInt>(RHS);
-     unsigned Val = C->getValue().logBase2();
-     if (Val < 1 || Val > 3)
-       break;
-     // Only fold the scale when it equals the access size in bytes.
-     uint64_t NumBytes = 0;
-     if (Ty && Ty->isSized()) {
-       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
-       NumBytes = NumBits / 8;
-       if (!isPowerOf2_64(NumBits))
-         NumBytes = 0;
-     }
-     if (NumBytes != (1ULL << Val))
-       break;
-     Addr.setShift(Val);
-     Addr.setExtendType(AArch64_AM::LSL);
-     const Value *Src = LHS;
-     if (const auto *I = dyn_cast<Instruction>(Src)) {
-       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
-         // Fold the zext or sext when it won't become a noop.
-         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
-           if (!isIntExtFree(ZE) &&
-               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
-             Addr.setExtendType(AArch64_AM::UXTW);
-             Src = ZE->getOperand(0);
-           }
-         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
-           if (!isIntExtFree(SE) &&
-               SE->getOperand(0)->getType()->isIntegerTy(32)) {
-             Addr.setExtendType(AArch64_AM::SXTW);
-             Src = SE->getOperand(0);
-           }
-         }
-       }
-     }
-     Register Reg = getRegForValue(Src);
-     if (!Reg)
-       return false;
-     Addr.setOffsetReg(Reg);
-     return true;
-   }
-   case Instruction::And: {
-     if (Addr.getOffsetReg())
-       break;
-     // Only handled for 8-bit accesses, where no scaling is involved.
-     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
-       break;
-     const Value *LHS = U->getOperand(0);
-     const Value *RHS = U->getOperand(1);
-     if (const auto *C = dyn_cast<ConstantInt>(LHS))
-       if (C->getValue() == 0xffffffff)
-         std::swap(LHS, RHS);
-     if (const auto *C = dyn_cast<ConstantInt>(RHS))
-       if (C->getValue() == 0xffffffff) {
-         Addr.setShift(0);
-         Addr.setExtendType(AArch64_AM::UXTW);
-         Register Reg = getRegForValue(LHS);
-         if (!Reg)
-           return false;
-         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
-         Addr.setOffsetReg(Reg);
-         return true;
-       }
-     break;
-   }
-   case Instruction::SExt:
-   case Instruction::ZExt: {
-     // An extend can only become the offset register, so a base register
-     // must already be present and the offset register must still be free.
-     if (!Addr.getReg() || Addr.getOffsetReg())
-       break;
-     const Value *Src = nullptr;
-     // Fold the zext or sext when it won't become a noop.
-     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
-       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
-         Addr.setExtendType(AArch64_AM::UXTW);
-         Src = ZE->getOperand(0);
-       }
-     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
-       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
-         Addr.setExtendType(AArch64_AM::SXTW);
-         Src = SE->getOperand(0);
-       }
-     }
-     if (!Src)
-       break;
-     Addr.setShift(0);
-     Register Reg = getRegForValue(Src);
-     if (!Reg)
-       return false;
-     Addr.setOffsetReg(Reg);
-     return true;
-   }
-   } // end switch
-   // Nothing could be folded: put the value itself in a register, as the base
-   // if none is set yet ...
-   if (Addr.isRegBase() && !Addr.getReg()) {
-     Register Reg = getRegForValue(Obj);
-     if (!Reg)
-       return false;
-     Addr.setReg(Reg);
-     return true;
-   }
-   // ... otherwise as the offset register, if that slot is still free.
-   if (!Addr.getOffsetReg()) {
-     Register Reg = getRegForValue(Obj);
-     if (!Reg)
-       return false;
-     Addr.setOffsetReg(Reg);
-     return true;
-   }
-   return false;
- }
- /// Computes the callee address for \p V into \p Addr.
- ///
- /// Looks through bitcasts and no-op int/pointer casts that live in the
- /// current basic block, records a global value directly, and otherwise
- /// materializes \p V into a base register.
- ///
- /// \returns true if \p Addr was set to a global value or a non-zero register.
- bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
-   const User *U = nullptr;
-   unsigned Opcode = Instruction::UserOp1;
-   bool InMBB = true;
-   if (const auto *Inst = dyn_cast<Instruction>(V)) {
-     Opcode = Inst->getOpcode();
-     U = Inst;
-     InMBB = Inst->getParent() == FuncInfo.MBB->getBasicBlock();
-   } else if (const auto *CE = dyn_cast<ConstantExpr>(V)) {
-     Opcode = CE->getOpcode();
-     U = CE;
-   }
-   // Casts are only looked through if the operand is in the same BB.
-   if (InMBB) {
-     bool LookThrough = false;
-     switch (Opcode) {
-     case Instruction::BitCast:
-       // Look past bitcasts.
-       LookThrough = true;
-       break;
-     case Instruction::IntToPtr:
-       // Look past no-op inttoptrs.
-       LookThrough = TLI.getValueType(DL, U->getOperand(0)->getType()) ==
-                     TLI.getPointerTy(DL);
-       break;
-     case Instruction::PtrToInt:
-       // Look past no-op ptrtoints.
-       LookThrough = TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL);
-       break;
-     default:
-       break;
-     }
-     if (LookThrough)
-       return computeCallAddress(U->getOperand(0), Addr);
-   }
-   if (const auto *Global = dyn_cast<GlobalValue>(V)) {
-     Addr.setGlobalValue(Global);
-     return true;
-   }
-   if (Addr.getGlobalValue())
-     return false;
-   // If all else fails, try to materialize the value in a register.
-   Addr.setReg(getRegForValue(V));
-   return Addr.getReg() != 0;
- }
- /// Determines whether \p Ty maps to a legal simple value type that fast-isel
- /// handles. \p VT is written once the type is known to be simple, even if
- /// the function then returns false. Pointers on ILP32 targets, non-simple
- /// types, MVT::Other and f128 are rejected.
- bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
-   EVT ValueTy = TLI.getValueType(DL, Ty, true);
-   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
-     return false;
-   // Only handle simple types.
-   if (ValueTy == MVT::Other || !ValueTy.isSimple())
-     return false;
-   VT = ValueTy.getSimpleVT();
-   // f128 is a legal type, but it's not something we handle in fast-isel.
-   // Handle all other legal types, i.e. a register that will directly hold
-   // this value.
-   return VT == MVT::f128 ? false : TLI.isTypeLegal(VT);
- }
- /// Determine if the value type is supported by FastISel.
- ///
- /// FastISel for AArch64 can handle more value types than are legal. This adds
- /// simple value type such as i1, i8, and i16. Vector types are rejected
- /// unless \p IsVectorAllowed is set.
- bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
-   if (!IsVectorAllowed && Ty->isVectorTy())
-     return false;
-   if (isTypeLegal(Ty, VT))
-     return true;
-   // If this is a type that can be sign or zero-extended to a basic operation
-   // go ahead and accept it now.
-   switch (VT.SimpleTy) {
-   case MVT::i1:
-   case MVT::i8:
-   case MVT::i16:
-     return true;
-   default:
-     return false;
-   }
- }
- /// Returns true if \p V is not an instruction, or is an instruction whose
- /// parent block maps to the machine basic block currently being selected.
- bool AArch64FastISel::isValueAvailable(const Value *V) const {
-   const auto *Inst = dyn_cast<Instruction>(V);
-   if (!Inst)
-     return true;
-   return FuncInfo.MBBMap[Inst->getParent()] == FuncInfo.MBB;
- }
- bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
- if (Subtarget->isTargetILP32())
- return false;
- unsigned ScaleFactor = getImplicitScaleFactor(VT);
- if (!ScaleFactor)
- return false;
- bool ImmediateOffsetNeedsLowering = false;
- bool RegisterOffsetNeedsLowering = false;
- int64_t Offset = Addr.getOffset();
- if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
- ImmediateOffsetNeedsLowering = true;
- else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
- !isUInt<12>(Offset / ScaleFactor))
- ImmediateOffsetNeedsLowering = true;
- // Cannot encode an offset register and an immediate offset in the same
- // instruction. Fold the immediate offset into the load/store instruction and
- // emit an additional add to take care of the offset register.
- if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
- RegisterOffsetNeedsLowering = true;
- // Cannot encode zero register as base.
- if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
- RegisterOffsetNeedsLowering = true;
- // If this is a stack pointer and the offset needs to be simplified then put
- // the alloca address into a register, set the base type back to register and
- // continue. This should almost never happen.
- if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
- {
- Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
- ResultReg)
- .addFrameIndex(Addr.getFI())
- .addImm(0)
- .addImm(0);
- Addr.setKind(Address::RegBase);
- Addr.setReg(ResultReg);
- }
- if (RegisterOffsetNeedsLowering) {
- unsigned ResultReg = 0;
- if (Addr.getReg()) {
- if (Addr.getExtendType() == AArch64_AM::SXTW ||
- Addr.getExtendType() == AArch64_AM::UXTW )
- ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
- Addr.getOffsetReg(), Addr.getExtendType(),
- Addr.getShift());
- else
- ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
- Addr.getOffsetReg(), AArch64_AM::LSL,
- Addr.getShift());
- } else {
- if (Addr.getExtendType() == AArch64_AM::UXTW)
- ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
- Addr.getShift(), /*IsZExt=*/true);
- else if (Addr.getExtendType() == AArch64_AM::SXTW)
- ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
- Addr.getShift(), /*IsZExt=*/false);
- else
- ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
- Addr.getShift());
- }
- if (!ResultReg)
- return false;
- Addr.setReg(ResultReg);
- Addr.setOffsetReg(0);
- Addr.setShift(0);
- Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
- }
- // Since the offset is too large for the load/store instruction get the
- // reg+offset into a register.
- if (ImmediateOffsetNeedsLowering) {
- unsigned ResultReg;
- if (Addr.getReg())
- // Try to fold the immediate into the add instruction.
- ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
- else
- ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
- if (!ResultReg)
- return false;
- Addr.setReg(ResultReg);
- Addr.setOffset(0);
- }
- return true;
- }
- void AArch64FastISel::addLoadStoreOperands(Address &Addr,
- const MachineInstrBuilder &MIB,
- MachineMemOperand::Flags Flags,
- unsigned ScaleFactor,
- MachineMemOperand *MMO) {
- int64_t Offset = Addr.getOffset() / ScaleFactor;
- // Frame base works a bit differently. Handle it separately.
- if (Addr.isFIBase()) {
- int FI = Addr.getFI();
- // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
- // and alignment should be based on the VT.
- MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
- MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
- // Now add the rest of the operands.
- MIB.addFrameIndex(FI).addImm(Offset);
- } else {
- assert(Addr.isRegBase() && "Unexpected address kind.");
- const MCInstrDesc &II = MIB->getDesc();
- unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
- Addr.setReg(
- constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
- Addr.setOffsetReg(
- constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
- if (Addr.getOffsetReg()) {
- assert(Addr.getOffset() == 0 && "Unexpected offset");
- bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
- Addr.getExtendType() == AArch64_AM::SXTX;
- MIB.addReg(Addr.getReg());
- MIB.addReg(Addr.getOffsetReg());
- MIB.addImm(IsSigned);
- MIB.addImm(Addr.getShift() != 0);
- } else
- MIB.addReg(Addr.getReg()).addImm(Offset);
- }
- if (MMO)
- MIB.addMemOperand(MMO);
- }
- unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
- const Value *RHS, bool SetFlags,
- bool WantResult, bool IsZExt) {
- AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
- bool NeedExtend = false;
- switch (RetVT.SimpleTy) {
- default:
- return 0;
- case MVT::i1:
- NeedExtend = true;
- break;
- case MVT::i8:
- NeedExtend = true;
- ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
- break;
- case MVT::i16:
- NeedExtend = true;
- ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
- break;
- case MVT::i32: // fall-through
- case MVT::i64:
- break;
- }
- MVT SrcVT = RetVT;
- RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
- // Canonicalize immediates to the RHS first.
- if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
- std::swap(LHS, RHS);
- // Canonicalize mul by power of 2 to the RHS.
- if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
- if (isMulPowOf2(LHS))
- std::swap(LHS, RHS);
- // Canonicalize shift immediate to the RHS.
- if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
- if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
- if (isa<ConstantInt>(SI->getOperand(1)))
- if (SI->getOpcode() == Instruction::Shl ||
- SI->getOpcode() == Instruction::LShr ||
- SI->getOpcode() == Instruction::AShr )
- std::swap(LHS, RHS);
- Register LHSReg = getRegForValue(LHS);
- if (!LHSReg)
- return 0;
- if (NeedExtend)
- LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
- unsigned ResultReg = 0;
- if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
- uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
- if (C->isNegative())
- ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
- WantResult);
- else
- ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
- WantResult);
- } else if (const auto *C = dyn_cast<Constant>(RHS))
- if (C->isNullValue())
- ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
- if (ResultReg)
- return ResultReg;
- // Only extend the RHS within the instruction if there is a valid extend type.
- if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
- isValueAvailable(RHS)) {
- if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
- if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
- if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
- Register RHSReg = getRegForValue(SI->getOperand(0));
- if (!RHSReg)
- return 0;
- return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
- C->getZExtValue(), SetFlags, WantResult);
- }
- Register RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return 0;
- return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
- SetFlags, WantResult);
- }
- // Check if the mul can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS)) {
- if (isMulPowOf2(RHS)) {
- const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
- const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
- if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
- if (C->getValue().isPowerOf2())
- std::swap(MulLHS, MulRHS);
- assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
- uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
- Register RHSReg = getRegForValue(MulLHS);
- if (!RHSReg)
- return 0;
- ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
- ShiftVal, SetFlags, WantResult);
- if (ResultReg)
- return ResultReg;
- }
- }
- // Check if the shift can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS)) {
- if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
- if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
- switch (SI->getOpcode()) {
- default: break;
- case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
- case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
- case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
- }
- uint64_t ShiftVal = C->getZExtValue();
- if (ShiftType != AArch64_AM::InvalidShiftExtend) {
- Register RHSReg = getRegForValue(SI->getOperand(0));
- if (!RHSReg)
- return 0;
- ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
- ShiftVal, SetFlags, WantResult);
- if (ResultReg)
- return ResultReg;
- }
- }
- }
- }
- Register RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return 0;
- if (NeedExtend)
- RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
- return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
- }
- unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
- unsigned RHSReg, bool SetFlags,
- bool WantResult) {
- assert(LHSReg && RHSReg && "Invalid register number.");
- if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
- RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
- return 0;
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return 0;
- static const unsigned OpcTable[2][2][2] = {
- { { AArch64::SUBWrr, AArch64::SUBXrr },
- { AArch64::ADDWrr, AArch64::ADDXrr } },
- { { AArch64::SUBSWrr, AArch64::SUBSXrr },
- { AArch64::ADDSWrr, AArch64::ADDSXrr } }
- };
- bool Is64Bit = RetVT == MVT::i64;
- unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- unsigned ResultReg;
- if (WantResult)
- ResultReg = createResultReg(RC);
- else
- ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
- const MCInstrDesc &II = TII.get(Opc);
- LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
- RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg)
- .addReg(RHSReg);
- return ResultReg;
- }
- unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
- uint64_t Imm, bool SetFlags,
- bool WantResult) {
- assert(LHSReg && "Invalid register number.");
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return 0;
- unsigned ShiftImm;
- if (isUInt<12>(Imm))
- ShiftImm = 0;
- else if ((Imm & 0xfff000) == Imm) {
- ShiftImm = 12;
- Imm >>= 12;
- } else
- return 0;
- static const unsigned OpcTable[2][2][2] = {
- { { AArch64::SUBWri, AArch64::SUBXri },
- { AArch64::ADDWri, AArch64::ADDXri } },
- { { AArch64::SUBSWri, AArch64::SUBSXri },
- { AArch64::ADDSWri, AArch64::ADDSXri } }
- };
- bool Is64Bit = RetVT == MVT::i64;
- unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
- const TargetRegisterClass *RC;
- if (SetFlags)
- RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- else
- RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
- unsigned ResultReg;
- if (WantResult)
- ResultReg = createResultReg(RC);
- else
- ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
- const MCInstrDesc &II = TII.get(Opc);
- LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg)
- .addImm(Imm)
- .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
- return ResultReg;
- }
- unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
- unsigned RHSReg,
- AArch64_AM::ShiftExtendType ShiftType,
- uint64_t ShiftImm, bool SetFlags,
- bool WantResult) {
- assert(LHSReg && RHSReg && "Invalid register number.");
- assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
- RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return 0;
- // Don't deal with undefined shifts.
- if (ShiftImm >= RetVT.getSizeInBits())
- return 0;
- static const unsigned OpcTable[2][2][2] = {
- { { AArch64::SUBWrs, AArch64::SUBXrs },
- { AArch64::ADDWrs, AArch64::ADDXrs } },
- { { AArch64::SUBSWrs, AArch64::SUBSXrs },
- { AArch64::ADDSWrs, AArch64::ADDSXrs } }
- };
- bool Is64Bit = RetVT == MVT::i64;
- unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- unsigned ResultReg;
- if (WantResult)
- ResultReg = createResultReg(RC);
- else
- ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
- const MCInstrDesc &II = TII.get(Opc);
- LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
- RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg)
- .addReg(RHSReg)
- .addImm(getShifterImm(ShiftType, ShiftImm));
- return ResultReg;
- }
- unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
- unsigned RHSReg,
- AArch64_AM::ShiftExtendType ExtType,
- uint64_t ShiftImm, bool SetFlags,
- bool WantResult) {
- assert(LHSReg && RHSReg && "Invalid register number.");
- assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
- RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return 0;
- if (ShiftImm >= 4)
- return 0;
- static const unsigned OpcTable[2][2][2] = {
- { { AArch64::SUBWrx, AArch64::SUBXrx },
- { AArch64::ADDWrx, AArch64::ADDXrx } },
- { { AArch64::SUBSWrx, AArch64::SUBSXrx },
- { AArch64::ADDSWrx, AArch64::ADDSXrx } }
- };
- bool Is64Bit = RetVT == MVT::i64;
- unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
- const TargetRegisterClass *RC = nullptr;
- if (SetFlags)
- RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- else
- RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
- unsigned ResultReg;
- if (WantResult)
- ResultReg = createResultReg(RC);
- else
- ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
- const MCInstrDesc &II = TII.get(Opc);
- LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
- RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(LHSReg)
- .addReg(RHSReg)
- .addImm(getArithExtendImm(ExtType, ShiftImm));
- return ResultReg;
- }
- bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
- Type *Ty = LHS->getType();
- EVT EVT = TLI.getValueType(DL, Ty, true);
- if (!EVT.isSimple())
- return false;
- MVT VT = EVT.getSimpleVT();
- switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- case MVT::i64:
- return emitICmp(VT, LHS, RHS, IsZExt);
- case MVT::f32:
- case MVT::f64:
- return emitFCmp(VT, LHS, RHS);
- }
- }
- bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
- bool IsZExt) {
- return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
- IsZExt) != 0;
- }
- bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
- return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
- /*SetFlags=*/true, /*WantResult=*/false) != 0;
- }
- bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
- if (RetVT != MVT::f32 && RetVT != MVT::f64)
- return false;
- // Check to see if the 2nd operand is a constant that we can encode directly
- // in the compare.
- bool UseImm = false;
- if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
- if (CFP->isZero() && !CFP->isNegative())
- UseImm = true;
- Register LHSReg = getRegForValue(LHS);
- if (!LHSReg)
- return false;
- if (UseImm) {
- unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(LHSReg);
- return true;
- }
- Register RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(LHSReg)
- .addReg(RHSReg);
- return true;
- }
- unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
- bool SetFlags, bool WantResult, bool IsZExt) {
- return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
- IsZExt);
- }
- /// This method is a wrapper to simplify add emission.
- ///
- /// First try to emit an add with an immediate operand using emitAddSub_ri. If
- /// that fails, then try to materialize the immediate into a register and use
- /// emitAddSub_rr instead.
- unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
- unsigned ResultReg;
- if (Imm < 0)
- ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
- else
- ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
- if (ResultReg)
- return ResultReg;
- unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
- if (!CReg)
- return 0;
- ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
- return ResultReg;
- }
- unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
- bool SetFlags, bool WantResult, bool IsZExt) {
- return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
- IsZExt);
- }
- unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
- unsigned RHSReg, bool WantResult) {
- return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
- /*SetFlags=*/true, WantResult);
- }
- unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
- unsigned RHSReg,
- AArch64_AM::ShiftExtendType ShiftType,
- uint64_t ShiftImm, bool WantResult) {
- return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
- ShiftImm, /*SetFlags=*/true, WantResult);
- }
- unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
- const Value *LHS, const Value *RHS) {
- // Canonicalize immediates to the RHS first.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
- std::swap(LHS, RHS);
- // Canonicalize mul by power-of-2 to the RHS.
- if (LHS->hasOneUse() && isValueAvailable(LHS))
- if (isMulPowOf2(LHS))
- std::swap(LHS, RHS);
- // Canonicalize shift immediate to the RHS.
- if (LHS->hasOneUse() && isValueAvailable(LHS))
- if (const auto *SI = dyn_cast<ShlOperator>(LHS))
- if (isa<ConstantInt>(SI->getOperand(1)))
- std::swap(LHS, RHS);
- Register LHSReg = getRegForValue(LHS);
- if (!LHSReg)
- return 0;
- unsigned ResultReg = 0;
- if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
- uint64_t Imm = C->getZExtValue();
- ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
- }
- if (ResultReg)
- return ResultReg;
- // Check if the mul can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS)) {
- if (isMulPowOf2(RHS)) {
- const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
- const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
- if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
- if (C->getValue().isPowerOf2())
- std::swap(MulLHS, MulRHS);
- assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
- uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
- Register RHSReg = getRegForValue(MulLHS);
- if (!RHSReg)
- return 0;
- ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
- if (ResultReg)
- return ResultReg;
- }
- }
- // Check if the shift can be folded into the instruction.
- if (RHS->hasOneUse() && isValueAvailable(RHS)) {
- if (const auto *SI = dyn_cast<ShlOperator>(RHS))
- if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
- uint64_t ShiftVal = C->getZExtValue();
- Register RHSReg = getRegForValue(SI->getOperand(0));
- if (!RHSReg)
- return 0;
- ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
- if (ResultReg)
- return ResultReg;
- }
- }
- Register RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return 0;
- MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
- ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
- if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
- uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
- }
- return ResultReg;
- }
- unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
- unsigned LHSReg, uint64_t Imm) {
- static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
- "ISD nodes are not consecutive!");
- static const unsigned OpcTable[3][2] = {
- { AArch64::ANDWri, AArch64::ANDXri },
- { AArch64::ORRWri, AArch64::ORRXri },
- { AArch64::EORWri, AArch64::EORXri }
- };
- const TargetRegisterClass *RC;
- unsigned Opc;
- unsigned RegSize;
- switch (RetVT.SimpleTy) {
- default:
- return 0;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32: {
- unsigned Idx = ISDOpc - ISD::AND;
- Opc = OpcTable[Idx][0];
- RC = &AArch64::GPR32spRegClass;
- RegSize = 32;
- break;
- }
- case MVT::i64:
- Opc = OpcTable[ISDOpc - ISD::AND][1];
- RC = &AArch64::GPR64spRegClass;
- RegSize = 64;
- break;
- }
- if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
- return 0;
- Register ResultReg =
- fastEmitInst_ri(Opc, RC, LHSReg,
- AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
- if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
- uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
- }
- return ResultReg;
- }
- unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
- unsigned LHSReg, unsigned RHSReg,
- uint64_t ShiftImm) {
- static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
- "ISD nodes are not consecutive!");
- static const unsigned OpcTable[3][2] = {
- { AArch64::ANDWrs, AArch64::ANDXrs },
- { AArch64::ORRWrs, AArch64::ORRXrs },
- { AArch64::EORWrs, AArch64::EORXrs }
- };
- // Don't deal with undefined shifts.
- if (ShiftImm >= RetVT.getSizeInBits())
- return 0;
- const TargetRegisterClass *RC;
- unsigned Opc;
- switch (RetVT.SimpleTy) {
- default:
- return 0;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- Opc = OpcTable[ISDOpc - ISD::AND][0];
- RC = &AArch64::GPR32RegClass;
- break;
- case MVT::i64:
- Opc = OpcTable[ISDOpc - ISD::AND][1];
- RC = &AArch64::GPR64RegClass;
- break;
- }
- Register ResultReg =
- fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
- AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
- if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
- uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
- }
- return ResultReg;
- }
- unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
- uint64_t Imm) {
- return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
- }
- unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
- bool WantZExt, MachineMemOperand *MMO) {
- if (!TLI.allowsMisalignedMemoryAccesses(VT))
- return 0;
- // Simplify this down to something we can handle.
- if (!simplifyAddress(Addr, VT))
- return 0;
- unsigned ScaleFactor = getImplicitScaleFactor(VT);
- if (!ScaleFactor)
- llvm_unreachable("Unexpected value type.");
- // Negative offsets require unscaled, 9-bit, signed immediate offsets.
- // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
- bool UseScaled = true;
- if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
- UseScaled = false;
- ScaleFactor = 1;
- }
- static const unsigned GPOpcTable[2][8][4] = {
- // Sign-extend.
- { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
- AArch64::LDURXi },
- { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
- AArch64::LDURXi },
- { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
- AArch64::LDRXui },
- { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
- AArch64::LDRXui },
- { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
- AArch64::LDRXroX },
- { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
- AArch64::LDRXroX },
- { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
- AArch64::LDRXroW },
- { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
- AArch64::LDRXroW }
- },
- // Zero-extend.
- { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
- AArch64::LDURXi },
- { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
- AArch64::LDURXi },
- { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
- AArch64::LDRXui },
- { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
- AArch64::LDRXui },
- { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
- AArch64::LDRXroX },
- { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
- AArch64::LDRXroX },
- { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
- AArch64::LDRXroW },
- { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
- AArch64::LDRXroW }
- }
- };
- static const unsigned FPOpcTable[4][2] = {
- { AArch64::LDURSi, AArch64::LDURDi },
- { AArch64::LDRSui, AArch64::LDRDui },
- { AArch64::LDRSroX, AArch64::LDRDroX },
- { AArch64::LDRSroW, AArch64::LDRDroW }
- };
- unsigned Opc;
- const TargetRegisterClass *RC;
- bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
- Addr.getOffsetReg();
- unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
- if (Addr.getExtendType() == AArch64_AM::UXTW ||
- Addr.getExtendType() == AArch64_AM::SXTW)
- Idx++;
- bool IsRet64Bit = RetVT == MVT::i64;
- switch (VT.SimpleTy) {
- default:
- llvm_unreachable("Unexpected value type.");
- case MVT::i1: // Intentional fall-through.
- case MVT::i8:
- Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
- RC = (IsRet64Bit && !WantZExt) ?
- &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
- break;
- case MVT::i16:
- Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
- RC = (IsRet64Bit && !WantZExt) ?
- &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
- break;
- case MVT::i32:
- Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
- RC = (IsRet64Bit && !WantZExt) ?
- &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
- break;
- case MVT::i64:
- Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
- RC = &AArch64::GPR64RegClass;
- break;
- case MVT::f32:
- Opc = FPOpcTable[Idx][0];
- RC = &AArch64::FPR32RegClass;
- break;
- case MVT::f64:
- Opc = FPOpcTable[Idx][1];
- RC = &AArch64::FPR64RegClass;
- break;
- }
- // Create the base instruction, then add the operands.
- Register ResultReg = createResultReg(RC);
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Opc), ResultReg);
- addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
- // Loading an i1 requires special handling.
- if (VT == MVT::i1) {
- unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
- assert(ANDReg && "Unexpected AND instruction emission failure.");
- ResultReg = ANDReg;
- }
- // For zero-extending loads to 64bit we emit a 32bit load and then convert
- // the 32bit reg to a 64bit reg.
- if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
- Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBREG_TO_REG), Reg64)
- .addImm(0)
- .addReg(ResultReg, getKillRegState(true))
- .addImm(AArch64::sub_32);
- ResultReg = Reg64;
- }
- return ResultReg;
- }
- bool AArch64FastISel::selectAddSub(const Instruction *I) {
- MVT VT;
- if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
- return false;
- if (VT.isVector())
- return selectOperator(I, I->getOpcode());
- unsigned ResultReg;
- switch (I->getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction.");
- case Instruction::Add:
- ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
- break;
- case Instruction::Sub:
- ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
- break;
- }
- if (!ResultReg)
- return false;
- updateValueMap(I, ResultReg);
- return true;
- }
- bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
- MVT VT;
- if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
- return false;
- if (VT.isVector())
- return selectOperator(I, I->getOpcode());
- unsigned ResultReg;
- switch (I->getOpcode()) {
- default:
- llvm_unreachable("Unexpected instruction.");
- case Instruction::And:
- ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
- break;
- case Instruction::Or:
- ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
- break;
- case Instruction::Xor:
- ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
- break;
- }
- if (!ResultReg)
- return false;
- updateValueMap(I, ResultReg);
- return true;
- }
- bool AArch64FastISel::selectLoad(const Instruction *I) {
- MVT VT;
- // Verify we have a legal type before going any further. Currently, we handle
- // simple types that will directly fit in a register (i32/f32/i64/f64) or
- // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
- cast<LoadInst>(I)->isAtomic())
- return false;
- const Value *SV = I->getOperand(0);
- if (TLI.supportSwiftError()) {
- // Swifterror values can come from either a function parameter with
- // swifterror attribute or an alloca with swifterror attribute.
- if (const Argument *Arg = dyn_cast<Argument>(SV)) {
- if (Arg->hasSwiftErrorAttr())
- return false;
- }
- if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
- if (Alloca->isSwiftError())
- return false;
- }
- }
- // See if we can handle this address.
- Address Addr;
- if (!computeAddress(I->getOperand(0), Addr, I->getType()))
- return false;
- // Fold the following sign-/zero-extend into the load instruction.
- bool WantZExt = true;
- MVT RetVT = VT;
- const Value *IntExtVal = nullptr;
- if (I->hasOneUse()) {
- if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
- if (isTypeSupported(ZE->getType(), RetVT))
- IntExtVal = ZE;
- else
- RetVT = VT;
- } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
- if (isTypeSupported(SE->getType(), RetVT))
- IntExtVal = SE;
- else
- RetVT = VT;
- WantZExt = false;
- }
- }
- unsigned ResultReg =
- emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
- if (!ResultReg)
- return false;
- // There are a few different cases we have to handle, because the load or the
- // sign-/zero-extend might not be selected by FastISel if we fall-back to
- // SelectionDAG. There is also an ordering issue when both instructions are in
- // different basic blocks.
- // 1.) The load instruction is selected by FastISel, but the integer extend
- // not. This usually happens when the integer extend is in a different
- // basic block and SelectionDAG took over for that basic block.
- // 2.) The load instruction is selected before the integer extend. This only
- // happens when the integer extend is in a different basic block.
- // 3.) The load instruction is selected by SelectionDAG and the integer extend
- // by FastISel. This happens if there are instructions between the load
- // and the integer extend that couldn't be selected by FastISel.
- if (IntExtVal) {
- // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
- // could select it. Emit a copy to subreg if necessary. FastISel will remove
- // it when it selects the integer extend.
- Register Reg = lookUpRegForValue(IntExtVal);
- auto *MI = MRI.getUniqueVRegDef(Reg);
- if (!MI) {
- if (RetVT == MVT::i64 && VT <= MVT::i32) {
- if (WantZExt) {
- // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
- MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
- ResultReg = std::prev(I)->getOperand(0).getReg();
- removeDeadCode(I, std::next(I));
- } else
- ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
- AArch64::sub_32);
- }
- updateValueMap(I, ResultReg);
- return true;
- }
- // The integer extend has already been emitted - delete all the instructions
- // that have been emitted by the integer extend lowering code and use the
- // result from the load instruction directly.
- while (MI) {
- Reg = 0;
- for (auto &Opnd : MI->uses()) {
- if (Opnd.isReg()) {
- Reg = Opnd.getReg();
- break;
- }
- }
- MachineBasicBlock::iterator I(MI);
- removeDeadCode(I, std::next(I));
- MI = nullptr;
- if (Reg)
- MI = MRI.getUniqueVRegDef(Reg);
- }
- updateValueMap(IntExtVal, ResultReg);
- return true;
- }
- updateValueMap(I, ResultReg);
- return true;
- }
- /// Emit a store-release (STLRB/STLRH/STLRW/STLRX) of \p SrcReg through the
- /// address register \p AddrReg. Only i8, i16, i32 and i64 are handled; any
- /// other type returns false without emitting an instruction.
- bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
- unsigned AddrReg,
- MachineMemOperand *MMO) {
- unsigned StlrOpc = 0;
- switch (VT.SimpleTy) {
- case MVT::i8:
- StlrOpc = AArch64::STLRB;
- break;
- case MVT::i16:
- StlrOpc = AArch64::STLRH;
- break;
- case MVT::i32:
- StlrOpc = AArch64::STLRW;
- break;
- case MVT::i64:
- StlrOpc = AArch64::STLRX;
- break;
- default:
- return false;
- }
- const MCInstrDesc &Desc = TII.get(StlrOpc);
- // Operand 0 is the value being stored, operand 1 the address register.
- const unsigned ValueReg = constrainOperandRegClass(Desc, SrcReg, 0);
- const unsigned BaseReg = constrainOperandRegClass(Desc, AddrReg, 1);
- MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
- MIB.addReg(ValueReg);
- MIB.addReg(BaseReg);
- MIB.addMemOperand(MMO);
- return true;
- }
- /// Emit a plain store of \p SrcReg to \p Addr. Returns false when misaligned
- /// accesses are not allowed for \p VT or when the address cannot be simplified
- /// into a form handled here.
- bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
- MachineMemOperand *MMO) {
- if (!TLI.allowsMisalignedMemoryAccesses(VT))
- return false;
- // Reduce the address to a form the store opcodes below can take.
- if (!simplifyAddress(Addr, VT))
- return false;
- unsigned ScaleFactor = getImplicitScaleFactor(VT);
- if (ScaleFactor == 0)
- llvm_unreachable("Unexpected value type.");
- // Scaled stores take a 12-bit, unsigned immediate offset. A negative offset,
- // or one that is not a multiple of the access size, needs the unscaled form
- // with its 9-bit, signed immediate instead.
- const bool OffsetIsNegative = Addr.getOffset() < 0;
- const bool OffsetIsUnaligned = (Addr.getOffset() & (ScaleFactor - 1)) != 0;
- const bool UseScaled = !OffsetIsNegative && !OffsetIsUnaligned;
- if (!UseScaled)
- ScaleFactor = 1;
- // Rows: unscaled immediate, scaled immediate, X register offset,
- // W register offset. Columns: i8, i16, i32, i64, f32, f64.
- static const unsigned OpcTable[4][6] = {
- { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
- AArch64::STURSi, AArch64::STURDi },
- { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
- AArch64::STRSui, AArch64::STRDui },
- { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
- AArch64::STRSroX, AArch64::STRDroX },
- { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
- AArch64::STRSroW, AArch64::STRDroW }
- };
- const bool UseRegOffset = Addr.isRegBase() && Addr.getOffset() == 0 &&
- Addr.getReg() && Addr.getOffsetReg();
- unsigned Row = 0;
- if (UseRegOffset)
- Row = 2;
- else if (UseScaled)
- Row = 1;
- const auto ExtTy = Addr.getExtendType();
- if (ExtTy == AArch64_AM::UXTW || ExtTy == AArch64_AM::SXTW)
- ++Row;
- unsigned Col;
- bool IsI1 = false;
- switch (VT.SimpleTy) {
- case MVT::i1:
- IsI1 = true;
- LLVM_FALLTHROUGH;
- case MVT::i8:
- Col = 0;
- break;
- case MVT::i16:
- Col = 1;
- break;
- case MVT::i32:
- Col = 2;
- break;
- case MVT::i64:
- Col = 3;
- break;
- case MVT::f32:
- Col = 4;
- break;
- case MVT::f64:
- Col = 5;
- break;
- default:
- llvm_unreachable("Unexpected value type.");
- }
- const unsigned Opc = OpcTable[Row][Col];
- // An i1 is stored with the byte opcode, so mask the source down to its low
- // bit first (WZR is already zero and needs no masking).
- if (IsI1 && SrcReg != AArch64::WZR) {
- unsigned MaskedReg = emitAnd_ri(MVT::i32, SrcReg, 1);
- assert(MaskedReg && "Unexpected AND instruction emission failure.");
- SrcReg = MaskedReg;
- }
- // Build the store: value operand first, then the address operands.
- const MCInstrDesc &Desc = TII.get(Opc);
- SrcReg = constrainOperandRegClass(Desc, SrcReg, Desc.getNumDefs());
- MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc).addReg(SrcReg);
- addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
- return true;
- }
- /// Select a StoreInst. Zero integer and positive-zero FP constants are stored
- /// straight from WZR/XZR; atomic stores with release-or-stronger ordering go
- /// through emitStoreRelease; everything else goes through emitStore. Returns
- /// false for unsupported types, swifterror pointers, or when a required
- /// register or address cannot be produced.
- bool AArch64FastISel::selectStore(const Instruction *I) {
- MVT VT;
- const Value *Op0 = I->getOperand(0);
- // Verify we have a legal type before going any further. Currently, we handle
- // simple types that will directly fit in a register (i32/f32/i64/f64) or
- // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
- return false;
- const Value *PtrV = I->getOperand(1);
- if (TLI.supportSwiftError()) {
- // Swifterror values can come from either a function parameter with
- // swifterror attribute or an alloca with swifterror attribute.
- if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
- if (Arg->hasSwiftErrorAttr())
- return false;
- }
- if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
- if (Alloca->isSwiftError())
- return false;
- }
- }
- // Get the value to be stored into a register. Use the zero register directly
- // when possible to avoid an unnecessary copy and a wasted register.
- unsigned SrcReg = 0;
- if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
- if (CI->isZero())
- SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
- } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
- if (CF->isZero() && !CF->isNegative()) {
- // Store +0.0 as an integer zero of the same width.
- VT = MVT::getIntegerVT(VT.getSizeInBits());
- SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
- }
- }
- if (!SrcReg)
- SrcReg = getRegForValue(Op0);
- if (!SrcReg)
- return false;
- auto *SI = cast<StoreInst>(I);
- // Try to emit a STLR for seq_cst/release.
- if (SI->isAtomic()) {
- AtomicOrdering Ord = SI->getOrdering();
- // The non-atomic instructions are sufficient for relaxed stores.
- if (isReleaseOrStronger(Ord)) {
- // The STLR addressing mode only supports a base reg; pass that directly.
- Register AddrReg = getRegForValue(PtrV);
- // Bail out if the pointer could not be materialized, rather than
- // emitting an STLR with an invalid (zero) address register.
- if (!AddrReg)
- return false;
- return emitStoreRelease(VT, SrcReg, AddrReg,
- createMachineMemOperandFor(I));
- }
- }
- // See if we can handle this address.
- Address Addr;
- if (!computeAddress(PtrV, Addr, Op0->getType()))
- return false;
- return emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I));
- }
- /// Map an IR compare predicate to the AArch64 condition code that tests it
- /// after a compare. Returns AArch64CC::AL for predicates that have no single
- /// condition code here (FCMP_ONE, FCMP_UEQ and anything not listed).
- static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
- switch (Pred) {
- // Integer predicates.
- case CmpInst::ICMP_EQ:
- return AArch64CC::EQ;
- case CmpInst::ICMP_NE:
- return AArch64CC::NE;
- case CmpInst::ICMP_SGT:
- return AArch64CC::GT;
- case CmpInst::ICMP_SGE:
- return AArch64CC::GE;
- case CmpInst::ICMP_SLT:
- return AArch64CC::LT;
- case CmpInst::ICMP_SLE:
- return AArch64CC::LE;
- case CmpInst::ICMP_UGT:
- return AArch64CC::HI;
- case CmpInst::ICMP_UGE:
- return AArch64CC::HS;
- case CmpInst::ICMP_ULT:
- return AArch64CC::LO;
- case CmpInst::ICMP_ULE:
- return AArch64CC::LS;
- // Ordered floating-point predicates.
- case CmpInst::FCMP_OEQ:
- return AArch64CC::EQ;
- case CmpInst::FCMP_OGT:
- return AArch64CC::GT;
- case CmpInst::FCMP_OGE:
- return AArch64CC::GE;
- case CmpInst::FCMP_OLT:
- return AArch64CC::MI;
- case CmpInst::FCMP_OLE:
- return AArch64CC::LS;
- case CmpInst::FCMP_ORD:
- return AArch64CC::VC;
- // Unordered floating-point predicates.
- case CmpInst::FCMP_UNO:
- return AArch64CC::VS;
- case CmpInst::FCMP_UGT:
- return AArch64CC::HI;
- case CmpInst::FCMP_UGE:
- return AArch64CC::PL;
- case CmpInst::FCMP_ULT:
- return AArch64CC::LT;
- case CmpInst::FCMP_ULE:
- return AArch64CC::LE;
- case CmpInst::FCMP_UNE:
- return AArch64CC::NE;
- // AL is our "false" for now. FCMP_ONE and FCMP_UEQ need more compares.
- case CmpInst::FCMP_ONE:
- case CmpInst::FCMP_UEQ:
- default:
- return AArch64CC::AL;
- }
- }
- /// Try to emit a combined compare-and-branch instruction (CBZ/CBNZ or TBZ/TBNZ) for \p BI, whose condition must be a CmpInst. Returns false when the compare cannot be folded into the branch.
- bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
- // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
- // will not be produced, as they are conditional branch instructions that do
- // not set flags.
- if (FuncInfo.MF->getFunction().hasFnAttribute(
- Attribute::SpeculativeLoadHardening))
- return false;
- assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
- const CmpInst *CI = cast<CmpInst>(BI->getCondition());
- CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
- const Value *LHS = CI->getOperand(0);
- const Value *RHS = CI->getOperand(1);
- MVT VT;
- if (!isTypeSupported(LHS->getType(), VT))
- return false;
- unsigned BW = VT.getSizeInBits();
- if (BW > 64)
- return false;
- MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
- MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- // Try to take advantage of fallthrough opportunities.
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- Predicate = CmpInst::getInversePredicate(Predicate); // Targets were swapped, so branch on the inverse condition.
- }
- int TestBit = -1; // Bit index for TB(N)Z; -1 selects CB(N)Z instead.
- bool IsCmpNE; // Selects the CBNZ/TBNZ row of OpcTable; set by every case that does not return.
- switch (Predicate) {
- default:
- return false;
- case CmpInst::ICMP_EQ:
- case CmpInst::ICMP_NE:
- if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
- std::swap(LHS, RHS); // Canonicalize the zero constant onto the RHS.
- if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
- return false;
- if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
- if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
- const Value *AndLHS = AI->getOperand(0);
- const Value *AndRHS = AI->getOperand(1);
- if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
- if (C->getValue().isPowerOf2())
- std::swap(AndLHS, AndRHS); // Canonicalize the power-of-two mask onto the RHS of the AND.
- if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
- if (C->getValue().isPowerOf2()) {
- TestBit = C->getValue().logBase2(); // (x & (1 << n)) ==/!= 0 becomes a test of bit n of x.
- LHS = AndLHS;
- }
- }
- if (VT == MVT::i1)
- TestBit = 0; // An i1 is decided by its lowest bit.
- IsCmpNE = Predicate == CmpInst::ICMP_NE;
- break;
- case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SGE:
- if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
- return false;
- TestBit = BW - 1; // x < 0 and x >= 0 are decided by the sign bit.
- IsCmpNE = Predicate == CmpInst::ICMP_SLT;
- break;
- case CmpInst::ICMP_SGT:
- case CmpInst::ICMP_SLE:
- if (!isa<ConstantInt>(RHS))
- return false;
- if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
- return false;
- TestBit = BW - 1; // x > -1 and x <= -1 are likewise decided by the sign bit.
- IsCmpNE = Predicate == CmpInst::ICMP_SLE;
- break;
- } // end switch
- static const unsigned OpcTable[2][2][2] = { // Indexed as [IsBitTest][IsCmpNE][Is64Bit].
- { {AArch64::CBZW, AArch64::CBZX },
- {AArch64::CBNZW, AArch64::CBNZX} },
- { {AArch64::TBZW, AArch64::TBZX },
- {AArch64::TBNZW, AArch64::TBNZX} }
- };
- bool IsBitTest = TestBit != -1;
- bool Is64Bit = BW == 64;
- if (TestBit < 32 && TestBit >= 0)
- Is64Bit = false; // Bits 0-31 are tested with the W-register opcode.
- unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
- const MCInstrDesc &II = TII.get(Opc);
- Register SrcReg = getRegForValue(LHS);
- if (!SrcReg)
- return false;
- if (BW == 64 && !Is64Bit)
- SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); // Use the low 32 bits of the 64-bit source.
- if ((BW < 32) && !IsBitTest)
- SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); // Zero-extend narrow values to i32 before the CB(N)Z. NOTE(review): the result is not checked for failure.
- // Emit the combined compare and branch instruction.
- SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
- MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
- .addReg(SrcReg);
- if (IsBitTest)
- MIB.addImm(TestBit);
- MIB.addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
- }
- bool AArch64FastISel::selectBranch(const Instruction *I) { // Lower a BranchInst: unconditional, compare-based, constant-condition, XALU-intrinsic-based, or a generic test of the condition's low bit.
- const BranchInst *BI = cast<BranchInst>(I);
- if (BI->isUnconditional()) {
- MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
- fastEmitBranch(MSucc, BI->getDebugLoc());
- return true;
- }
- MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
- MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
- if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
- if (CI->hasOneUse() && isValueAvailable(CI)) { // Otherwise fall through to the generic low-bit test at the end.
- // Try to optimize or fold the cmp.
- CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
- switch (Predicate) {
- default:
- break;
- case CmpInst::FCMP_FALSE:
- fastEmitBranch(FBB, DbgLoc);
- return true;
- case CmpInst::FCMP_TRUE:
- fastEmitBranch(TBB, DbgLoc);
- return true;
- }
- // Try to emit a combined compare-and-branch first.
- if (emitCompareAndBranch(BI))
- return true;
- // Try to take advantage of fallthrough opportunities.
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB);
- Predicate = CmpInst::getInversePredicate(Predicate);
- }
- // Emit the cmp.
- if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
- return false;
- // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
- // instruction.
- AArch64CC::CondCode CC = getCompareCC(Predicate);
- AArch64CC::CondCode ExtraCC = AArch64CC::AL; // AL means no extra branch is needed.
- switch (Predicate) {
- default:
- break;
- case CmpInst::FCMP_UEQ: // Branch to TBB on EQ, then on VS.
- ExtraCC = AArch64CC::EQ;
- CC = AArch64CC::VS;
- break;
- case CmpInst::FCMP_ONE: // Branch to TBB on MI, then on GT.
- ExtraCC = AArch64CC::MI;
- CC = AArch64CC::GT;
- break;
- }
- assert((CC != AArch64CC::AL) && "Unexpected condition code.");
- // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
- if (ExtraCC != AArch64CC::AL) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(ExtraCC)
- .addMBB(TBB);
- }
- // Emit the branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
- }
- } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
- uint64_t Imm = CI->getZExtValue();
- MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; // A constant condition always reaches exactly one successor.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
- .addMBB(Target);
- // Obtain the branch probability and add the target to the successor list.
- if (FuncInfo.BPI) {
- auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
- BI->getParent(), Target->getBasicBlock());
- FuncInfo.MBB->addSuccessor(Target, BranchProbability);
- } else
- FuncInfo.MBB->addSuccessorWithoutProb(Target);
- return true;
- } else {
- AArch64CC::CondCode CC = AArch64CC::NE; // Presumably updated by foldXALUIntrinsic on success - confirm against its definition.
- if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
- // Fake request the condition, otherwise the intrinsic might be completely
- // optimized away.
- Register CondReg = getRegForValue(BI->getCondition());
- if (!CondReg)
- return false;
- // Emit the branch.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
- .addImm(CC)
- .addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
- }
- }
- Register CondReg = getRegForValue(BI->getCondition()); // Generic path: materialize the condition and test it directly.
- if (CondReg == 0)
- return false;
- // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
- unsigned Opcode = AArch64::TBNZW;
- if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
- std::swap(TBB, FBB); // Branch to the original false block instead, which is why the opcode flips to TBZW.
- Opcode = AArch64::TBZW;
- }
- const MCInstrDesc &II = TII.get(Opcode);
- Register ConstrainedCondReg
- = constrainOperandRegClass(II, CondReg, II.getNumDefs());
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(ConstrainedCondReg)
- .addImm(0)
- .addMBB(TBB);
- finishCondBranch(BI->getParent(), TBB, FBB);
- return true;
- }
- /// Lower an indirectbr to a BR through the register holding the target
- /// address, and record every IR successor as a machine-CFG successor.
- /// Returns false if the address cannot be put in a register.
- bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
- const auto *IBI = cast<IndirectBrInst>(I);
- Register TargetReg = getRegForValue(IBI->getOperand(0));
- if (!TargetReg)
- return false;
- // Emit the indirect branch.
- const MCInstrDesc &Desc = TII.get(AArch64::BR);
- TargetReg = constrainOperandRegClass(Desc, TargetReg, Desc.getNumDefs());
- MachineInstrBuilder MIB =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, Desc);
- MIB.addReg(TargetReg);
- // Make sure the CFG is up-to-date.
- for (auto *Dest : IBI->successors()) {
- MachineBasicBlock *DestMBB = FuncInfo.MBBMap[Dest];
- FuncInfo.MBB->addSuccessor(DestMBB);
- }
- return true;
- }
- /// Lower a scalar compare whose result is needed as a value: emit the compare
- /// and set a 32-bit result register from the flags with CSINC. Compares that
- /// fold to a constant get that constant directly. Returns false for vector
- /// compares and when the compare itself cannot be emitted.
- bool AArch64FastISel::selectCmp(const Instruction *I) {
- const auto *Cmp = cast<CmpInst>(I);
- // Vectors of i1 are weird: bail out.
- if (Cmp->getType()->isVectorTy())
- return false;
- // Try to optimize or fold the cmp.
- const CmpInst::Predicate Pred = optimizeCmpPredicate(Cmp);
- // A compare that folds to a constant needs no compare instruction.
- unsigned FoldedReg = 0;
- if (Pred == CmpInst::FCMP_FALSE) {
- FoldedReg = createResultReg(&AArch64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), FoldedReg)
- .addReg(AArch64::WZR, getKillRegState(true));
- } else if (Pred == CmpInst::FCMP_TRUE) {
- FoldedReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
- }
- if (FoldedReg) {
- updateValueMap(I, FoldedReg);
- return true;
- }
- // Emit the cmp.
- if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
- return false;
- const unsigned ResultReg = createResultReg(&AArch64::GPR32RegClass);
- // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
- // condition codes are inverted, because they are used by CSINC.
- static const unsigned TwoStepCondCodes[2][2] = {
- { AArch64CC::NE, AArch64CC::VC }, // FCMP_UEQ
- { AArch64CC::PL, AArch64CC::LE }  // FCMP_ONE
- };
- const unsigned *CondCodes = nullptr;
- if (Pred == CmpInst::FCMP_UEQ)
- CondCodes = TwoStepCondCodes[0];
- else if (Pred == CmpInst::FCMP_ONE)
- CondCodes = TwoStepCondCodes[1];
- if (CondCodes) {
- // Chain two CSINCs: the first tests one condition, the second combines
- // its result with the other condition.
- Register FirstReg = createResultReg(&AArch64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
- FirstReg)
- .addReg(AArch64::WZR, getKillRegState(true))
- .addReg(AArch64::WZR, getKillRegState(true))
- .addImm(CondCodes[0]);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
- ResultReg)
- .addReg(FirstReg, getKillRegState(true))
- .addReg(AArch64::WZR, getKillRegState(true))
- .addImm(CondCodes[1]);
- updateValueMap(I, ResultReg);
- return true;
- }
- // Now set a register based on the comparison.
- const AArch64CC::CondCode CC = getCompareCC(Pred);
- assert((CC != AArch64CC::AL) && "Unexpected condition code.");
- const AArch64CC::CondCode InverseCC = getInvertedCondCode(CC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
- ResultReg)
- .addReg(AArch64::WZR, getKillRegState(true))
- .addReg(AArch64::WZR, getKillRegState(true))
- .addImm(InverseCC);
- updateValueMap(I, ResultReg);
- return true;
- }
- /// Replace an i1 select that has a constant 'true' or 'false' arm with a
- /// single logical instruction on the condition and the other arm. Returns
- /// false if the select is not i1, neither arm is a ConstantInt, or an operand
- /// cannot be put in a register.
- bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
- if (!SI->getType()->isIntegerTy(1))
- return false;
- const Value *LHSVal = nullptr;
- const Value *RHSVal = nullptr;
- unsigned LogicOpc = 0;
- bool InvertLHS = false;
- if (auto *TrueC = dyn_cast<ConstantInt>(SI->getTrueValue())) {
- if (TrueC->isOne()) {
- // select c, true, f  ->  c | f
- LHSVal = SI->getCondition();
- RHSVal = SI->getFalseValue();
- LogicOpc = AArch64::ORRWrr;
- } else {
- // select c, false, f  ->  BIC f, c
- assert(TrueC->isZero());
- LHSVal = SI->getFalseValue();
- RHSVal = SI->getCondition();
- LogicOpc = AArch64::BICWrr;
- }
- } else if (auto *FalseC = dyn_cast<ConstantInt>(SI->getFalseValue())) {
- // Both forms operate on the condition and the true value.
- LHSVal = SI->getCondition();
- RHSVal = SI->getTrueValue();
- if (FalseC->isOne()) {
- // select c, t, true  ->  (c ^ 1) | t
- LogicOpc = AArch64::ORRWrr;
- InvertLHS = true;
- } else {
- // select c, t, false  ->  c & t
- assert(FalseC->isZero());
- LogicOpc = AArch64::ANDWrr;
- }
- }
- if (LogicOpc == 0)
- return false;
- Register LHSReg = getRegForValue(LHSVal);
- if (!LHSReg)
- return false;
- Register RHSReg = getRegForValue(RHSVal);
- if (!RHSReg)
- return false;
- if (InvertLHS)
- LHSReg = emitLogicalOp_ri(ISD::XOR, MVT::i32, LHSReg, 1);
- Register ResultReg =
- fastEmitInst_rr(LogicOpc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
- updateValueMap(SI, ResultReg);
- return true;
- }
- bool AArch64FastISel::selectSelect(const Instruction *I) { // Lower a select of i1/i8/i16/i32/i64/f32/f64 to CSEL/FCSEL, reusing flags from the condition's compare or XALU intrinsic when possible.
- assert(isa<SelectInst>(I) && "Expected a select instruction.");
- MVT VT;
- if (!isTypeSupported(I->getType(), VT))
- return false;
- unsigned Opc;
- const TargetRegisterClass *RC;
- switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::i1:
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- Opc = AArch64::CSELWr;
- RC = &AArch64::GPR32RegClass;
- break;
- case MVT::i64:
- Opc = AArch64::CSELXr;
- RC = &AArch64::GPR64RegClass;
- break;
- case MVT::f32:
- Opc = AArch64::FCSELSrrr;
- RC = &AArch64::FPR32RegClass;
- break;
- case MVT::f64:
- Opc = AArch64::FCSELDrrr;
- RC = &AArch64::FPR64RegClass;
- break;
- }
- const SelectInst *SI = cast<SelectInst>(I);
- const Value *Cond = SI->getCondition();
- AArch64CC::CondCode CC = AArch64CC::NE; // Default: matches the TST of the condition's low bit emitted in the final else-branch below.
- AArch64CC::CondCode ExtraCC = AArch64CC::AL; // AL means a single select instruction is enough.
- if (optimizeSelect(SI))
- return true;
- // Try to pickup the flags, so we don't have to emit another compare.
- if (foldXALUIntrinsic(CC, I, Cond)) {
- // Fake request the condition to force emission of the XALU intrinsic.
- Register CondReg = getRegForValue(Cond);
- if (!CondReg)
- return false;
- } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
- isValueAvailable(Cond)) {
- const auto *Cmp = cast<CmpInst>(Cond);
- // Try to optimize or fold the cmp.
- CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
- const Value *FoldSelect = nullptr;
- switch (Predicate) {
- default:
- break;
- case CmpInst::FCMP_FALSE:
- FoldSelect = SI->getFalseValue();
- break;
- case CmpInst::FCMP_TRUE:
- FoldSelect = SI->getTrueValue();
- break;
- }
- if (FoldSelect) {
- Register SrcReg = getRegForValue(FoldSelect);
- if (!SrcReg)
- return false;
- updateValueMap(I, SrcReg); // The compare folded to a constant, so the select is simply one of its operands.
- return true;
- }
- // Emit the cmp.
- if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
- return false;
- // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
- CC = getCompareCC(Predicate);
- switch (Predicate) {
- default:
- break;
- case CmpInst::FCMP_UEQ:
- ExtraCC = AArch64CC::EQ;
- CC = AArch64CC::VS;
- break;
- case CmpInst::FCMP_ONE:
- ExtraCC = AArch64CC::MI;
- CC = AArch64CC::GT;
- break;
- }
- assert((CC != AArch64CC::AL) && "Unexpected condition code.");
- } else {
- Register CondReg = getRegForValue(Cond);
- if (!CondReg)
- return false;
- const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
- CondReg = constrainOperandRegClass(II, CondReg, 1);
- // Emit a TST instruction (ANDS wzr, reg, #imm).
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
- AArch64::WZR)
- .addReg(CondReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
- }
- Register Src1Reg = getRegForValue(SI->getTrueValue());
- Register Src2Reg = getRegForValue(SI->getFalseValue());
- if (!Src1Reg || !Src2Reg)
- return false;
- if (ExtraCC != AArch64CC::AL)
- Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); // First select handles ExtraCC; its result becomes the false operand of the final select.
- Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
- updateValueMap(I, ResultReg);
- return true;
- }
- /// Lower an fpext from float to double with FCVTDSr. Any other type pair is
- /// rejected by returning false.
- bool AArch64FastISel::selectFPExt(const Instruction *I) {
- Value *Src = I->getOperand(0);
- const bool IsFloatToDouble =
- I->getType()->isDoubleTy() && Src->getType()->isFloatTy();
- if (!IsFloatToDouble)
- return false;
- Register SrcReg = getRegForValue(Src);
- if (!SrcReg)
- return false;
- Register DstReg = createResultReg(&AArch64::FPR64RegClass);
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::FCVTDSr), DstReg);
- MIB.addReg(SrcReg);
- updateValueMap(I, DstReg);
- return true;
- }
- /// Lower an fptrunc from double to float with FCVTSDr. Any other type pair is
- /// rejected by returning false.
- bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
- Value *Src = I->getOperand(0);
- const bool IsDoubleToFloat =
- I->getType()->isFloatTy() && Src->getType()->isDoubleTy();
- if (!IsDoubleToFloat)
- return false;
- Register SrcReg = getRegForValue(Src);
- if (!SrcReg)
- return false;
- Register DstReg = createResultReg(&AArch64::FPR32RegClass);
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::FCVTSDr), DstReg);
- MIB.addReg(SrcReg);
- updateValueMap(I, DstReg);
- return true;
- }
- /// Select fptosi (\p Signed) or fptoui (!\p Signed) from an f32 or f64 source
- /// to a legal scalar integer result. Returns false for vector results, for
- /// any source type other than f32/f64, or when the source cannot be put in a
- /// register.
- bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
- MVT DestVT;
- if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
- return false;
- Register SrcReg = getRegForValue(I->getOperand(0));
- if (SrcReg == 0)
- return false;
- EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
- // Only f32 and f64 sources have opcodes below. Reject everything else
- // (f16, f128 and any other FP type) explicitly, so an unexpected source type
- // is never lowered with the f32 opcodes by falling into the else-branch.
- if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
- return false;
- unsigned Opc;
- if (SrcVT == MVT::f64) {
- if (Signed)
- Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
- else
- Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
- } else {
- if (Signed)
- Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
- else
- Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
- }
- // The result register class follows the integer result width.
- Register ResultReg = createResultReg(
- DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg);
- updateValueMap(I, ResultReg);
- return true;
- }
- /// Select sitofp (\p Signed) or uitofp (!\p Signed) from an integer source to
- /// an f32 or f64 result. Sources of type i1/i8/i16 are first extended to i32.
- /// Returns false for vector results, for any result type other than f32/f64,
- /// or when a required register cannot be produced.
- bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
- MVT DestVT;
- if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
- return false;
- // Only f32 and f64 results have opcodes below. Bail out rather than assert,
- // so that f16 and any other legal FP result type is left to regular ISel in
- // release builds as well instead of being lowered with an f64 opcode.
- if (DestVT != MVT::f32 && DestVT != MVT::f64)
- return false;
- Register SrcReg = getRegForValue(I->getOperand(0));
- if (!SrcReg)
- return false;
- EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
- // Promote narrow sources to i32: sign-extend for signed conversions,
- // zero-extend for unsigned ones.
- if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
- SrcReg =
- emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
- if (!SrcReg)
- return false;
- }
- unsigned Opc;
- if (SrcVT == MVT::i64) {
- if (Signed)
- Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
- else
- Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
- } else {
- if (Signed)
- Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
- else
- Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
- }
- Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
- updateValueMap(I, ResultReg);
- return true;
- }
- bool AArch64FastISel::fastLowerArguments() { // Lower incoming arguments for simple C/Swift-convention functions whose arguments fit in at most 8 GPRs and 8 FPRs; returns false for anything else.
- if (!FuncInfo.CanLowerReturn)
- return false;
- const Function *F = FuncInfo.Fn;
- if (F->isVarArg())
- return false;
- CallingConv::ID CC = F->getCallingConv();
- if (CC != CallingConv::C && CC != CallingConv::Swift)
- return false;
- if (Subtarget->hasCustomCallingConv())
- return false;
- // Only handle simple cases of up to 8 GPR and FPR each.
- unsigned GPRCnt = 0;
- unsigned FPRCnt = 0;
- for (auto const &Arg : F->args()) { // First pass: validate every argument before emitting anything.
- if (Arg.hasAttribute(Attribute::ByVal) ||
- Arg.hasAttribute(Attribute::InReg) ||
- Arg.hasAttribute(Attribute::StructRet) ||
- Arg.hasAttribute(Attribute::SwiftSelf) ||
- Arg.hasAttribute(Attribute::SwiftAsync) ||
- Arg.hasAttribute(Attribute::SwiftError) ||
- Arg.hasAttribute(Attribute::Nest))
- return false;
- Type *ArgTy = Arg.getType();
- if (ArgTy->isStructTy() || ArgTy->isArrayTy())
- return false;
- EVT ArgVT = TLI.getValueType(DL, ArgTy);
- if (!ArgVT.isSimple())
- return false;
- MVT VT = ArgVT.getSimpleVT().SimpleTy;
- if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
- return false;
- if (VT.isVector() &&
- (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
- return false;
- if (VT >= MVT::i1 && VT <= MVT::i64) // Integer arguments consume a GPR.
- ++GPRCnt;
- else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
- VT.is128BitVector()) // FP and 64/128-bit vector arguments consume an FPR.
- ++FPRCnt;
- else
- return false;
- if (GPRCnt > 8 || FPRCnt > 8)
- return false;
- }
- static const MCPhysReg Registers[6][8] = { // Rows: W, X, H, S, D, Q registers; columns: argument slots 0-7.
- { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
- AArch64::W5, AArch64::W6, AArch64::W7 },
- { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
- AArch64::X5, AArch64::X6, AArch64::X7 },
- { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
- AArch64::H5, AArch64::H6, AArch64::H7 },
- { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
- AArch64::S5, AArch64::S6, AArch64::S7 },
- { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
- AArch64::D5, AArch64::D6, AArch64::D7 },
- { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
- AArch64::Q5, AArch64::Q6, AArch64::Q7 }
- };
- unsigned GPRIdx = 0;
- unsigned FPRIdx = 0;
- for (auto const &Arg : F->args()) { // Second pass: assign registers in argument order and copy each into a fresh virtual register.
- MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
- unsigned SrcReg;
- const TargetRegisterClass *RC;
- if (VT >= MVT::i1 && VT <= MVT::i32) {
- SrcReg = Registers[0][GPRIdx++];
- RC = &AArch64::GPR32RegClass;
- VT = MVT::i32; // NOTE(review): VT is not read again in this loop body.
- } else if (VT == MVT::i64) {
- SrcReg = Registers[1][GPRIdx++];
- RC = &AArch64::GPR64RegClass;
- } else if (VT == MVT::f16) {
- SrcReg = Registers[2][FPRIdx++];
- RC = &AArch64::FPR16RegClass;
- } else if (VT == MVT::f32) {
- SrcReg = Registers[3][FPRIdx++];
- RC = &AArch64::FPR32RegClass;
- } else if ((VT == MVT::f64) || VT.is64BitVector()) {
- SrcReg = Registers[4][FPRIdx++];
- RC = &AArch64::FPR64RegClass;
- } else if (VT.is128BitVector()) {
- SrcReg = Registers[5][FPRIdx++];
- RC = &AArch64::FPR128RegClass;
- } else
- llvm_unreachable("Unexpected value type.");
- Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
- // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
- // Without this, EmitLiveInCopies may eliminate the livein if its only
- // use is a bitcast (which isn't turned into an instruction).
- Register ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(DstReg, getKillRegState(true));
- updateValueMap(&Arg, ResultReg);
- }
- return true;
- }
- bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
- SmallVectorImpl<MVT> &OutVTs,
- unsigned &NumBytes) { // Emits CALLSEQ_START and moves each outgoing argument into its assigned register or stack slot; sets NumBytes to the stack bytes used and returns false on any argument it cannot handle.
- CallingConv::ID CC = CLI.CallConv;
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
- CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
- // Get a count of how many bytes are to be pushed on the stack.
- NumBytes = CCInfo.getNextStackOffset();
- // Issue CALLSEQ_START
- unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
- .addImm(NumBytes).addImm(0);
- // Process the args.
- for (CCValAssign &VA : ArgLocs) {
- const Value *ArgVal = CLI.OutVals[VA.getValNo()];
- MVT ArgVT = OutVTs[VA.getValNo()];
- Register ArgReg = getRegForValue(ArgVal);
- if (!ArgReg)
- return false;
- // Handle arg promotion: SExt, ZExt, AExt.
- switch (VA.getLocInfo()) {
- case CCValAssign::Full:
- break;
- case CCValAssign::SExt: {
- MVT DestVT = VA.getLocVT();
- MVT SrcVT = ArgVT;
- ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
- if (!ArgReg)
- return false;
- break;
- }
- case CCValAssign::AExt:
- // Intentional fall-through: any-extend is implemented as a zero-extend.
- case CCValAssign::ZExt: {
- MVT DestVT = VA.getLocVT();
- MVT SrcVT = ArgVT;
- ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
- if (!ArgReg)
- return false;
- break;
- }
- default:
- llvm_unreachable("Unknown arg promotion!");
- }
- // Now copy/store arg to correct locations.
- if (VA.isRegLoc() && !VA.needsCustom()) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
- CLI.OutRegs.push_back(VA.getLocReg()); // Record the physical register as used by the call.
- } else if (VA.needsCustom()) {
- // FIXME: Handle custom args.
- return false;
- } else {
- assert(VA.isMemLoc() && "Assuming store on stack.");
- // Don't emit stores for undef values.
- if (isa<UndefValue>(ArgVal))
- continue;
- // Need to store on the stack.
- unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; // Size in bytes, rounded up.
- unsigned BEAlign = 0;
- if (ArgSize < 8 && !Subtarget->isLittleEndian())
- BEAlign = 8 - ArgSize; // Big-endian: shift a sub-8-byte argument by (8 - size) bytes from its slot offset.
- Address Addr;
- Addr.setKind(Address::RegBase);
- Addr.setReg(AArch64::SP);
- Addr.setOffset(VA.getLocMemOffset() + BEAlign);
- Align Alignment = DL.getABITypeAlign(ArgVal->getType());
- MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
- MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
- MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
- if (!emitStore(ArgVT, ArgReg, Addr, MMO))
- return false;
- }
- }
- return true;
- }
- /// Close the call sequence with CALLSEQ_END and, for a non-void call, copy the
- /// single return value out of its physical register into a new virtual
- /// register recorded in \p CLI. Returns false when the result is not exactly
- /// one location, or is a vector on a big-endian target.
- bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
- unsigned NumBytes) {
- const CallingConv::ID CC = CLI.CallConv;
- // Issue CALLSEQ_END
- const unsigned FrameDestroyOpc = TII.getCallFrameDestroyOpcode();
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(FrameDestroyOpc))
- .addImm(NumBytes)
- .addImm(0);
- // Nothing more to do when the call produces no value.
- if (RetVT == MVT::isVoid)
- return true;
- // Now the return value.
- SmallVector<CCValAssign, 16> RVLocs;
- CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
- CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
- // Only handle a single return value.
- if (RVLocs.size() != 1)
- return false;
- // Copy the result out of its specified physreg.
- const MVT CopyVT = RVLocs[0].getValVT();
- // TODO: Handle big-endian results
- if (CopyVT.isVector() && !Subtarget->isLittleEndian())
- return false;
- Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg);
- MIB.addReg(RVLocs[0].getLocReg());
- CLI.InRegs.push_back(RVLocs[0].getLocReg());
- CLI.ResultReg = ResultReg;
- CLI.NumResultRegs = 1;
- return true;
- }
- bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
- CallingConv::ID CC = CLI.CallConv;
- bool IsTailCall = CLI.IsTailCall;
- bool IsVarArg = CLI.IsVarArg;
- const Value *Callee = CLI.Callee;
- MCSymbol *Symbol = CLI.Symbol;
- if (!Callee && !Symbol)
- return false;
- // Allow SelectionDAG isel to handle calls to functions like setjmp that need
- // a bti instruction following the call.
- if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
- !Subtarget->noBTIAtReturnTwice() &&
- MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
- return false;
- // Allow SelectionDAG isel to handle tail calls.
- if (IsTailCall)
- return false;
- // FIXME: we could and should support this, but for now correctness at -O0 is
- // more important.
- if (Subtarget->isTargetILP32())
- return false;
- CodeModel::Model CM = TM.getCodeModel();
- // Only support the small-addressing and large code models.
- if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
- return false;
- // FIXME: Add large code model support for ELF.
- if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
- return false;
- // Let SDISel handle vararg functions.
- if (IsVarArg)
- return false;
- // FIXME: Only handle *simple* calls for now.
- MVT RetVT;
- if (CLI.RetTy->isVoidTy())
- RetVT = MVT::isVoid;
- else if (!isTypeLegal(CLI.RetTy, RetVT))
- return false;
- for (auto Flag : CLI.OutFlags)
- if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
- Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
- return false;
- // Set up the argument vectors.
- SmallVector<MVT, 16> OutVTs;
- OutVTs.reserve(CLI.OutVals.size());
- for (auto *Val : CLI.OutVals) {
- MVT VT;
- if (!isTypeLegal(Val->getType(), VT) &&
- !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
- return false;
- // We don't handle vector parameters yet.
- if (VT.isVector() || VT.getSizeInBits() > 64)
- return false;
- OutVTs.push_back(VT);
- }
- Address Addr;
- if (Callee && !computeCallAddress(Callee, Addr))
- return false;
- // The weak function target may be zero; in that case we must use indirect
- // addressing via a stub on windows as it may be out of range for a
- // PC-relative jump.
- if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
- Addr.getGlobalValue()->hasExternalWeakLinkage())
- return false;
- // Handle the arguments now that we've gotten them.
- unsigned NumBytes;
- if (!processCallArgs(CLI, OutVTs, NumBytes))
- return false;
- const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- if (RegInfo->isAnyArgRegReserved(*MF))
- RegInfo->emitReservedArgRegCallError(*MF);
- // Issue the call.
- MachineInstrBuilder MIB;
- if (Subtarget->useSmallAddressing()) {
- const MCInstrDesc &II =
- TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
- if (Symbol)
- MIB.addSym(Symbol, 0);
- else if (Addr.getGlobalValue())
- MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
- else if (Addr.getReg()) {
- Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
- MIB.addReg(Reg);
- } else
- return false;
- } else {
- unsigned CallReg = 0;
- if (Symbol) {
- Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
- ADRPReg)
- .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
- CallReg = createResultReg(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::LDRXui), CallReg)
- .addReg(ADRPReg)
- .addSym(Symbol,
- AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
- } else if (Addr.getGlobalValue())
- CallReg = materializeGV(Addr.getGlobalValue());
- else if (Addr.getReg())
- CallReg = Addr.getReg();
- if (!CallReg)
- return false;
- const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
- CallReg = constrainOperandRegClass(II, CallReg, 0);
- MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
- }
- // Add implicit physical register uses to the call.
- for (auto Reg : CLI.OutRegs)
- MIB.addReg(Reg, RegState::Implicit);
- // Add a register mask with the call-preserved registers.
- // Proper defs for return values will be added by setPhysRegsDeadExcept().
- MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
- CLI.Call = MIB;
- // Finish off the call including any return values.
- return finishCall(CLI, RetVT, NumBytes);
- }
- bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
- if (Alignment)
- return Len / Alignment <= 4;
- else
- return Len < 32;
- }
- bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
- uint64_t Len, unsigned Alignment) {
- // Make sure we don't bloat code by inlining very large memcpy's.
- if (!isMemCpySmall(Len, Alignment))
- return false;
- int64_t UnscaledOffset = 0;
- Address OrigDest = Dest;
- Address OrigSrc = Src;
- while (Len) {
- MVT VT;
- if (!Alignment || Alignment >= 8) {
- if (Len >= 8)
- VT = MVT::i64;
- else if (Len >= 4)
- VT = MVT::i32;
- else if (Len >= 2)
- VT = MVT::i16;
- else {
- VT = MVT::i8;
- }
- } else {
- // Bound based on alignment.
- if (Len >= 4 && Alignment == 4)
- VT = MVT::i32;
- else if (Len >= 2 && Alignment == 2)
- VT = MVT::i16;
- else {
- VT = MVT::i8;
- }
- }
- unsigned ResultReg = emitLoad(VT, VT, Src);
- if (!ResultReg)
- return false;
- if (!emitStore(VT, ResultReg, Dest))
- return false;
- int64_t Size = VT.getSizeInBits() / 8;
- Len -= Size;
- UnscaledOffset += Size;
- // We need to recompute the unscaled offset for each iteration.
- Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
- Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
- }
- return true;
- }
- /// Check if it is possible to fold the condition from the XALU intrinsic
- /// into the user. The condition code will only be updated on success.
- bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
- const Instruction *I,
- const Value *Cond) {
- if (!isa<ExtractValueInst>(Cond))
- return false;
- const auto *EV = cast<ExtractValueInst>(Cond);
- if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
- return false;
- const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
- MVT RetVT;
- const Function *Callee = II->getCalledFunction();
- Type *RetTy =
- cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
- if (!isTypeLegal(RetTy, RetVT))
- return false;
- if (RetVT != MVT::i32 && RetVT != MVT::i64)
- return false;
- const Value *LHS = II->getArgOperand(0);
- const Value *RHS = II->getArgOperand(1);
- // Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
- std::swap(LHS, RHS);
- // Simplify multiplies.
- Intrinsic::ID IID = II->getIntrinsicID();
- switch (IID) {
- default:
- break;
- case Intrinsic::smul_with_overflow:
- if (const auto *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue() == 2)
- IID = Intrinsic::sadd_with_overflow;
- break;
- case Intrinsic::umul_with_overflow:
- if (const auto *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue() == 2)
- IID = Intrinsic::uadd_with_overflow;
- break;
- }
- AArch64CC::CondCode TmpCC;
- switch (IID) {
- default:
- return false;
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- TmpCC = AArch64CC::VS;
- break;
- case Intrinsic::uadd_with_overflow:
- TmpCC = AArch64CC::HS;
- break;
- case Intrinsic::usub_with_overflow:
- TmpCC = AArch64CC::LO;
- break;
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow:
- TmpCC = AArch64CC::NE;
- break;
- }
- // Check if both instructions are in the same basic block.
- if (!isValueAvailable(II))
- return false;
- // Make sure nothing is in the way
- BasicBlock::const_iterator Start(I);
- BasicBlock::const_iterator End(II);
- for (auto Itr = std::prev(Start); Itr != End; --Itr) {
- // We only expect extractvalue instructions between the intrinsic and the
- // instruction to be selected.
- if (!isa<ExtractValueInst>(Itr))
- return false;
- // Check that the extractvalue operand comes from the intrinsic.
- const auto *EVI = cast<ExtractValueInst>(Itr);
- if (EVI->getAggregateOperand() != II)
- return false;
- }
- CC = TmpCC;
- return true;
- }
- bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
- // FIXME: Handle more intrinsics.
- switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::frameaddress: {
- MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
- MFI.setFrameAddressIsTaken(true);
- const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
- Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
- Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
- // Recursively load frame address
- // ldr x0, [fp]
- // ldr x0, [x0]
- // ldr x0, [x0]
- // ...
- unsigned DestReg;
- unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
- while (Depth--) {
- DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
- SrcReg, 0);
- assert(DestReg && "Unexpected LDR instruction emission failure.");
- SrcReg = DestReg;
- }
- updateValueMap(II, SrcReg);
- return true;
- }
- case Intrinsic::sponentry: {
- MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
- // SP = FP + Fixed Object + 16
- int FI = MFI.CreateFixedObject(4, 0, false);
- Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::ADDXri), ResultReg)
- .addFrameIndex(FI)
- .addImm(0)
- .addImm(0);
- updateValueMap(II, ResultReg);
- return true;
- }
- case Intrinsic::memcpy:
- case Intrinsic::memmove: {
- const auto *MTI = cast<MemTransferInst>(II);
- // Don't handle volatile.
- if (MTI->isVolatile())
- return false;
- // Disable inlining for memmove before calls to ComputeAddress. Otherwise,
- // we would emit dead code because we don't currently handle memmoves.
- bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
- if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
- // Small memcpy's are common enough that we want to do them without a call
- // if possible.
- uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
- unsigned Alignment = MinAlign(MTI->getDestAlignment(),
- MTI->getSourceAlignment());
- if (isMemCpySmall(Len, Alignment)) {
- Address Dest, Src;
- if (!computeAddress(MTI->getRawDest(), Dest) ||
- !computeAddress(MTI->getRawSource(), Src))
- return false;
- if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
- return true;
- }
- }
- if (!MTI->getLength()->getType()->isIntegerTy(64))
- return false;
- if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
- // Fast instruction selection doesn't support the special
- // address spaces.
- return false;
- const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
- return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
- }
- case Intrinsic::memset: {
- const MemSetInst *MSI = cast<MemSetInst>(II);
- // Don't handle volatile.
- if (MSI->isVolatile())
- return false;
- if (!MSI->getLength()->getType()->isIntegerTy(64))
- return false;
- if (MSI->getDestAddressSpace() > 255)
- // Fast instruction selection doesn't support the special
- // address spaces.
- return false;
- return lowerCallTo(II, "memset", II->arg_size() - 1);
- }
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::pow: {
- MVT RetVT;
- if (!isTypeLegal(II->getType(), RetVT))
- return false;
- if (RetVT != MVT::f32 && RetVT != MVT::f64)
- return false;
- static const RTLIB::Libcall LibCallTable[3][2] = {
- { RTLIB::SIN_F32, RTLIB::SIN_F64 },
- { RTLIB::COS_F32, RTLIB::COS_F64 },
- { RTLIB::POW_F32, RTLIB::POW_F64 }
- };
- RTLIB::Libcall LC;
- bool Is64Bit = RetVT == MVT::f64;
- switch (II->getIntrinsicID()) {
- default:
- llvm_unreachable("Unexpected intrinsic.");
- case Intrinsic::sin:
- LC = LibCallTable[0][Is64Bit];
- break;
- case Intrinsic::cos:
- LC = LibCallTable[1][Is64Bit];
- break;
- case Intrinsic::pow:
- LC = LibCallTable[2][Is64Bit];
- break;
- }
- ArgListTy Args;
- Args.reserve(II->arg_size());
- // Populate the argument list.
- for (auto &Arg : II->args()) {
- ArgListEntry Entry;
- Entry.Val = Arg;
- Entry.Ty = Arg->getType();
- Args.push_back(Entry);
- }
- CallLoweringInfo CLI;
- MCContext &Ctx = MF->getContext();
- CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
- TLI.getLibcallName(LC), std::move(Args));
- if (!lowerCallTo(CLI))
- return false;
- updateValueMap(II, CLI.ResultReg);
- return true;
- }
- case Intrinsic::fabs: {
- MVT VT;
- if (!isTypeLegal(II->getType(), VT))
- return false;
- unsigned Opc;
- switch (VT.SimpleTy) {
- default:
- return false;
- case MVT::f32:
- Opc = AArch64::FABSSr;
- break;
- case MVT::f64:
- Opc = AArch64::FABSDr;
- break;
- }
- Register SrcReg = getRegForValue(II->getOperand(0));
- if (!SrcReg)
- return false;
- Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(SrcReg);
- updateValueMap(II, ResultReg);
- return true;
- }
- case Intrinsic::trap:
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
- .addImm(1);
- return true;
- case Intrinsic::debugtrap:
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
- .addImm(0xF000);
- return true;
- case Intrinsic::sqrt: {
- Type *RetTy = II->getCalledFunction()->getReturnType();
- MVT VT;
- if (!isTypeLegal(RetTy, VT))
- return false;
- Register Op0Reg = getRegForValue(II->getOperand(0));
- if (!Op0Reg)
- return false;
- unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
- case Intrinsic::sadd_with_overflow:
- case Intrinsic::uadd_with_overflow:
- case Intrinsic::ssub_with_overflow:
- case Intrinsic::usub_with_overflow:
- case Intrinsic::smul_with_overflow:
- case Intrinsic::umul_with_overflow: {
- // This implements the basic lowering of the xalu with overflow intrinsics.
- const Function *Callee = II->getCalledFunction();
- auto *Ty = cast<StructType>(Callee->getReturnType());
- Type *RetTy = Ty->getTypeAtIndex(0U);
- MVT VT;
- if (!isTypeLegal(RetTy, VT))
- return false;
- if (VT != MVT::i32 && VT != MVT::i64)
- return false;
- const Value *LHS = II->getArgOperand(0);
- const Value *RHS = II->getArgOperand(1);
- // Canonicalize immediate to the RHS.
- if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
- std::swap(LHS, RHS);
- // Simplify multiplies.
- Intrinsic::ID IID = II->getIntrinsicID();
- switch (IID) {
- default:
- break;
- case Intrinsic::smul_with_overflow:
- if (const auto *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue() == 2) {
- IID = Intrinsic::sadd_with_overflow;
- RHS = LHS;
- }
- break;
- case Intrinsic::umul_with_overflow:
- if (const auto *C = dyn_cast<ConstantInt>(RHS))
- if (C->getValue() == 2) {
- IID = Intrinsic::uadd_with_overflow;
- RHS = LHS;
- }
- break;
- }
- unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
- AArch64CC::CondCode CC = AArch64CC::Invalid;
- switch (IID) {
- default: llvm_unreachable("Unexpected intrinsic!");
- case Intrinsic::sadd_with_overflow:
- ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
- CC = AArch64CC::VS;
- break;
- case Intrinsic::uadd_with_overflow:
- ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
- CC = AArch64CC::HS;
- break;
- case Intrinsic::ssub_with_overflow:
- ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
- CC = AArch64CC::VS;
- break;
- case Intrinsic::usub_with_overflow:
- ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
- CC = AArch64CC::LO;
- break;
- case Intrinsic::smul_with_overflow: {
- CC = AArch64CC::NE;
- Register LHSReg = getRegForValue(LHS);
- if (!LHSReg)
- return false;
- Register RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- if (VT == MVT::i32) {
- MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
- Register MulSubReg =
- fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
- // cmp xreg, wreg, sxtw
- emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
- AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
- /*WantResult=*/false);
- MulReg = MulSubReg;
- } else {
- assert(VT == MVT::i64 && "Unexpected value type.");
- // LHSReg and RHSReg cannot be killed by this Mul, since they are
- // reused in the next instruction.
- MulReg = emitMul_rr(VT, LHSReg, RHSReg);
- unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
- emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
- /*WantResult=*/false);
- }
- break;
- }
- case Intrinsic::umul_with_overflow: {
- CC = AArch64CC::NE;
- Register LHSReg = getRegForValue(LHS);
- if (!LHSReg)
- return false;
- Register RHSReg = getRegForValue(RHS);
- if (!RHSReg)
- return false;
- if (VT == MVT::i32) {
- MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
- // tst xreg, #0xffffffff00000000
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::ANDSXri), AArch64::XZR)
- .addReg(MulReg)
- .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
- MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
- } else {
- assert(VT == MVT::i64 && "Unexpected value type.");
- // LHSReg and RHSReg cannot be killed by this Mul, since they are
- // reused in the next instruction.
- MulReg = emitMul_rr(VT, LHSReg, RHSReg);
- unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
- emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
- }
- break;
- }
- }
- if (MulReg) {
- ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
- }
- if (!ResultReg1)
- return false;
- ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
- AArch64::WZR, AArch64::WZR,
- getInvertedCondCode(CC));
- (void)ResultReg2;
- assert((ResultReg1 + 1) == ResultReg2 &&
- "Nonconsecutive result registers.");
- updateValueMap(II, ResultReg1, 2);
- return true;
- }
- }
- return false;
- }
- bool AArch64FastISel::selectRet(const Instruction *I) {
- const ReturnInst *Ret = cast<ReturnInst>(I);
- const Function &F = *I->getParent()->getParent();
- if (!FuncInfo.CanLowerReturn)
- return false;
- if (F.isVarArg())
- return false;
- if (TLI.supportSwiftError() &&
- F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
- return false;
- if (TLI.supportSplitCSR(FuncInfo.MF))
- return false;
- // Build a list of return value registers.
- SmallVector<unsigned, 4> RetRegs;
- if (Ret->getNumOperands() > 0) {
- CallingConv::ID CC = F.getCallingConv();
- SmallVector<ISD::OutputArg, 4> Outs;
- GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
- // Analyze operands of the call, assigning locations to each operand.
- SmallVector<CCValAssign, 16> ValLocs;
- CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
- CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
- : RetCC_AArch64_AAPCS;
- CCInfo.AnalyzeReturn(Outs, RetCC);
- // Only handle a single return value for now.
- if (ValLocs.size() != 1)
- return false;
- CCValAssign &VA = ValLocs[0];
- const Value *RV = Ret->getOperand(0);
- // Don't bother handling odd stuff for now.
- if ((VA.getLocInfo() != CCValAssign::Full) &&
- (VA.getLocInfo() != CCValAssign::BCvt))
- return false;
- // Only handle register returns for now.
- if (!VA.isRegLoc())
- return false;
- Register Reg = getRegForValue(RV);
- if (Reg == 0)
- return false;
- unsigned SrcReg = Reg + VA.getValNo();
- Register DestReg = VA.getLocReg();
- // Avoid a cross-class copy. This is very unlikely.
- if (!MRI.getRegClass(SrcReg)->contains(DestReg))
- return false;
- EVT RVEVT = TLI.getValueType(DL, RV->getType());
- if (!RVEVT.isSimple())
- return false;
- // Vectors (of > 1 lane) in big endian need tricky handling.
- if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
- !Subtarget->isLittleEndian())
- return false;
- MVT RVVT = RVEVT.getSimpleVT();
- if (RVVT == MVT::f128)
- return false;
- MVT DestVT = VA.getValVT();
- // Special handling for extended integers.
- if (RVVT != DestVT) {
- if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
- return false;
- if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
- return false;
- bool IsZExt = Outs[0].Flags.isZExt();
- SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
- if (SrcReg == 0)
- return false;
- }
- // "Callee" (i.e. value producer) zero extends pointers at function
- // boundary.
- if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
- SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
- // Make the copy.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
- // Add register to return instruction.
- RetRegs.push_back(VA.getLocReg());
- }
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::RET_ReallyLR));
- for (unsigned RetReg : RetRegs)
- MIB.addReg(RetReg, RegState::Implicit);
- return true;
- }
- bool AArch64FastISel::selectTrunc(const Instruction *I) {
- Type *DestTy = I->getType();
- Value *Op = I->getOperand(0);
- Type *SrcTy = Op->getType();
- EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
- EVT DestEVT = TLI.getValueType(DL, DestTy, true);
- if (!SrcEVT.isSimple())
- return false;
- if (!DestEVT.isSimple())
- return false;
- MVT SrcVT = SrcEVT.getSimpleVT();
- MVT DestVT = DestEVT.getSimpleVT();
- if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
- SrcVT != MVT::i8)
- return false;
- if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
- DestVT != MVT::i1)
- return false;
- Register SrcReg = getRegForValue(Op);
- if (!SrcReg)
- return false;
- // If we're truncating from i64 to a smaller non-legal type then generate an
- // AND. Otherwise, we know the high bits are undefined and a truncate only
- // generate a COPY. We cannot mark the source register also as result
- // register, because this can incorrectly transfer the kill flag onto the
- // source register.
- unsigned ResultReg;
- if (SrcVT == MVT::i64) {
- uint64_t Mask = 0;
- switch (DestVT.SimpleTy) {
- default:
- // Trunc i64 to i32 is handled by the target-independent fast-isel.
- return false;
- case MVT::i1:
- Mask = 0x1;
- break;
- case MVT::i8:
- Mask = 0xff;
- break;
- case MVT::i16:
- Mask = 0xffff;
- break;
- }
- // Issue an extract_subreg to get the lower 32-bits.
- Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
- AArch64::sub_32);
- // Create the AND instruction which performs the actual truncation.
- ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
- assert(ResultReg && "Unexpected AND instruction emission failure.");
- } else {
- ResultReg = createResultReg(&AArch64::GPR32RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(SrcReg);
- }
- updateValueMap(I, ResultReg);
- return true;
- }
- unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
- assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
- DestVT == MVT::i64) &&
- "Unexpected value type.");
- // Handle i8 and i16 as i32.
- if (DestVT == MVT::i8 || DestVT == MVT::i16)
- DestVT = MVT::i32;
- if (IsZExt) {
- unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
- assert(ResultReg && "Unexpected AND instruction emission failure.");
- if (DestVT == MVT::i64) {
- // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
- // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
- Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBREG_TO_REG), Reg64)
- .addImm(0)
- .addReg(ResultReg)
- .addImm(AArch64::sub_32);
- ResultReg = Reg64;
- }
- return ResultReg;
- } else {
- if (DestVT == MVT::i64) {
- // FIXME: We're SExt i1 to i64.
- return 0;
- }
- return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
- 0, 0);
- }
- }
- unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
- unsigned Opc, ZReg;
- switch (RetVT.SimpleTy) {
- default: return 0;
- case MVT::i8:
- case MVT::i16:
- case MVT::i32:
- RetVT = MVT::i32;
- Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
- case MVT::i64:
- Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
- }
- const TargetRegisterClass *RC =
- (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
- }
- unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
- if (RetVT != MVT::i64)
- return 0;
- return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
- Op0, Op1, AArch64::XZR);
- }
- unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
- if (RetVT != MVT::i64)
- return 0;
- return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
- Op0, Op1, AArch64::XZR);
- }
- unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
- unsigned Op1Reg) {
- unsigned Opc = 0;
- bool NeedTrunc = false;
- uint64_t Mask = 0;
- switch (RetVT.SimpleTy) {
- default: return 0;
- case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
- case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
- case MVT::i32: Opc = AArch64::LSLVWr; break;
- case MVT::i64: Opc = AArch64::LSLVXr; break;
- }
- const TargetRegisterClass *RC =
- (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- if (NeedTrunc)
- Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
- Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
- if (NeedTrunc)
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
- return ResultReg;
- }
- unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
- uint64_t Shift, bool IsZExt) {
- assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
- "Unexpected source/return type pair.");
- assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
- SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
- "Unexpected source value type.");
- assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
- RetVT == MVT::i64) && "Unexpected return value type.");
- bool Is64Bit = (RetVT == MVT::i64);
- unsigned RegSize = Is64Bit ? 64 : 32;
- unsigned DstBits = RetVT.getSizeInBits();
- unsigned SrcBits = SrcVT.getSizeInBits();
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- // Just emit a copy for "zero" shifts.
- if (Shift == 0) {
- if (RetVT == SrcVT) {
- Register ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0);
- return ResultReg;
- } else
- return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
- }
- // Don't deal with undefined shifts.
- if (Shift >= DstBits)
- return 0;
- // For immediate shifts we can fold the zero-/sign-extension into the shift.
- // {S|U}BFM Wd, Wn, #r, #s
- // Wd<32+s-r,32-r> = Wn<s:0> when r > s
- // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
- // %2 = shl i16 %1, 4
- // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
- // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
- // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
- // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
- // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
- // %2 = shl i16 %1, 8
- // Wd<32+7-24,32-24> = Wn<7:0>
- // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
- // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
- // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
- // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
- // %2 = shl i16 %1, 12
- // Wd<32+3-20,32-20> = Wn<3:0>
- // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
- // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
- // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
- unsigned ImmR = RegSize - Shift;
- // Limit the width to the length of the source type.
- unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
- static const unsigned OpcTable[2][2] = {
- {AArch64::SBFMWri, AArch64::SBFMXri},
- {AArch64::UBFMWri, AArch64::UBFMXri}
- };
- unsigned Opc = OpcTable[IsZExt][Is64Bit];
- if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
- Register TmpReg = MRI.createVirtualRegister(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBREG_TO_REG), TmpReg)
- .addImm(0)
- .addReg(Op0)
- .addImm(AArch64::sub_32);
- Op0 = TmpReg;
- }
- return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
- }
- unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
- unsigned Op1Reg) {
- unsigned Opc = 0;
- bool NeedTrunc = false;
- uint64_t Mask = 0;
- switch (RetVT.SimpleTy) {
- default: return 0;
- case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
- case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
- case MVT::i32: Opc = AArch64::LSRVWr; break;
- case MVT::i64: Opc = AArch64::LSRVXr; break;
- }
- const TargetRegisterClass *RC =
- (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- if (NeedTrunc) {
- Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
- Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
- }
- Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
- if (NeedTrunc)
- ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
- return ResultReg;
- }
- unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
- uint64_t Shift, bool IsZExt) {
- assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
- "Unexpected source/return type pair.");
- assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
- SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
- "Unexpected source value type.");
- assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
- RetVT == MVT::i64) && "Unexpected return value type.");
- bool Is64Bit = (RetVT == MVT::i64);
- unsigned RegSize = Is64Bit ? 64 : 32;
- unsigned DstBits = RetVT.getSizeInBits();
- unsigned SrcBits = SrcVT.getSizeInBits();
- const TargetRegisterClass *RC =
- Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
- // Just emit a copy for "zero" shifts.
- if (Shift == 0) {
- if (RetVT == SrcVT) {
- Register ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(Op0);
- return ResultReg;
- } else
- return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
- }
- // Don't deal with undefined shifts.
- if (Shift >= DstBits)
- return 0;
- // For immediate shifts we can fold the zero-/sign-extension into the shift.
- // {S|U}BFM Wd, Wn, #r, #s
- // Wd<s-r:0> = Wn<s:r> when r <= s
- // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
- // %2 = lshr i16 %1, 4
- // Wd<7-4:0> = Wn<7:4>
- // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
- // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
- // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
- // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
- // %2 = lshr i16 %1, 8
- // Wd<7-7,0> = Wn<7:7>
- // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
- // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
- // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
- // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
- // %2 = lshr i16 %1, 12
- // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
- // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
- // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
- // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
- if (Shift >= SrcBits && IsZExt)
- return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
- // It is not possible to fold a sign-extend into the LShr instruction. In this
- // case emit a sign-extend.
- if (!IsZExt) {
- Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
- if (!Op0)
- return 0;
- SrcVT = RetVT;
- SrcBits = SrcVT.getSizeInBits();
- IsZExt = true;
- }
- unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
- unsigned ImmS = SrcBits - 1;
- static const unsigned OpcTable[2][2] = {
- {AArch64::SBFMWri, AArch64::SBFMXri},
- {AArch64::UBFMWri, AArch64::UBFMXri}
- };
- unsigned Opc = OpcTable[IsZExt][Is64Bit];
- if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
- Register TmpReg = MRI.createVirtualRegister(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(AArch64::SUBREG_TO_REG), TmpReg)
- .addImm(0)
- .addReg(Op0)
- .addImm(AArch64::sub_32);
- Op0 = TmpReg;
- }
- return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
- }
- /// Emit an arithmetic shift right whose shift amount lives in a register.
- /// Handles i8/i16/i32/i64; any other \p RetVT yields 0. For i8/i16 the value
- /// operand is sign-extended to i32, the shift amount is ANDed with the type's
- /// mask (0xff / 0xffff), and the shifted result is ANDed with that mask again.
- unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
-                                      unsigned Op1Reg) {
-   const bool IsI8 = RetVT == MVT::i8;
-   const bool IsNarrow = IsI8 || RetVT == MVT::i16;
-   const bool Is64Bit = RetVT == MVT::i64;
-   if (!IsNarrow && !Is64Bit && RetVT != MVT::i32)
-     return 0;
-   const unsigned Opc = Is64Bit ? AArch64::ASRVXr : AArch64::ASRVWr;
-   const TargetRegisterClass *RC =
-       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
-   // Legal widths need no fix-up around the shift.
-   if (!IsNarrow)
-     return fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
-   // Narrow types are computed in a 32-bit register.
-   const uint64_t Mask = IsI8 ? 0xff : 0xffff;
-   Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
-   Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
-   Register ShiftedReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
-   ShiftedReg = emitAnd_ri(MVT::i32, ShiftedReg, Mask);
-   return ShiftedReg;
- }
- /// Emit an arithmetic shift right of \p Op0 by the constant \p Shift.
- /// \p SrcVT is the type of the value held in \p Op0 and \p RetVT the result
- /// type; when they differ, the zero-/sign-extension (selected by \p IsZExt) is
- /// folded into the bitfield-move instruction that performs the shift.
- /// Returns 0 if \p Shift is not smaller than the width of \p RetVT.
- unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
-                                      uint64_t Shift, bool IsZExt) {
-   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
-          "Unexpected source/return type pair.");
-   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
-           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
-          "Unexpected source value type.");
-   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
-           RetVT == MVT::i64) && "Unexpected return value type.");
-   const bool Is64Bit = (RetVT == MVT::i64);
-   const unsigned RegSize = Is64Bit ? 64 : 32;
-   const unsigned DstBits = RetVT.getSizeInBits();
-   const unsigned SrcBits = SrcVT.getSizeInBits();
-   const TargetRegisterClass *RC =
-       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
-   // A shift by zero degenerates into the extension itself, or into a plain
-   // register copy when no extension is pending.
-   if (Shift == 0) {
-     if (RetVT != SrcVT)
-       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
-     Register CopyReg = createResultReg(RC);
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-             TII.get(TargetOpcode::COPY), CopyReg)
-         .addReg(Op0);
-     return CopyReg;
-   }
-   // Shifting by the full result width or more is undefined; do not handle it.
-   if (Shift >= DstBits)
-     return 0;
-   // The extension is folded into the shift with a bitfield move:
-   //   {S|U}BFM Wd, Wn, #r, #s   =>   Wd<s-r:0> = Wn<s:r>   (when r <= s)
-   // with s = SrcBits - 1 and r = the shift amount clamped to s. Example for
-   //   %1 = {s|z}ext i8 %x to i16 ; %2 = ashr i16 %1, 4
-   // this extracts Wn<7:4> into Wd<3:0>. For shift amounts of 8 or more in
-   // that example, r is clamped to 7 so only the source sign bit is extracted
-   // (sext); for zext every source bit is shifted out and the result is the
-   // constant zero, which is materialized directly below.
-   if (IsZExt && Shift >= SrcBits)
-     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
-   const unsigned ImmS = SrcBits - 1;
-   const unsigned ImmR = std::min<unsigned>(ImmS, Shift);
-   unsigned Opc;
-   if (IsZExt)
-     Opc = Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri;
-   else
-     Opc = Is64Bit ? AArch64::SBFMXri : AArch64::SBFMWri;
-   // The 64-bit bitfield move needs a 64-bit source register, so wrap a
-   // 32-bit (or narrower) source with SUBREG_TO_REG first.
-   if (Is64Bit && SrcVT.SimpleTy <= MVT::i32) {
-     Register WideSrcReg = MRI.createVirtualRegister(RC);
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-             TII.get(AArch64::SUBREG_TO_REG), WideSrcReg)
-         .addImm(0)
-         .addReg(Op0)
-         .addImm(AArch64::sub_32);
-     Op0 = WideSrcReg;
-   }
-   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
- }
- /// Emit a zero- or sign-extension of \p SrcReg from \p SrcVT to \p DestVT.
- /// Supported sources are i1/i8/i16/i32 and supported destinations are
- /// i8/i16/i32/i64; anything else returns 0 so the caller can fall back to
- /// SelectionDAG. i1 sources are delegated to emiti1Ext; all others become a
- /// single {U|S}BFM with immr = 0 and imms = (source width - 1).
- unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
-                                      bool IsZExt) {
-   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
-   // FastISel has no plumbing for odd-sized types, so only proceed when both
-   // ends of the extension are types we know how to handle.
-   const bool DestSupported = DestVT == MVT::i8 || DestVT == MVT::i16 ||
-                              DestVT == MVT::i32 || DestVT == MVT::i64;
-   const bool SrcSupported = SrcVT == MVT::i1 || SrcVT == MVT::i8 ||
-                             SrcVT == MVT::i16 || SrcVT == MVT::i32;
-   if (!DestSupported || !SrcSupported)
-     return 0;
-   const bool DestIs64 = DestVT == MVT::i64;
-   // i8/i16 sources use the X-form only for a 64-bit destination; an i32
-   // source always uses the X-form.
-   bool UseXForm = DestIs64;
-   unsigned Imm = 0;
-   switch (SrcVT.SimpleTy) {
-   default:
-     return 0;
-   case MVT::i1:
-     return emiti1Ext(SrcReg, DestVT, IsZExt);
-   case MVT::i8:
-     Imm = 7;
-     break;
-   case MVT::i16:
-     Imm = 15;
-     break;
-   case MVT::i32:
-     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
-     UseXForm = true;
-     Imm = 31;
-     break;
-   }
-   unsigned Opc;
-   if (UseXForm)
-     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
-   else
-     Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
-   // i8 and i16 results live in 32-bit registers. A 64-bit result needs the
-   // 32-bit source wrapped in a 64-bit register first.
-   if (DestIs64) {
-     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
-     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-             TII.get(AArch64::SUBREG_TO_REG), Src64)
-         .addImm(0)
-         .addReg(SrcReg)
-         .addImm(AArch64::sub_32);
-     SrcReg = Src64;
-   }
-   const TargetRegisterClass *RC =
-       DestIs64 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
-   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
- }
- /// Return true if \p LI is one of the byte/halfword/word load opcodes that this
- /// file treats as zero-extending loads.
- static bool isZExtLoad(const MachineInstr *LI) {
-   const unsigned Opcode = LI->getOpcode();
-   switch (Opcode) {
-   // Unscaled immediate offset.
-   case AArch64::LDURBBi:
-   case AArch64::LDURHHi:
-   case AArch64::LDURWi:
-   // Unsigned scaled immediate offset.
-   case AArch64::LDRBBui:
-   case AArch64::LDRHHui:
-   case AArch64::LDRWui:
-   // Register offset (X index register).
-   case AArch64::LDRBBroX:
-   case AArch64::LDRHHroX:
-   case AArch64::LDRWroX:
-   // Register offset (W index register).
-   case AArch64::LDRBBroW:
-   case AArch64::LDRHHroW:
-   case AArch64::LDRWroW:
-     return true;
-   default:
-     return false;
-   }
- }
- /// Return true if \p LI is one of the sign-extending byte/halfword/word load
- /// opcodes (LDRS*/LDURS* into a W or X register).
- static bool isSExtLoad(const MachineInstr *LI) {
-   const unsigned Opcode = LI->getOpcode();
-   switch (Opcode) {
-   // Unscaled immediate offset.
-   case AArch64::LDURSBWi:
-   case AArch64::LDURSHWi:
-   case AArch64::LDURSBXi:
-   case AArch64::LDURSHXi:
-   case AArch64::LDURSWi:
-   // Unsigned scaled immediate offset.
-   case AArch64::LDRSBWui:
-   case AArch64::LDRSHWui:
-   case AArch64::LDRSBXui:
-   case AArch64::LDRSHXui:
-   case AArch64::LDRSWui:
-   // Register offset (X index register).
-   case AArch64::LDRSBWroX:
-   case AArch64::LDRSHWroX:
-   case AArch64::LDRSBXroX:
-   case AArch64::LDRSHXroX:
-   case AArch64::LDRSWroX:
-   // Register offset (W index register).
-   case AArch64::LDRSBWroW:
-   case AArch64::LDRSHWroW:
-   case AArch64::LDRSBXroW:
-   case AArch64::LDRSHXroW:
-   case AArch64::LDRSWroW:
-     return true;
-   default:
-     return false;
-   }
- }
- /// Try to satisfy the zext/sext \p I directly from the load that feeds it.
- /// This succeeds only when the operand is a single-use load that has already
- /// been selected and the emitted load opcode already performs the matching
- /// kind of extension. On success the value map for \p I is updated and true is
- /// returned; otherwise nothing is changed and false is returned.
- bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
-                                          MVT SrcVT) {
-   const auto *Load = dyn_cast<LoadInst>(I->getOperand(0));
-   if (!Load || !Load->hasOneUse())
-     return false;
-   // The load must already have been selected into a register.
-   Register Reg = lookUpRegForValue(Load);
-   if (!Reg)
-     return false;
-   MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
-   if (!DefMI)
-     return false;
-   // The register may be defined by a sub_32 COPY of the real load result; in
-   // that case look through the copy to find the load instruction.
-   const bool DefIsSub32Copy =
-       DefMI->getOpcode() == TargetOpcode::COPY &&
-       DefMI->getOperand(1).getSubReg() == AArch64::sub_32;
-   const MachineInstr *LoadMI = DefMI;
-   if (DefIsSub32Copy) {
-     Register LoadReg = DefMI->getOperand(1).getReg();
-     LoadMI = MRI.getUniqueVRegDef(LoadReg);
-     assert(LoadMI && "Expected valid instruction");
-   }
-   // Check if the correct load instruction has been emitted - SelectionDAG
-   // might have emitted a zero-extending load, but we need a sign-extending
-   // load (or vice versa).
-   const bool IsZExt = isa<ZExtInst>(I);
-   const bool ExtKindMatches = IsZExt ? isZExtLoad(LoadMI) : isSExtLoad(LoadMI);
-   if (!ExtKindMatches)
-     return false;
-   // Only an extension to i64 from i32 or narrower needs extra work; in every
-   // other case the already-selected register is reused unchanged.
-   const bool NeedsWideReg = RetVT == MVT::i64 && !(SrcVT > MVT::i32);
-   if (NeedsWideReg) {
-     if (IsZExt) {
-       // Wrap the 32-bit load result in a 64-bit register.
-       Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
-       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-               TII.get(AArch64::SUBREG_TO_REG), Reg64)
-           .addImm(0)
-           .addReg(Reg, getKillRegState(true))
-           .addImm(AArch64::sub_32);
-       Reg = Reg64;
-     } else {
-       // Use the source register of the sub_32 COPY directly and delete the
-       // now-dead copy.
-       assert(DefIsSub32Copy && "Expected copy instruction");
-       Reg = DefMI->getOperand(1).getReg();
-       MachineBasicBlock::iterator CopyIt(DefMI);
-       removeDeadCode(CopyIt, std::next(CopyIt));
-     }
-   }
-   updateValueMap(I, Reg);
-   return true;
- }
- /// Select a zext/sext instruction. Tries, in order: reusing an already
- /// extending load, reusing an argument that carries a matching
- /// zeroext/signext attribute, and finally emitting an explicit extension.
- /// Returns false if the instruction could not be selected.
- bool AArch64FastISel::selectIntExt(const Instruction *I) {
-   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
-          "Unexpected integer extend instruction.");
-   const Value *Src = I->getOperand(0);
-   MVT RetVT, SrcVT;
-   if (!isTypeSupported(I->getType(), RetVT) ||
-       !isTypeSupported(Src->getType(), SrcVT))
-     return false;
-   // Try to optimize already sign-/zero-extended values from load instructions.
-   if (optimizeIntExtLoad(I, RetVT, SrcVT))
-     return true;
-   Register SrcReg = getRegForValue(Src);
-   if (!SrcReg)
-     return false;
-   const bool IsZExt = isa<ZExtInst>(I);
-   // Try to optimize already sign-/zero-extended values from function
-   // arguments: a matching attribute means no extension instruction is needed.
-   const auto *Arg = dyn_cast<Argument>(Src);
-   if (Arg && (IsZExt ? Arg->hasZExtAttr() : Arg->hasSExtAttr())) {
-     if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
-       // The value still has to be placed in a 64-bit register.
-       Register WideReg = createResultReg(&AArch64::GPR64RegClass);
-       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
-               TII.get(AArch64::SUBREG_TO_REG), WideReg)
-           .addImm(0)
-           .addReg(SrcReg)
-           .addImm(AArch64::sub_32);
-       SrcReg = WideReg;
-     }
-     updateValueMap(I, SrcReg);
-     return true;
-   }
-   unsigned ExtReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
-   if (!ExtReg)
-     return false;
-   updateValueMap(I, ExtReg);
-   return true;
- }
- /// Select an i32/i64 srem/urem as a divide followed by a multiply-subtract.
- /// \p ISDOpcode must be ISD::SREM or ISD::UREM; any other opcode or result
- /// type makes the function return false.
- bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
-   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
-   if (!DestEVT.isSimple())
-     return false;
-   const MVT DestVT = DestEVT.getSimpleVT();
-   const bool Is64bit = (DestVT == MVT::i64);
-   if (!Is64bit && DestVT != MVT::i32)
-     return false;
-   const bool IsSigned = (ISDOpcode == ISD::SREM);
-   if (!IsSigned && ISDOpcode != ISD::UREM)
-     return false;
-   unsigned DivOpc;
-   if (IsSigned)
-     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
-   else
-     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
-   const unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
-   const TargetRegisterClass *RC =
-       Is64bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
-   Register NumReg = getRegForValue(I->getOperand(0));
-   if (!NumReg)
-     return false;
-   Register DenReg = getRegForValue(I->getOperand(1));
-   if (!DenReg)
-     return false;
-   Register QuotReg = fastEmitInst_rr(DivOpc, RC, NumReg, DenReg);
-   assert(QuotReg && "Unexpected DIV instruction emission failure.");
-   // The remainder is computed as numerator - (quotient * denominator) using
-   // the MSUB instruction.
-   Register RemReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, DenReg, NumReg);
-   updateValueMap(I, RemReg);
-   return true;
- }
- /// Select an integer multiply. Vector types are forwarded to selectBinaryOp.
- /// A multiply by a power-of-two constant is first attempted as a left shift
- /// (folding a non-free zext/sext of the other operand into the shift); if that
- /// does not produce a register, a regular multiply of both operands is emitted.
- bool AArch64FastISel::selectMul(const Instruction *I) {
-   MVT VT;
-   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
-     return false;
-   if (VT.isVector())
-     return selectBinaryOp(I, ISD::MUL);
-   const Value *LHS = I->getOperand(0);
-   const Value *RHS = I->getOperand(1);
-   // Move a power-of-two constant to the right-hand side.
-   const auto *LHSConst = dyn_cast<ConstantInt>(LHS);
-   if (LHSConst && LHSConst->getValue().isPowerOf2())
-     std::swap(LHS, RHS);
-   // Try to simplify to a shift instruction.
-   const auto *Pow2 = dyn_cast<ConstantInt>(RHS);
-   if (Pow2 && Pow2->getValue().isPowerOf2()) {
-     const uint64_t ShiftAmt = Pow2->getValue().logBase2();
-     MVT FromVT = VT;
-     bool FromZExt = true;
-     // If the shifted value is a non-free extension that has not been
-     // selected yet, shift its narrow source and let the shift do the extend.
-     if (const auto *ZExt = dyn_cast<ZExtInst>(LHS)) {
-       MVT NarrowVT;
-       if (!isIntExtFree(ZExt) && isValueAvailable(ZExt) &&
-           isTypeSupported(ZExt->getSrcTy(), NarrowVT)) {
-         FromVT = NarrowVT;
-         FromZExt = true;
-         LHS = ZExt->getOperand(0);
-       }
-     } else if (const auto *SExt = dyn_cast<SExtInst>(LHS)) {
-       MVT NarrowVT;
-       if (!isIntExtFree(SExt) && isValueAvailable(SExt) &&
-           isTypeSupported(SExt->getSrcTy(), NarrowVT)) {
-         FromVT = NarrowVT;
-         FromZExt = false;
-         LHS = SExt->getOperand(0);
-       }
-     }
-     Register ShiftSrcReg = getRegForValue(LHS);
-     if (!ShiftSrcReg)
-       return false;
-     unsigned ShiftReg = emitLSL_ri(VT, FromVT, ShiftSrcReg, ShiftAmt, FromZExt);
-     if (ShiftReg) {
-       updateValueMap(I, ShiftReg);
-       return true;
-     }
-     // Otherwise fall through to the generic multiply below.
-   }
-   Register MulLHSReg = getRegForValue(I->getOperand(0));
-   if (!MulLHSReg)
-     return false;
-   Register MulRHSReg = getRegForValue(I->getOperand(1));
-   if (!MulRHSReg)
-     return false;
-   unsigned ProductReg = emitMul_rr(VT, MulLHSReg, MulRHSReg);
-   if (!ProductReg)
-     return false;
-   updateValueMap(I, ProductReg);
-   return true;
- }
- /// Select shl/lshr/ashr. Vector shifts are forwarded to selectOperator.
- /// A constant shift amount uses the immediate emitters (folding a non-free
- /// zext/sext of the shifted value into the shift); a variable shift amount
- /// uses the register-register emitters.
- bool AArch64FastISel::selectShift(const Instruction *I) {
-   MVT RetVT;
-   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
-     return false;
-   if (RetVT.isVector())
-     return selectOperator(I, I->getOpcode());
-   const unsigned Opcode = I->getOpcode();
-   const auto *ShiftConst = dyn_cast<ConstantInt>(I->getOperand(1));
-   if (!ShiftConst) {
-     // Variable shift amount.
-     Register ValReg = getRegForValue(I->getOperand(0));
-     if (!ValReg)
-       return false;
-     Register AmtReg = getRegForValue(I->getOperand(1));
-     if (!AmtReg)
-       return false;
-     unsigned VarResultReg = 0;
-     switch (Opcode) {
-     default: llvm_unreachable("Unexpected instruction.");
-     case Instruction::Shl:
-       VarResultReg = emitLSL_rr(RetVT, ValReg, AmtReg);
-       break;
-     case Instruction::AShr:
-       VarResultReg = emitASR_rr(RetVT, ValReg, AmtReg);
-       break;
-     case Instruction::LShr:
-       VarResultReg = emitLSR_rr(RetVT, ValReg, AmtReg);
-       break;
-     }
-     if (!VarResultReg)
-       return false;
-     updateValueMap(I, VarResultReg);
-     return true;
-   }
-   // Constant shift amount.
-   const uint64_t ShiftAmt = ShiftConst->getZExtValue();
-   MVT FromVT = RetVT;
-   // Without a folded extension, only ashr treats the value as signed.
-   bool FromZExt = Opcode != Instruction::AShr;
-   const Value *Shifted = I->getOperand(0);
-   if (const auto *ZExt = dyn_cast<ZExtInst>(Shifted)) {
-     MVT NarrowVT;
-     if (!isIntExtFree(ZExt) && isValueAvailable(ZExt) &&
-         isTypeSupported(ZExt->getSrcTy(), NarrowVT)) {
-       FromVT = NarrowVT;
-       FromZExt = true;
-       Shifted = ZExt->getOperand(0);
-     }
-   } else if (const auto *SExt = dyn_cast<SExtInst>(Shifted)) {
-     MVT NarrowVT;
-     if (!isIntExtFree(SExt) && isValueAvailable(SExt) &&
-         isTypeSupported(SExt->getSrcTy(), NarrowVT)) {
-       FromVT = NarrowVT;
-       FromZExt = false;
-       Shifted = SExt->getOperand(0);
-     }
-   }
-   Register ShiftedReg = getRegForValue(Shifted);
-   if (!ShiftedReg)
-     return false;
-   unsigned ImmResultReg = 0;
-   switch (Opcode) {
-   default: llvm_unreachable("Unexpected instruction.");
-   case Instruction::Shl:
-     ImmResultReg = emitLSL_ri(RetVT, FromVT, ShiftedReg, ShiftAmt, FromZExt);
-     break;
-   case Instruction::AShr:
-     ImmResultReg = emitASR_ri(RetVT, FromVT, ShiftedReg, ShiftAmt, FromZExt);
-     break;
-   case Instruction::LShr:
-     ImmResultReg = emitLSR_ri(RetVT, FromVT, ShiftedReg, ShiftAmt, FromZExt);
-     break;
-   }
-   if (!ImmResultReg)
-     return false;
-   updateValueMap(I, ImmResultReg);
-   return true;
- }
- /// Select a same-width bitcast between an integer and a floating-point scalar
- /// (i32<->f32, i64<->f64) as a single FMOV. Any other type pair returns false.
- bool AArch64FastISel::selectBitCast(const Instruction *I) {
-   MVT RetVT, SrcVT;
-   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
-     return false;
-   if (!isTypeLegal(I->getType(), RetVT))
-     return false;
-   // Pick the FMOV variant together with the register class of its result.
-   unsigned Opc;
-   const TargetRegisterClass *RC = nullptr;
-   if (RetVT == MVT::f32 && SrcVT == MVT::i32) {
-     Opc = AArch64::FMOVWSr;
-     RC = &AArch64::FPR32RegClass;
-   } else if (RetVT == MVT::f64 && SrcVT == MVT::i64) {
-     Opc = AArch64::FMOVXDr;
-     RC = &AArch64::FPR64RegClass;
-   } else if (RetVT == MVT::i32 && SrcVT == MVT::f32) {
-     Opc = AArch64::FMOVSWr;
-     RC = &AArch64::GPR32RegClass;
-   } else if (RetVT == MVT::i64 && SrcVT == MVT::f64) {
-     Opc = AArch64::FMOVDXr;
-     RC = &AArch64::GPR64RegClass;
-   } else {
-     return false;
-   }
-   Register SrcReg = getRegForValue(I->getOperand(0));
-   if (!SrcReg)
-     return false;
-   Register CastReg = fastEmitInst_r(Opc, RC, SrcReg);
-   if (!CastReg)
-     return false;
-   updateValueMap(I, CastReg);
-   return true;
- }
- /// Select an f32/f64 frem by lowering it to a call to the REM_F32/REM_F64
- /// runtime library routine. Other result types return false.
- bool AArch64FastISel::selectFRem(const Instruction *I) {
-   MVT RetVT;
-   if (!isTypeLegal(I->getType(), RetVT))
-     return false;
-   RTLIB::Libcall LC;
-   if (RetVT == MVT::f32)
-     LC = RTLIB::REM_F32;
-   else if (RetVT == MVT::f64)
-     LC = RTLIB::REM_F64;
-   else
-     return false;
-   // Forward every operand of the instruction as a call argument.
-   ArgListTy CallArgs;
-   CallArgs.reserve(I->getNumOperands());
-   for (auto &Operand : I->operands()) {
-     ArgListEntry ArgEntry;
-     ArgEntry.Val = Operand;
-     ArgEntry.Ty = Operand->getType();
-     CallArgs.push_back(ArgEntry);
-   }
-   CallLoweringInfo CLI;
-   MCContext &Ctx = MF->getContext();
-   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
-                 TLI.getLibcallName(LC), std::move(CallArgs));
-   if (!lowerCallTo(CLI))
-     return false;
-   updateValueMap(I, CLI.ResultReg);
-   return true;
- }
- /// Select a signed division. Divisions of an i32/i64 value by a constant that
- /// is a (possibly negated) power of two are lowered to shifts; everything else
- /// is forwarded to selectBinaryOp.
- ///
- /// For a divisor of +/-2^Lg2 the quotient is computed as
- ///   Adj    = exact ? Src0 : ((Src0 < 0) ? Src0 + (2^Lg2 - 1) : Src0)
- ///   Result = (divisor < 0) ? 0 - (Adj >>s Lg2) : (Adj >>s Lg2)
- /// The negation applies to the exact case as well: "sdiv exact %x, -4" must
- /// produce -(x >> 2), not x >> 2.
- bool AArch64FastISel::selectSDiv(const Instruction *I) {
-   MVT VT;
-   if (!isTypeLegal(I->getType(), VT))
-     return false;
-   if (!isa<ConstantInt>(I->getOperand(1)))
-     return selectBinaryOp(I, ISD::SDIV);
-   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
-   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
-       !(C.isPowerOf2() || C.isNegatedPowerOf2()))
-     return selectBinaryOp(I, ISD::SDIV);
-   unsigned Lg2 = C.countTrailingZeros();
-   Register Src0Reg = getRegForValue(I->getOperand(0));
-   if (!Src0Reg)
-     return false;
-   // Register that is finally shifted right by Lg2. An exact division has no
-   // remainder, so the dividend can be shifted as-is; otherwise it first has
-   // to be biased so that the shift rounds towards zero.
-   unsigned DividendReg = Src0Reg;
-   if (!cast<BinaryOperator>(I)->isExact()) {
-     int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
-     unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
-     if (!AddReg)
-       return false;
-     // (Src0 < 0) ? Src0 + Pow2 - 1 : Src0; the compare sets the flags that
-     // the CSEL below consumes.
-     if (!emitICmp_ri(VT, Src0Reg, 0))
-       return false;
-     unsigned SelectOpc;
-     const TargetRegisterClass *RC;
-     if (VT == MVT::i64) {
-       SelectOpc = AArch64::CSELXr;
-       RC = &AArch64::GPR64RegClass;
-     } else {
-       SelectOpc = AArch64::CSELWr;
-       RC = &AArch64::GPR32RegClass;
-     }
-     Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
-                                           AArch64CC::LT);
-     if (!SelectReg)
-       return false;
-     DividendReg = SelectReg;
-   }
-   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must
-   // also negate the result, which is done by subtracting the shifted value
-   // from the zero register.
-   unsigned ResultReg;
-   if (C.isNegative()) {
-     unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
-     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, DividendReg,
-                               AArch64_AM::ASR, Lg2);
-   } else {
-     ResultReg = emitASR_ri(VT, VT, DividendReg, Lg2);
-   }
-   if (!ResultReg)
-     return false;
-   updateValueMap(I, ResultReg);
-   return true;
- }
- /// Return a pointer-width register holding the GEP index \p Idx, or 0 if the
- /// index could not be materialized. This mirrors the generic FastISel
- /// getRegForGEPIndex; it is duplicated for AArch64 because the generic version
- /// would fail when emitting the sign-extension.
- unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
-   Register IdxReg = getRegForValue(Idx);
-   // Unhandled operand. Halt "fast" selection and bail.
-   if (!IdxReg)
-     return 0;
-   const MVT PtrVT = TLI.getPointerTy(DL);
-   const EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
-   if (IdxVT.bitsGT(PtrVT))
-     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
-   // An index narrower than the pointer is sign-extended to pointer width.
-   if (IdxVT.bitsLT(PtrVT))
-     IdxReg = emitIntExt(IdxVT.getSimpleVT(), IdxReg, PtrVT, /*isZExt=*/false);
-   return IdxReg;
- }
- /// Select a getelementptr by accumulating the address in a register.
- /// This is mostly a copy of the existing FastISel GEP code, duplicated for
- /// AArch64 because the generic version would bail out even for simple cases:
- /// the standard fastEmit functions don't cover MUL at all and ADD is lowered
- /// very inefficiently. Not attempted for ILP32 targets.
- bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
-   if (Subtarget->isTargetILP32())
-     return false;
-   Register N = getRegForValue(I->getOperand(0));
-   if (!N)
-     return false;
-   // Constant offsets are summed here so that several N = N + Offset steps
-   // collapse into a single N = N + TotalOffs.
-   uint64_t TotalOffs = 0;
-   MVT VT = TLI.getPointerTy(DL);
-   // Add any pending constant offset to N. Returns false if the add could not
-   // be emitted.
-   auto FlushPendingOffset = [&]() -> bool {
-     if (!TotalOffs)
-       return true;
-     N = emitAdd_ri_(VT, N, TotalOffs);
-     if (!N)
-       return false;
-     TotalOffs = 0;
-     return true;
-   };
-   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
-        GTI != E; ++GTI) {
-     const Value *Idx = GTI.getOperand();
-     // Struct member: the index is a constant field number.
-     if (auto *StTy = GTI.getStructTypeOrNull()) {
-       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
-       if (Field)
-         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
-       continue;
-     }
-     Type *Ty = GTI.getIndexedType();
-     // Constant subscript: fold Idx * sizeof(Ty) into the pending offset.
-     if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
-       if (!CI->isZero())
-         TotalOffs += DL.getTypeAllocSize(Ty) * CI->getSExtValue();
-       continue;
-     }
-     // Variable subscript: N = N + Idx * ElementSize, after first applying
-     // whatever constant offset has been collected so far.
-     if (!FlushPendingOffset())
-       return false;
-     uint64_t ElementSize = DL.getTypeAllocSize(Ty);
-     unsigned IdxN = getRegForGEPIndex(Idx);
-     if (!IdxN)
-       return false;
-     if (ElementSize != 1) {
-       unsigned SizeReg = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
-       if (!SizeReg)
-         return false;
-       IdxN = emitMul_rr(VT, IdxN, SizeReg);
-       if (!IdxN)
-         return false;
-     }
-     N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
-     if (!N)
-       return false;
-   }
-   if (!FlushPendingOffset())
-     return false;
-   updateValueMap(I, N);
-   return true;
- }
- /// Select an i32/i64 cmpxchg using the CMP_SWAP_32/CMP_SWAP_64 pseudo, then
- /// derive the i1 status result by comparing the pseudo's result against the
- /// expected value. Two consecutive result registers are recorded for \p I.
- /// Returns false for any other value type.
- bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
-   assert(TM.getOptLevel() == CodeGenOpt::None &&
-          "cmpxchg survived AtomicExpand at optlevel > -O0");
-   auto *RetPairTy = cast<StructType>(I->getType());
-   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
-   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
-          "cmpxchg has a non-i1 status result");
-   MVT VT;
-   if (!isTypeLegal(RetTy, VT))
-     return false;
-   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
-   // extractvalue selection doesn't support that.
-   const bool Is32Bit = (VT == MVT::i32);
-   if (!Is32Bit && VT != MVT::i64)
-     return false;
-   const unsigned Opc = Is32Bit ? AArch64::CMP_SWAP_32 : AArch64::CMP_SWAP_64;
-   const unsigned CmpOpc = Is32Bit ? AArch64::SUBSWrs : AArch64::SUBSXrs;
-   const TargetRegisterClass *ResRC =
-       Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
-   const MCInstrDesc &II = TII.get(Opc);
-   // Constrain the three inputs to the register classes the pseudo expects;
-   // its use operands follow its defs.
-   const Register AddrReg = constrainOperandRegClass(
-       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
-   const Register DesiredReg = constrainOperandRegClass(
-       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
-   const Register NewReg = constrainOperandRegClass(
-       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
-   // The value and status registers are created back to back because the
-   // value map below records them as a run of two consecutive registers.
-   const Register ValueReg = createResultReg(ResRC);
-   const Register StatusReg = createResultReg(&AArch64::GPR32RegClass);
-   const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
-   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
-       .addDef(ValueReg)
-       .addDef(ScratchReg)
-       .addUse(AddrReg)
-       .addUse(DesiredReg)
-       .addUse(NewReg);
-   // Compare the pseudo's result with the expected value; only the flags are
-   // kept, the difference is discarded into the zero register.
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
-       .addDef(Is32Bit ? AArch64::WZR : AArch64::XZR)
-       .addUse(ValueReg)
-       .addUse(DesiredReg)
-       .addImm(0);
-   // CSINC Wd, WZR, WZR, NE yields 0 when the values differed and 1 when they
-   // were equal.
-   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
-       .addDef(StatusReg)
-       .addUse(AArch64::WZR)
-       .addUse(AArch64::WZR)
-       .addImm(AArch64CC::NE);
-   assert((ValueReg + 1) == StatusReg && "Nonconsecutive result registers.");
-   updateValueMap(I, ValueReg, 2);
-   return true;
- }
- /// Entry point for target-specific selection of \p I. Opcodes with an AArch64
- /// handler are dispatched to it; where a generic selector is tried first, the
- /// AArch64 handler runs only if the generic one fails. Opcodes without a case
- /// here go to the target-independent selectOperator.
- bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
-   const unsigned Opcode = I->getOpcode();
-   switch (Opcode) {
-   default:
-     break;
-   // Integer arithmetic.
-   case Instruction::Add:
-   case Instruction::Sub:
-     return selectAddSub(I);
-   case Instruction::Mul:
-     return selectMul(I);
-   case Instruction::SDiv:
-     return selectSDiv(I);
-   case Instruction::SRem:
-     return selectBinaryOp(I, ISD::SREM) || selectRem(I, ISD::SREM);
-   case Instruction::URem:
-     return selectBinaryOp(I, ISD::UREM) || selectRem(I, ISD::UREM);
-   case Instruction::Shl:
-   case Instruction::LShr:
-   case Instruction::AShr:
-     return selectShift(I);
-   case Instruction::And:
-   case Instruction::Or:
-   case Instruction::Xor:
-     return selectLogicalOp(I);
-   // Control flow.
-   case Instruction::Br:
-     return selectBranch(I);
-   case Instruction::IndirectBr:
-     return selectIndirectBr(I);
-   case Instruction::Ret:
-     return selectRet(I);
-   // Casts and conversions.
-   case Instruction::BitCast:
-     return FastISel::selectBitCast(I) || selectBitCast(I);
-   case Instruction::FPToSI:
-     return selectCast(I, ISD::FP_TO_SINT) || selectFPToInt(I, /*Signed=*/true);
-   case Instruction::FPToUI:
-     return selectFPToInt(I, /*Signed=*/false);
-   case Instruction::ZExt:
-   case Instruction::SExt:
-     return selectIntExt(I);
-   case Instruction::Trunc:
-     return selectCast(I, ISD::TRUNCATE) || selectTrunc(I);
-   case Instruction::FPExt:
-     return selectFPExt(I);
-   case Instruction::FPTrunc:
-     return selectFPTrunc(I);
-   case Instruction::SIToFP:
-     return selectCast(I, ISD::SINT_TO_FP) || selectIntToFP(I, /*Signed=*/true);
-   case Instruction::UIToFP:
-     return selectIntToFP(I, /*Signed=*/false);
-   // Memory.
-   case Instruction::Load:
-     return selectLoad(I);
-   case Instruction::Store:
-     return selectStore(I);
-   case Instruction::GetElementPtr:
-     return selectGetElementPtr(I);
-   case Instruction::AtomicCmpXchg:
-     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
-   // Comparisons, selects and floating-point remainder.
-   case Instruction::FCmp:
-   case Instruction::ICmp:
-     return selectCmp(I);
-   case Instruction::Select:
-     return selectSelect(I);
-   case Instruction::FRem:
-     return selectFRem(I);
-   }
-   // fall-back to target-independent instruction selection.
-   return selectOperator(I, Opcode);
- }
- /// Factory for the AArch64 FastISel implementation. The instance is allocated
- /// with new and is not freed here.
- FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
-                                   const TargetLibraryInfo *LibInfo) {
-   FastISel *Selector = new AArch64FastISel(FuncInfo, LibInfo);
-   return Selector;
- }
|