//===- ForwardOpTree.h ------------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Move instructions between statements. // //===----------------------------------------------------------------------===// #include "polly/ForwardOpTree.h" #include "polly/Options.h" #include "polly/ScopBuilder.h" #include "polly/ScopInfo.h" #include "polly/ScopPass.h" #include "polly/Support/GICHelper.h" #include "polly/Support/ISLOStream.h" #include "polly/Support/ISLTools.h" #include "polly/Support/VirtualInstruction.h" #include "polly/ZoneAlgo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Value.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "isl/ctx.h" #include "isl/isl-noexceptions.h" #include #include #define DEBUG_TYPE "polly-optree" using namespace llvm; using namespace polly; static cl::opt AnalyzeKnown("polly-optree-analyze-known", cl::desc("Analyze array contents for load forwarding"), cl::cat(PollyCategory), cl::init(true), cl::Hidden); static cl::opt NormalizePHIs("polly-optree-normalize-phi", cl::desc("Replace PHIs by their incoming values"), cl::cat(PollyCategory), cl::init(false), cl::Hidden); static cl::opt MaxOps("polly-optree-max-ops", cl::desc("Maximum number of ISL operations to invest for known " "analysis; 0=no limit"), cl::init(1000000), cl::cat(PollyCategory), cl::Hidden); STATISTIC(KnownAnalyzed, "Number of successfully analyzed SCoPs"); STATISTIC(KnownOutOfQuota, "Analyses aborted because max_operations was reached"); STATISTIC(TotalInstructionsCopied, "Number of copied instructions"); STATISTIC(TotalKnownLoadsForwarded, "Number of forwarded loads because their value was known"); STATISTIC(TotalReloads, "Number of reloaded values"); STATISTIC(TotalReadOnlyCopied, "Number of copied read-only accesses"); STATISTIC(TotalForwardedTrees, "Number of forwarded operand trees"); STATISTIC(TotalModifiedStmts, "Number of statements with at least one forwarded tree"); STATISTIC(ScopsModified, "Number of SCoPs with at least one forwarded tree"); STATISTIC(NumValueWrites, "Number of scalar value writes after OpTree"); STATISTIC(NumValueWritesInLoops, "Number of scalar value writes nested in affine loops after OpTree"); STATISTIC(NumPHIWrites, "Number of scalar phi writes after OpTree"); STATISTIC(NumPHIWritesInLoops, "Number of scalar phi writes nested in affine loops after OpTree"); STATISTIC(NumSingletonWrites, "Number of singleton writes after OpTree"); STATISTIC(NumSingletonWritesInLoops, "Number of singleton writes nested in affine loops after OpTree"); namespace { /// The state of whether an operand tree was/can be forwarded. /// /// The items apply to an instructions and its operand tree with the instruction /// as the root element. If the value in question is not an instruction in the /// SCoP, it can be a leaf of an instruction's operand tree. enum ForwardingDecision { /// An uninitialized value. FD_Unknown, /// The root instruction or value cannot be forwarded at all. FD_CannotForward, /// The root instruction or value can be forwarded as a leaf of a larger /// operand tree. /// It does not make sense to move the value itself, it would just replace it /// by a use of itself. For instance, a constant "5" used in a statement can /// be forwarded, but it would just replace it by the same constant "5". /// However, it makes sense to move as an operand of /// /// %add = add 5, 5 /// /// where "5" is moved as part of a larger operand tree. "5" would be placed /// (disregarding for a moment that literal constants don't have a location /// and can be used anywhere) into the same statement as %add would. FD_CanForwardLeaf, /// The root instruction can be forwarded and doing so avoids a scalar /// dependency. /// /// This can be either because the operand tree can be moved to the target /// statement, or a memory access is redirected to read from a different /// location. FD_CanForwardProfitably, /// A forwarding method cannot be applied to the operand tree. /// The difference to FD_CannotForward is that there might be other methods /// that can handle it. FD_NotApplicable }; /// Represents the evaluation of and action to taken when forwarding a value /// from an operand tree. struct ForwardingAction { using KeyTy = std::pair; /// Evaluation of forwarding a value. ForwardingDecision Decision = FD_Unknown; /// Callback to execute the forwarding. /// Returning true allows deleting the polly::MemoryAccess if the value is the /// root of the operand tree (and its elimination the reason why the /// forwarding is done). Return false if the MemoryAccess is reused or there /// might be other users of the read accesses. In the letter case the /// polly::SimplifyPass can remove dead MemoryAccesses. std::function Execute = []() -> bool { llvm_unreachable("unspecified how to forward"); }; /// Other values that need to be forwarded if this action is executed. Their /// actions are executed after this one. SmallVector Depends; /// Named ctor: The method creating this object does not apply to the kind of /// value, but other methods may. static ForwardingAction notApplicable() { ForwardingAction Result; Result.Decision = FD_NotApplicable; return Result; } /// Named ctor: The value cannot be forwarded. static ForwardingAction cannotForward() { ForwardingAction Result; Result.Decision = FD_CannotForward; return Result; } /// Named ctor: The value can just be used without any preparation. static ForwardingAction triviallyForwardable(bool IsProfitable, Value *Val) { ForwardingAction Result; Result.Decision = IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; Result.Execute = [=]() { LLVM_DEBUG(dbgs() << " trivially forwarded: " << *Val << "\n"); return true; }; return Result; } /// Name ctor: The value can be forwarded by executing an action. static ForwardingAction canForward(std::function Execute, ArrayRef Depends, bool IsProfitable) { ForwardingAction Result; Result.Decision = IsProfitable ? FD_CanForwardProfitably : FD_CanForwardLeaf; Result.Execute = std::move(Execute); Result.Depends.append(Depends.begin(), Depends.end()); return Result; } }; /// Implementation of operand tree forwarding for a specific SCoP. /// /// For a statement that requires a scalar value (through a value read /// MemoryAccess), see if its operand can be moved into the statement. If so, /// the MemoryAccess is removed and the all the operand tree instructions are /// moved into the statement. All original instructions are left in the source /// statements. The simplification pass can clean these up. class ForwardOpTreeImpl : ZoneAlgorithm { private: using MemoizationTy = DenseMap; /// Scope guard to limit the number of isl operations for this pass. IslMaxOperationsGuard &MaxOpGuard; /// How many instructions have been copied to other statements. int NumInstructionsCopied = 0; /// Number of loads forwarded because their value was known. int NumKnownLoadsForwarded = 0; /// Number of values reloaded from known array elements. int NumReloads = 0; /// How many read-only accesses have been copied. int NumReadOnlyCopied = 0; /// How many operand trees have been forwarded. int NumForwardedTrees = 0; /// Number of statements with at least one forwarded operand tree. int NumModifiedStmts = 0; /// Whether we carried out at least one change to the SCoP. bool Modified = false; /// Cache of how to forward values. /// The key of this map is the llvm::Value to be forwarded and the /// polly::ScopStmt it is forwarded from. This is because the same llvm::Value /// can evaluate differently depending on where it is evaluate. For instance, /// a synthesizable Scev represents a recurrence with an loop but the loop's /// exit value if evaluated after the loop. /// The cached results are only valid for the current TargetStmt. /// CHECKME: ScalarEvolution::getScevAtScope should take care for getting the /// exit value when instantiated outside of the loop. The primary concern is /// ambiguity when crossing PHI nodes, which currently is not supported. MemoizationTy ForwardingActions; /// Contains the zones where array elements are known to contain a specific /// value. /// { [Element[] -> Zone[]] -> ValInst[] } /// @see computeKnown() isl::union_map Known; /// Translator for newly introduced ValInsts to already existing ValInsts such /// that new introduced load instructions can reuse the Known analysis of its /// original load. { ValInst[] -> ValInst[] } isl::union_map Translator; /// Get list of array elements that do contain the same ValInst[] at Domain[]. /// /// @param ValInst { Domain[] -> ValInst[] } /// The values for which we search for alternative locations, /// per statement instance. /// /// @return { Domain[] -> Element[] } /// For each statement instance, the array elements that contain the /// same ValInst. isl::union_map findSameContentElements(isl::union_map ValInst) { assert(!ValInst.is_single_valued().is_false()); // { Domain[] } isl::union_set Domain = ValInst.domain(); // { Domain[] -> Scatter[] } isl::union_map Schedule = getScatterFor(Domain); // { Element[] -> [Scatter[] -> ValInst[]] } isl::union_map MustKnownCurried = convertZoneToTimepoints(Known, isl::dim::in, false, true).curry(); // { [Domain[] -> ValInst[]] -> Scatter[] } isl::union_map DomValSched = ValInst.domain_map().apply_range(Schedule); // { [Scatter[] -> ValInst[]] -> [Domain[] -> ValInst[]] } isl::union_map SchedValDomVal = DomValSched.range_product(ValInst.range_map()).reverse(); // { Element[] -> [Domain[] -> ValInst[]] } isl::union_map MustKnownInst = MustKnownCurried.apply_range(SchedValDomVal); // { Domain[] -> Element[] } isl::union_map MustKnownMap = MustKnownInst.uncurry().domain().unwrap().reverse(); simplify(MustKnownMap); return MustKnownMap; } /// Find a single array element for each statement instance, within a single /// array. /// /// @param MustKnown { Domain[] -> Element[] } /// Set of candidate array elements. /// @param Domain { Domain[] } /// The statement instance for which we need elements for. /// /// @return { Domain[] -> Element[] } /// For each statement instance, an array element out of @p MustKnown. /// All array elements must be in the same array (Polly does not yet /// support reading from different accesses using the same /// MemoryAccess). If no mapping for all of @p Domain exists, returns /// null. isl::map singleLocation(isl::union_map MustKnown, isl::set Domain) { // { Domain[] -> Element[] } isl::map Result; // Make irrelevant elements not interfere. Domain = Domain.intersect_params(S->getContext()); // MemoryAccesses can read only elements from a single array // (i.e. not: { Dom[0] -> A[0]; Dom[1] -> B[1] }). // Look through all spaces until we find one that contains at least the // wanted statement instance.s for (isl::map Map : MustKnown.get_map_list()) { // Get the array this is accessing. isl::id ArrayId = Map.get_tuple_id(isl::dim::out); ScopArrayInfo *SAI = static_cast(ArrayId.get_user()); // No support for generation of indirect array accesses. if (SAI->getBasePtrOriginSAI()) continue; // Determine whether this map contains all wanted values. isl::set MapDom = Map.domain(); if (!Domain.is_subset(MapDom).is_true()) continue; // There might be multiple array elements that contain the same value, but // choose only one of them. lexmin is used because it returns a one-value // mapping, we do not care about which one. // TODO: Get the simplest access function. Result = Map.lexmin(); break; } return Result; } public: ForwardOpTreeImpl(Scop *S, LoopInfo *LI, IslMaxOperationsGuard &MaxOpGuard) : ZoneAlgorithm("polly-optree", S, LI), MaxOpGuard(MaxOpGuard) {} /// Compute the zones of known array element contents. /// /// @return True if the computed #Known is usable. bool computeKnownValues() { isl::union_map MustKnown, KnownFromLoad, KnownFromInit; // Check that nothing strange occurs. collectCompatibleElts(); { IslQuotaScope QuotaScope = MaxOpGuard.enter(); computeCommon(); if (NormalizePHIs) computeNormalizedPHIs(); Known = computeKnown(true, true); // Preexisting ValInsts use the known content analysis of themselves. Translator = makeIdentityMap(Known.range(), false); } if (Known.is_null() || Translator.is_null() || NormalizeMap.is_null()) { assert(isl_ctx_last_error(IslCtx.get()) == isl_error_quota); Known = {}; Translator = {}; NormalizeMap = {}; LLVM_DEBUG(dbgs() << "Known analysis exceeded max_operations\n"); return false; } KnownAnalyzed++; LLVM_DEBUG(dbgs() << "All known: " << Known << "\n"); return true; } void printStatistics(raw_ostream &OS, int Indent = 0) { OS.indent(Indent) << "Statistics {\n"; OS.indent(Indent + 4) << "Instructions copied: " << NumInstructionsCopied << '\n'; OS.indent(Indent + 4) << "Known loads forwarded: " << NumKnownLoadsForwarded << '\n'; OS.indent(Indent + 4) << "Reloads: " << NumReloads << '\n'; OS.indent(Indent + 4) << "Read-only accesses copied: " << NumReadOnlyCopied << '\n'; OS.indent(Indent + 4) << "Operand trees forwarded: " << NumForwardedTrees << '\n'; OS.indent(Indent + 4) << "Statements with forwarded operand trees: " << NumModifiedStmts << '\n'; OS.indent(Indent) << "}\n"; } void printStatements(raw_ostream &OS, int Indent = 0) const { OS.indent(Indent) << "After statements {\n"; for (auto &Stmt : *S) { OS.indent(Indent + 4) << Stmt.getBaseName() << "\n"; for (auto *MA : Stmt) MA->print(OS); OS.indent(Indent + 12); Stmt.printInstructions(OS); } OS.indent(Indent) << "}\n"; } /// Create a new MemoryAccess of type read and MemoryKind::Array. /// /// @param Stmt The statement in which the access occurs. /// @param LI The instruction that does the access. /// @param AccessRelation The array element that each statement instance /// accesses. /// /// @param The newly created access. MemoryAccess *makeReadArrayAccess(ScopStmt *Stmt, LoadInst *LI, isl::map AccessRelation) { isl::id ArrayId = AccessRelation.get_tuple_id(isl::dim::out); ScopArrayInfo *SAI = reinterpret_cast(ArrayId.get_user()); // Create a dummy SCEV access, to be replaced anyway. SmallVector Sizes; Sizes.reserve(SAI->getNumberOfDimensions()); SmallVector Subscripts; Subscripts.reserve(SAI->getNumberOfDimensions()); for (unsigned i = 0; i < SAI->getNumberOfDimensions(); i += 1) { Sizes.push_back(SAI->getDimensionSize(i)); Subscripts.push_back(nullptr); } MemoryAccess *Access = new MemoryAccess(Stmt, LI, MemoryAccess::READ, SAI->getBasePtr(), LI->getType(), true, {}, Sizes, LI, MemoryKind::Array); S->addAccessFunction(Access); Stmt->addAccess(Access, true); Access->setNewAccessRelation(AccessRelation); return Access; } /// Forward a load by reading from an array element that contains the same /// value. Typically the location it was loaded from. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param Inst The (possibly speculatable) instruction to forward. /// @param UseStmt The statement that uses @p Inst. /// @param UseLoop The loop @p Inst is used in. /// @param DefStmt The statement @p Inst is defined in. /// @param DefLoop The loop which contains @p Inst. /// /// @return A ForwardingAction object describing the feasibility and /// profitability evaluation and the callback carrying-out the value /// forwarding. ForwardingAction forwardKnownLoad(ScopStmt *TargetStmt, Instruction *Inst, ScopStmt *UseStmt, Loop *UseLoop, ScopStmt *DefStmt, Loop *DefLoop) { // Cannot do anything without successful known analysis. if (Known.is_null() || Translator.is_null() || MaxOpGuard.hasQuotaExceeded()) return ForwardingAction::notApplicable(); LoadInst *LI = dyn_cast(Inst); if (!LI) return ForwardingAction::notApplicable(); ForwardingDecision OpDecision = forwardTree(TargetStmt, LI->getPointerOperand(), DefStmt, DefLoop); switch (OpDecision) { case FD_CanForwardProfitably: case FD_CanForwardLeaf: break; case FD_CannotForward: return ForwardingAction::cannotForward(); default: llvm_unreachable("Shouldn't return this"); } MemoryAccess *Access = TargetStmt->getArrayAccessOrNULLFor(LI); if (Access) { // If the load is already in the statement, no forwarding is necessary. // However, it might happen that the LoadInst is already present in the // statement's instruction list. In that case we do as follows: // - For the evaluation, we can trivially forward it as it is // benefit of forwarding an already present instruction. // - For the execution, prepend the instruction (to make it // available to all instructions following in the instruction list), but // do not add another MemoryAccess. auto ExecAction = [this, TargetStmt, LI, Access]() -> bool { TargetStmt->prependInstruction(LI); LLVM_DEBUG( dbgs() << " forwarded known load with preexisting MemoryAccess" << Access << "\n"); (void)Access; NumKnownLoadsForwarded++; TotalKnownLoadsForwarded++; return true; }; return ForwardingAction::canForward( ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); } // Allow the following Isl calculations (until we return the // ForwardingAction, excluding the code inside the lambda that will be // executed later) to fail. IslQuotaScope QuotaScope = MaxOpGuard.enter(); // { DomainDef[] -> ValInst[] } isl::map ExpectedVal = makeValInst(Inst, UseStmt, UseLoop); assert(!isNormalized(ExpectedVal).is_false() && "LoadInsts are always normalized"); // { DomainUse[] -> DomainTarget[] } isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); // { DomainTarget[] -> ValInst[] } isl::map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); isl::union_map TranslatedExpectedVal = isl::union_map(TargetExpectedVal).apply_range(Translator); // { DomainTarget[] -> Element[] } isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); if (SameVal.is_null()) return ForwardingAction::notApplicable(); LLVM_DEBUG(dbgs() << " expected values where " << TargetExpectedVal << "\n"); LLVM_DEBUG(dbgs() << " candidate elements where " << Candidates << "\n"); // { ValInst[] } isl::space ValInstSpace = ExpectedVal.get_space().range(); // After adding a new load to the SCoP, also update the Known content // about it. The new load will have a known ValInst of // { [DomainTarget[] -> Value[]] } // but which -- because it is a copy of it -- has same value as the // { [DomainDef[] -> Value[]] } // that it replicates. Instead of cloning the known content of // [DomainDef[] -> Value[]] // for DomainTarget[], we add a 'translator' that maps // [DomainTarget[] -> Value[]] to [DomainDef[] -> Value[]] // before comparing to the known content. // TODO: 'Translator' could also be used to map PHINodes to their incoming // ValInsts. isl::map LocalTranslator; if (!ValInstSpace.is_wrapping().is_false()) { // { DefDomain[] -> Value[] } isl::map ValInsts = ExpectedVal.range().unwrap(); // { DefDomain[] } isl::set DefDomain = ValInsts.domain(); // { Value[] } isl::space ValSpace = ValInstSpace.unwrap().range(); // { Value[] -> Value[] } isl::map ValToVal = isl::map::identity(ValSpace.map_from_domain_and_range(ValSpace)); // { DomainDef[] -> DomainTarget[] } isl::map DefToTarget = getDefToTarget(DefStmt, TargetStmt); // { [TargetDomain[] -> Value[]] -> [DefDomain[] -> Value] } LocalTranslator = DefToTarget.reverse().product(ValToVal); LLVM_DEBUG(dbgs() << " local translator is " << LocalTranslator << "\n"); if (LocalTranslator.is_null()) return ForwardingAction::notApplicable(); } auto ExecAction = [this, TargetStmt, LI, SameVal, LocalTranslator]() -> bool { TargetStmt->prependInstruction(LI); MemoryAccess *Access = makeReadArrayAccess(TargetStmt, LI, SameVal); LLVM_DEBUG(dbgs() << " forwarded known load with new MemoryAccess" << Access << "\n"); (void)Access; if (!LocalTranslator.is_null()) Translator = Translator.unite(LocalTranslator); NumKnownLoadsForwarded++; TotalKnownLoadsForwarded++; return true; }; return ForwardingAction::canForward( ExecAction, {{LI->getPointerOperand(), DefStmt}}, true); } /// Forward a scalar by redirecting the access to an array element that stores /// the same value. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param Inst The scalar to forward. /// @param UseStmt The statement that uses @p Inst. /// @param UseLoop The loop @p Inst is used in. /// @param DefStmt The statement @p Inst is defined in. /// @param DefLoop The loop which contains @p Inst. /// /// @return A ForwardingAction object describing the feasibility and /// profitability evaluation and the callback carrying-out the value /// forwarding. ForwardingAction reloadKnownContent(ScopStmt *TargetStmt, Instruction *Inst, ScopStmt *UseStmt, Loop *UseLoop, ScopStmt *DefStmt, Loop *DefLoop) { // Cannot do anything without successful known analysis. if (Known.is_null() || Translator.is_null() || MaxOpGuard.hasQuotaExceeded()) return ForwardingAction::notApplicable(); // Don't spend too much time analyzing whether it can be reloaded. IslQuotaScope QuotaScope = MaxOpGuard.enter(); // { DomainDef[] -> ValInst[] } isl::union_map ExpectedVal = makeNormalizedValInst(Inst, UseStmt, UseLoop); // { DomainUse[] -> DomainTarget[] } isl::map UseToTarget = getDefToTarget(UseStmt, TargetStmt); // { DomainTarget[] -> ValInst[] } isl::union_map TargetExpectedVal = ExpectedVal.apply_domain(UseToTarget); isl::union_map TranslatedExpectedVal = TargetExpectedVal.apply_range(Translator); // { DomainTarget[] -> Element[] } isl::union_map Candidates = findSameContentElements(TranslatedExpectedVal); isl::map SameVal = singleLocation(Candidates, getDomainFor(TargetStmt)); simplify(SameVal); if (SameVal.is_null()) return ForwardingAction::notApplicable(); auto ExecAction = [this, TargetStmt, Inst, SameVal]() { MemoryAccess *Access = TargetStmt->lookupInputAccessOf(Inst); if (!Access) Access = TargetStmt->ensureValueRead(Inst); Access->setNewAccessRelation(SameVal); LLVM_DEBUG(dbgs() << " forwarded known content of " << *Inst << " which is " << SameVal << "\n"); TotalReloads++; NumReloads++; return false; }; return ForwardingAction::canForward(ExecAction, {}, true); } /// Forwards a speculatively executable instruction. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param UseInst The (possibly speculatable) instruction to forward. /// @param DefStmt The statement @p UseInst is defined in. /// @param DefLoop The loop which contains @p UseInst. /// /// @return A ForwardingAction object describing the feasibility and /// profitability evaluation and the callback carrying-out the value /// forwarding. ForwardingAction forwardSpeculatable(ScopStmt *TargetStmt, Instruction *UseInst, ScopStmt *DefStmt, Loop *DefLoop) { // PHIs, unless synthesizable, are not yet supported. if (isa(UseInst)) return ForwardingAction::notApplicable(); // Compatible instructions must satisfy the following conditions: // 1. Idempotent (instruction will be copied, not moved; although its // original instance might be removed by simplification) // 2. Not access memory (There might be memory writes between) // 3. Not cause undefined behaviour (we might copy to a location when the // original instruction was no executed; this is currently not possible // because we do not forward PHINodes) // 4. Not leak memory if executed multiple times (i.e. malloc) // // Instruction::mayHaveSideEffects is not sufficient because it considers // malloc to not have side-effects. llvm::isSafeToSpeculativelyExecute is // not sufficient because it allows memory accesses. if (mayBeMemoryDependent(*UseInst)) return ForwardingAction::notApplicable(); SmallVector Depends; Depends.reserve(UseInst->getNumOperands()); for (Value *OpVal : UseInst->operand_values()) { ForwardingDecision OpDecision = forwardTree(TargetStmt, OpVal, DefStmt, DefLoop); switch (OpDecision) { case FD_CannotForward: return ForwardingAction::cannotForward(); case FD_CanForwardLeaf: case FD_CanForwardProfitably: Depends.emplace_back(OpVal, DefStmt); break; case FD_NotApplicable: case FD_Unknown: llvm_unreachable( "forwardTree should never return FD_NotApplicable/FD_Unknown"); } } auto ExecAction = [this, TargetStmt, UseInst]() { // To ensure the right order, prepend this instruction before its // operands. This ensures that its operands are inserted before the // instruction using them. TargetStmt->prependInstruction(UseInst); LLVM_DEBUG(dbgs() << " forwarded speculable instruction: " << *UseInst << "\n"); NumInstructionsCopied++; TotalInstructionsCopied++; return true; }; return ForwardingAction::canForward(ExecAction, Depends, true); } /// Determines whether an operand tree can be forwarded and returns /// instructions how to do so in the form of a ForwardingAction object. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param UseVal The value (usually an instruction) which is root of an /// operand tree. /// @param UseStmt The statement that uses @p UseVal. /// @param UseLoop The loop @p UseVal is used in. /// /// @return A ForwardingAction object describing the feasibility and /// profitability evaluation and the callback carrying-out the value /// forwarding. ForwardingAction forwardTreeImpl(ScopStmt *TargetStmt, Value *UseVal, ScopStmt *UseStmt, Loop *UseLoop) { ScopStmt *DefStmt = nullptr; Loop *DefLoop = nullptr; // { DefDomain[] -> TargetDomain[] } isl::map DefToTarget; VirtualUse VUse = VirtualUse::create(UseStmt, UseLoop, UseVal, true); switch (VUse.getKind()) { case VirtualUse::Constant: case VirtualUse::Block: case VirtualUse::Hoisted: // These can be used anywhere without special considerations. return ForwardingAction::triviallyForwardable(false, UseVal); case VirtualUse::Synthesizable: { // Check if the value is synthesizable at the new location as well. This // might be possible when leaving a loop for which ScalarEvolution is // unable to derive the exit value for. // TODO: If there is a LCSSA PHI at the loop exit, use that one. // If the SCEV contains a SCEVAddRecExpr, we currently depend on that we // do not forward past its loop header. This would require us to use a // previous loop induction variable instead the current one. We currently // do not allow forwarding PHI nodes, thus this should never occur (the // only exception where no phi is necessary being an unreachable loop // without edge from the outside). VirtualUse TargetUse = VirtualUse::create( S, TargetStmt, TargetStmt->getSurroundingLoop(), UseVal, true); if (TargetUse.getKind() == VirtualUse::Synthesizable) return ForwardingAction::triviallyForwardable(false, UseVal); LLVM_DEBUG( dbgs() << " Synthesizable would not be synthesizable anymore: " << *UseVal << "\n"); return ForwardingAction::cannotForward(); } case VirtualUse::ReadOnly: { if (!ModelReadOnlyScalars) return ForwardingAction::triviallyForwardable(false, UseVal); // If we model read-only scalars, we need to create a MemoryAccess for it. auto ExecAction = [this, TargetStmt, UseVal]() { TargetStmt->ensureValueRead(UseVal); LLVM_DEBUG(dbgs() << " forwarded read-only value " << *UseVal << "\n"); NumReadOnlyCopied++; TotalReadOnlyCopied++; // Note that we cannot return true here. With a operand tree // depth of 0, UseVal is the use in TargetStmt that we try to replace. // With -polly-analyze-read-only-scalars=true we would ensure the // existence of a MemoryAccess (which already exists for a leaf) and be // removed again by tryForwardTree because it's goal is to remove this // scalar MemoryAccess. It interprets FD_CanForwardTree as the // permission to do so. return false; }; return ForwardingAction::canForward(ExecAction, {}, false); } case VirtualUse::Intra: // Knowing that UseStmt and DefStmt are the same statement instance, just // reuse the information about UseStmt for DefStmt DefStmt = UseStmt; LLVM_FALLTHROUGH; case VirtualUse::Inter: Instruction *Inst = cast(UseVal); if (!DefStmt) { DefStmt = S->getStmtFor(Inst); if (!DefStmt) return ForwardingAction::cannotForward(); } DefLoop = LI->getLoopFor(Inst->getParent()); ForwardingAction SpeculativeResult = forwardSpeculatable(TargetStmt, Inst, DefStmt, DefLoop); if (SpeculativeResult.Decision != FD_NotApplicable) return SpeculativeResult; ForwardingAction KnownResult = forwardKnownLoad( TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); if (KnownResult.Decision != FD_NotApplicable) return KnownResult; ForwardingAction ReloadResult = reloadKnownContent( TargetStmt, Inst, UseStmt, UseLoop, DefStmt, DefLoop); if (ReloadResult.Decision != FD_NotApplicable) return ReloadResult; // When no method is found to forward the operand tree, we effectively // cannot handle it. LLVM_DEBUG(dbgs() << " Cannot forward instruction: " << *Inst << "\n"); return ForwardingAction::cannotForward(); } llvm_unreachable("Case unhandled"); } /// Determines whether an operand tree can be forwarded. Previous evaluations /// are cached. /// /// @param TargetStmt The statement the operand tree will be copied to. /// @param UseVal The value (usually an instruction) which is root of an /// operand tree. /// @param UseStmt The statement that uses @p UseVal. /// @param UseLoop The loop @p UseVal is used in. /// /// @return FD_CannotForward if @p UseVal cannot be forwarded. /// FD_CanForwardLeaf if @p UseVal is forwardable, but not /// profitable. /// FD_CanForwardProfitably if @p UseVal is forwardable and useful to /// do. ForwardingDecision forwardTree(ScopStmt *TargetStmt, Value *UseVal, ScopStmt *UseStmt, Loop *UseLoop) { // Lookup any cached evaluation. auto It = ForwardingActions.find({UseVal, UseStmt}); if (It != ForwardingActions.end()) return It->second.Decision; // Make a new evaluation. ForwardingAction Action = forwardTreeImpl(TargetStmt, UseVal, UseStmt, UseLoop); ForwardingDecision Result = Action.Decision; // Remember for the next time. assert(!ForwardingActions.count({UseVal, UseStmt}) && "circular dependency?"); ForwardingActions.insert({{UseVal, UseStmt}, std::move(Action)}); return Result; } /// Forward an operand tree using cached actions. /// /// @param Stmt Statement the operand tree is moved into. /// @param UseVal Root of the operand tree within @p Stmt. /// @param RA The MemoryAccess for @p UseVal that the forwarding intends /// to remove. void applyForwardingActions(ScopStmt *Stmt, Value *UseVal, MemoryAccess *RA) { using ChildItTy = decltype(std::declval().Depends.begin()); using EdgeTy = std::pair; DenseSet Visited; SmallVector Stack; SmallVector Ordered; // Seed the tree search using the root value. assert(ForwardingActions.count({UseVal, Stmt})); ForwardingAction *RootAction = &ForwardingActions[{UseVal, Stmt}]; Stack.emplace_back(RootAction, RootAction->Depends.begin()); // Compute the postorder of the operand tree: all operands of an instruction // must be visited before the instruction itself. As an additional // requirement, the topological ordering must be 'compact': Any subtree node // must not be interleaved with nodes from a non-shared subtree. This is // because the same llvm::Instruction can be materialized multiple times as // used at different ScopStmts which might be different values. Intersecting // these lifetimes may result in miscompilations. // FIXME: Intersecting lifetimes might still be possible for the roots // themselves, since instructions are just prepended to a ScopStmt's // instruction list. while (!Stack.empty()) { EdgeTy &Top = Stack.back(); ForwardingAction *TopAction = Top.first; ChildItTy &TopEdge = Top.second; if (TopEdge == TopAction->Depends.end()) { // Postorder sorting Ordered.push_back(TopAction); Stack.pop_back(); continue; } ForwardingAction::KeyTy Key = *TopEdge; // Next edge for this level ++TopEdge; auto VisitIt = Visited.insert(Key); if (!VisitIt.second) continue; assert(ForwardingActions.count(Key) && "Must not insert new actions during execution phase"); ForwardingAction *ChildAction = &ForwardingActions[Key]; Stack.emplace_back(ChildAction, ChildAction->Depends.begin()); } // Actually, we need the reverse postorder because actions prepend new // instructions. Therefore, the first one will always be the action for the // operand tree's root. assert(Ordered.back() == RootAction); if (RootAction->Execute()) Stmt->removeSingleMemoryAccess(RA); Ordered.pop_back(); for (auto DepAction : reverse(Ordered)) { assert(DepAction->Decision != FD_Unknown && DepAction->Decision != FD_CannotForward); assert(DepAction != RootAction); DepAction->Execute(); } } /// Try to forward an operand tree rooted in @p RA. bool tryForwardTree(MemoryAccess *RA) { assert(RA->isLatestScalarKind()); LLVM_DEBUG(dbgs() << "Trying to forward operand tree " << RA << "...\n"); ScopStmt *Stmt = RA->getStatement(); Loop *InLoop = Stmt->getSurroundingLoop(); isl::map TargetToUse; if (!Known.is_null()) { isl::space DomSpace = Stmt->getDomainSpace(); TargetToUse = isl::map::identity(DomSpace.map_from_domain_and_range(DomSpace)); } ForwardingDecision Assessment = forwardTree(Stmt, RA->getAccessValue(), Stmt, InLoop); // If considered feasible and profitable, forward it. bool Changed = false; if (Assessment == FD_CanForwardProfitably) { applyForwardingActions(Stmt, RA->getAccessValue(), RA); Changed = true; } ForwardingActions.clear(); return Changed; } /// Return which SCoP this instance is processing. Scop *getScop() const { return S; } /// Run the algorithm: Use value read accesses as operand tree roots and try /// to forward them into the statement. bool forwardOperandTrees() { for (ScopStmt &Stmt : *S) { bool StmtModified = false; // Because we are modifying the MemoryAccess list, collect them first to // avoid iterator invalidation. SmallVector Accs(Stmt.begin(), Stmt.end()); for (MemoryAccess *RA : Accs) { if (!RA->isRead()) continue; if (!RA->isLatestScalarKind()) continue; if (tryForwardTree(RA)) { Modified = true; StmtModified = true; NumForwardedTrees++; TotalForwardedTrees++; } } if (StmtModified) { NumModifiedStmts++; TotalModifiedStmts++; } } if (Modified) { ScopsModified++; S->realignParams(); } return Modified; } /// Print the pass result, performed transformations and the SCoP after the /// transformation. void print(raw_ostream &OS, int Indent = 0) { printStatistics(OS, Indent); if (!Modified) { // This line can easily be checked in regression tests. OS << "ForwardOpTree executed, but did not modify anything\n"; return; } printStatements(OS, Indent); } bool isModified() const { return Modified; } }; static std::unique_ptr runForwardOpTree(Scop &S, LoopInfo &LI) { std::unique_ptr Impl; { IslMaxOperationsGuard MaxOpGuard(S.getIslCtx().get(), MaxOps, false); Impl = std::make_unique(&S, &LI, MaxOpGuard); if (AnalyzeKnown) { LLVM_DEBUG(dbgs() << "Prepare forwarders...\n"); Impl->computeKnownValues(); } LLVM_DEBUG(dbgs() << "Forwarding operand trees...\n"); Impl->forwardOperandTrees(); if (MaxOpGuard.hasQuotaExceeded()) { LLVM_DEBUG(dbgs() << "Not all operations completed because of " "max_operations exceeded\n"); KnownOutOfQuota++; } } LLVM_DEBUG(dbgs() << "\nFinal Scop:\n"); LLVM_DEBUG(dbgs() << S); // Update statistics Scop::ScopStatistics ScopStats = S.getStatistics(); NumValueWrites += ScopStats.NumValueWrites; NumValueWritesInLoops += ScopStats.NumValueWritesInLoops; NumPHIWrites += ScopStats.NumPHIWrites; NumPHIWritesInLoops += ScopStats.NumPHIWritesInLoops; NumSingletonWrites += ScopStats.NumSingletonWrites; NumSingletonWritesInLoops += ScopStats.NumSingletonWritesInLoops; return Impl; } static PreservedAnalyses runForwardOpTreeUsingNPM(Scop &S, ScopAnalysisManager &SAM, ScopStandardAnalysisResults &SAR, SPMUpdater &U, raw_ostream *OS) { LoopInfo &LI = SAR.LI; std::unique_ptr Impl = runForwardOpTree(S, LI); if (OS) { *OS << "Printing analysis 'Polly - Forward operand tree' for region: '" << S.getName() << "' in function '" << S.getFunction().getName() << "':\n"; if (Impl) { assert(Impl->getScop() == &S); Impl->print(*OS); } } if (!Impl->isModified()) return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserveSet>(); PA.preserveSet>(); PA.preserveSet>(); return PA; } /// Pass that redirects scalar reads to array elements that are known to contain /// the same value. /// /// This reduces the number of scalar accesses and therefore potentially /// increases the freedom of the scheduler. In the ideal case, all reads of a /// scalar definition are redirected (We currently do not care about removing /// the write in this case). This is also useful for the main DeLICM pass as /// there are less scalars to be mapped. class ForwardOpTreeWrapperPass : public ScopPass { private: /// The pass implementation, also holding per-scop data. std::unique_ptr Impl; public: static char ID; explicit ForwardOpTreeWrapperPass() : ScopPass(ID) {} ForwardOpTreeWrapperPass(const ForwardOpTreeWrapperPass &) = delete; ForwardOpTreeWrapperPass & operator=(const ForwardOpTreeWrapperPass &) = delete; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequiredTransitive(); AU.addRequired(); AU.setPreservesAll(); } bool runOnScop(Scop &S) override { // Free resources for previous SCoP's computation, if not yet done. releaseMemory(); LoopInfo &LI = getAnalysis().getLoopInfo(); Impl = runForwardOpTree(S, LI); return false; } void printScop(raw_ostream &OS, Scop &S) const override { if (!Impl) return; assert(Impl->getScop() == &S); Impl->print(OS); } void releaseMemory() override { Impl.reset(); } }; // class ForwardOpTree char ForwardOpTreeWrapperPass::ID; } // namespace Pass *polly::createForwardOpTreeWrapperPass() { return new ForwardOpTreeWrapperPass(); } INITIALIZE_PASS_BEGIN(ForwardOpTreeWrapperPass, "polly-optree", "Polly - Forward operand tree", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(ForwardOpTreeWrapperPass, "polly-optree", "Polly - Forward operand tree", false, false) llvm::PreservedAnalyses ForwardOpTreePass::run(Scop &S, ScopAnalysisManager &SAM, ScopStandardAnalysisResults &SAR, SPMUpdater &U) { return runForwardOpTreeUsingNPM(S, SAM, SAR, U, nullptr); } llvm::PreservedAnalyses ForwardOpTreePrinterPass::run(Scop &S, ScopAnalysisManager &SAM, ScopStandardAnalysisResults &SAR, SPMUpdater &U) { return runForwardOpTreeUsingNPM(S, SAM, SAR, U, &OS); }