| //===- ScopBuilder.cpp ----------------------------------------------------===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Create a polyhedral description for a static control flow region. |
| // |
| // This pass creates a polyhedral description of the SCoPs detected by the |
| // SCoP detection, derived from their LLVM-IR code. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "polly/ScopBuilder.h" |
| #include "polly/Options.h" |
| #include "polly/ScopDetection.h" |
| #include "polly/ScopDetectionDiagnostic.h" |
| #include "polly/ScopInfo.h" |
| #include "polly/Support/GICHelper.h" |
| #include "polly/Support/SCEVValidator.h" |
| #include "polly/Support/ScopHelper.h" |
| #include "polly/Support/VirtualInstruction.h" |
| #include "llvm/ADT/APInt.h" |
| #include "llvm/ADT/ArrayRef.h" |
| #include "llvm/ADT/DenseMap.h" |
| #include "llvm/ADT/EquivalenceClasses.h" |
| #include "llvm/ADT/SetVector.h" |
| #include "llvm/ADT/Statistic.h" |
| #include "llvm/Analysis/AliasAnalysis.h" |
| #include "llvm/Analysis/LoopInfo.h" |
| #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
| #include "llvm/Analysis/RegionInfo.h" |
| #include "llvm/Analysis/RegionIterator.h" |
| #include "llvm/Analysis/ScalarEvolution.h" |
| #include "llvm/Analysis/ScalarEvolutionExpressions.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/DataLayout.h" |
| #include "llvm/IR/DebugLoc.h" |
| #include "llvm/IR/DerivedTypes.h" |
| #include "llvm/IR/DiagnosticInfo.h" |
| #include "llvm/IR/Dominators.h" |
| #include "llvm/IR/Function.h" |
| #include "llvm/IR/InstrTypes.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/IntrinsicInst.h" |
| #include "llvm/IR/Operator.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Use.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Compiler.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include <cassert> |
| #include <string> |
| #include <tuple> |
| #include <vector> |
| |
| using namespace llvm; |
| using namespace polly; |
| |
| #define DEBUG_TYPE "polly-scops" |
| |
| STATISTIC(ScopFound, "Number of valid Scops"); |
| STATISTIC(RichScopFound, "Number of Scops containing a loop"); |
| STATISTIC(InfeasibleScops, |
| "Number of SCoPs with statically infeasible context."); |
| |
| bool polly::ModelReadOnlyScalars; |
| |
| static cl::opt<bool, true> XModelReadOnlyScalars( |
| "polly-analyze-read-only-scalars", |
| cl::desc("Model read-only scalar values in the scop description"), |
| cl::location(ModelReadOnlyScalars), cl::Hidden, cl::ZeroOrMore, |
| cl::init(true), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> UnprofitableScalarAccs( |
| "polly-unprofitable-scalar-accs", |
| cl::desc("Count statements with scalar accesses as not optimizable"), |
| cl::Hidden, cl::init(false), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> DetectFortranArrays( |
| "polly-detect-fortran-arrays", |
| cl::desc("Detect Fortran arrays and use this for code generation"), |
| cl::Hidden, cl::init(false), cl::cat(PollyCategory)); |
| |
| static cl::opt<bool> DetectReductions("polly-detect-reductions", |
| cl::desc("Detect and exploit reductions"), |
| cl::Hidden, cl::ZeroOrMore, |
| cl::init(true), cl::cat(PollyCategory)); |
| |
| // Multiplicative reductions can be disabled separately as these kinds of |
| // operations can overflow easily. Additive reductions and bit operations |
| // are, in contrast, fairly stable. |
| static cl::opt<bool> DisableMultiplicativeReductions( |
| "polly-disable-multiplicative-reductions", |
| cl::desc("Disable multiplicative reductions"), cl::Hidden, cl::ZeroOrMore, |
| cl::init(false), cl::cat(PollyCategory)); |
| |
| enum class GranularityChoice { BasicBlocks, ScalarIndependence, Stores }; |
| |
| static cl::opt<GranularityChoice> StmtGranularity( |
| "polly-stmt-granularity", |
| cl::desc( |
| "Algorithm to use for splitting basic blocks into multiple statements"), |
| cl::values(clEnumValN(GranularityChoice::BasicBlocks, "bb", |
| "One statement per basic block"), |
| clEnumValN(GranularityChoice::ScalarIndependence, "scalar-indep", |
| "Scalar independence heuristic"), |
| clEnumValN(GranularityChoice::Stores, "store", |
| "Store-level granularity")), |
| cl::init(GranularityChoice::ScalarIndependence), cl::cat(PollyCategory)); |
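| |
| // For illustration of the granularity choices (hypothetical example): with |
| // "store" granularity, a basic block containing two stores is split after each |
| // store, yielding one statement per store; with "bb" granularity the whole |
| // block becomes a single statement; with "scalar-indep" granularity, |
| // instructions are grouped by the operand-tree and ordering heuristics |
| // implemented in buildEqivClassBlockStmts() below. |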
| |
| void ScopBuilder::buildPHIAccesses(ScopStmt *PHIStmt, PHINode *PHI, |
| Region *NonAffineSubRegion, |
| bool IsExitBlock) { |
| // PHI nodes in the exit block of the region (i.e., when IsExitBlock is true) |
| // are not modeled as ordinary PHI nodes because they are not part of the |
| // region. However, we model their operands in the predecessor blocks that are |
| // part of the region as regular scalar accesses. |
| |
| // If we can synthesize a PHI we can skip it, however only if it is in |
| // the region. If it is not it can only be in the exit block of the region. |
| // In this case we model the operands but not the PHI itself. |
| auto *Scope = LI.getLoopFor(PHI->getParent()); |
| if (!IsExitBlock && canSynthesize(PHI, *scop, &SE, Scope)) |
| return; |
| |
| // PHI nodes are modeled as if they had been demoted prior to the SCoP |
| // detection. Hence, the PHI is a load of a new memory location in which the |
| // incoming value was written at the end of the incoming basic block. |
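| // For illustration (hypothetical IR): given |
| //   %phi = phi i32 [ %a, %then ], [ %b, %else ] |
| // the statements for %then and %else each get a PHI WRITE access storing %a |
| // resp. %b to a virtual location for %phi, and the statement containing %phi |
| // gets a PHI READ access that reloads this value. |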
| bool OnlyNonAffineSubRegionOperands = true; |
| for (unsigned u = 0; u < PHI->getNumIncomingValues(); u++) { |
| Value *Op = PHI->getIncomingValue(u); |
| BasicBlock *OpBB = PHI->getIncomingBlock(u); |
| ScopStmt *OpStmt = scop->getIncomingStmtFor(PHI->getOperandUse(u)); |
| |
| // Do not build PHI dependences inside a non-affine subregion, but make |
| // sure that the necessary scalar values are still made available. |
| if (NonAffineSubRegion && NonAffineSubRegion->contains(OpBB)) { |
| auto *OpInst = dyn_cast<Instruction>(Op); |
| if (!OpInst || !NonAffineSubRegion->contains(OpInst)) |
| ensureValueRead(Op, OpStmt); |
| continue; |
| } |
| |
| OnlyNonAffineSubRegionOperands = false; |
| ensurePHIWrite(PHI, OpStmt, OpBB, Op, IsExitBlock); |
| } |
| |
| if (!OnlyNonAffineSubRegionOperands && !IsExitBlock) { |
| addPHIReadAccess(PHIStmt, PHI); |
| } |
| } |
| |
| void ScopBuilder::buildScalarDependences(ScopStmt *UserStmt, |
| Instruction *Inst) { |
| assert(!isa<PHINode>(Inst)); |
| |
| // Pull-in required operands. |
| for (Use &Op : Inst->operands()) |
| ensureValueRead(Op.get(), UserStmt); |
| } |
| |
| void ScopBuilder::buildEscapingDependences(Instruction *Inst) { |
| // Check for uses of this instruction outside the SCoP. Because we do not |
| // iterate over the instructions outside the SCoP, and therefore never |
| // "ensured" the existence of a write for them, we must detect such uses here. |
| if (scop->isEscaping(Inst)) |
| ensureValueWrite(Inst); |
| } |
| |
| /// Check that a value is a Fortran Array descriptor. |
| /// |
| /// We check if V has the following structure: |
| /// %"struct.array1_real(kind=8)" = type { i8*, i<zz>, i<zz>, |
| /// [<num> x %struct.descriptor_dimension] } |
| /// |
| /// |
| /// %struct.descriptor_dimension = type { i<zz>, i<zz>, i<zz> } |
| /// |
| /// 1. V's type name starts with "struct.array" |
| /// 2. V's type has layout as shown. |
| /// 3. Final member of V's type has name "struct.descriptor_dimension", |
| /// 4. "struct.descriptor_dimension" has layout as shown. |
| /// 5. Consistent use of i<zz> where <zz> is some fixed integer number. |
| /// |
| /// We are interested in such types since this is the code that dragonegg |
| /// generates for Fortran array descriptors. |
| /// |
| /// @param V the Value to be checked. |
| /// |
| /// @returns True if V is a Fortran array descriptor, False otherwise. |
| bool isFortranArrayDescriptor(Value *V) { |
| PointerType *PTy = dyn_cast<PointerType>(V->getType()); |
| |
| if (!PTy) |
| return false; |
| |
| Type *Ty = PTy->getElementType(); |
| assert(Ty && "Ty expected to be initialized"); |
| auto *StructArrTy = dyn_cast<StructType>(Ty); |
| |
| if (!(StructArrTy && StructArrTy->hasName())) |
| return false; |
| |
| if (!StructArrTy->getName().startswith("struct.array")) |
| return false; |
| |
| if (StructArrTy->getNumElements() != 4) |
| return false; |
| |
| const ArrayRef<Type *> ArrMemberTys = StructArrTy->elements(); |
| |
| // i8* match |
| if (ArrMemberTys[0] != Type::getInt8PtrTy(V->getContext())) |
| return false; |
| |
| // Get a reference to the int type and check that all the members |
| // share the same int type |
| Type *IntTy = ArrMemberTys[1]; |
| if (ArrMemberTys[2] != IntTy) |
| return false; |
| |
| // type: [<num> x %struct.descriptor_dimension] |
| ArrayType *DescriptorDimArrayTy = dyn_cast<ArrayType>(ArrMemberTys[3]); |
| if (!DescriptorDimArrayTy) |
| return false; |
| |
| // type: %struct.descriptor_dimension := type { ixx, ixx, ixx } |
| StructType *DescriptorDimTy = |
| dyn_cast<StructType>(DescriptorDimArrayTy->getElementType()); |
| |
| if (!(DescriptorDimTy && DescriptorDimTy->hasName())) |
| return false; |
| |
| if (DescriptorDimTy->getName() != "struct.descriptor_dimension") |
| return false; |
| |
| if (DescriptorDimTy->getNumElements() != 3) |
| return false; |
| |
| for (auto MemberTy : DescriptorDimTy->elements()) { |
| if (MemberTy != IntTy) |
| return false; |
| } |
| |
| return true; |
| } |
| |
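| // The numbered "match:" comments below refer to steps of the IR pattern being |
| // matched. For findFADAllocationVisible this is, roughly (illustrative IR, all |
| // names made up): |
| //   %mem = call i8* @malloc(i64 %size)                  ; match 1 |
| //   %typed = bitcast i8* %mem to double*                ; match 2 |
| //   %slot = getelementptr double, double* %typed, i64 0 ; match 3 (optional) |
| //   a load/store through %slot with alignment 8         ; match 4 |
| //   a store of %mem into a struct recognized by         ; match 5 |
| //   isFortranArrayDescriptor() |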
| Value *ScopBuilder::findFADAllocationVisible(MemAccInst Inst) { |
| // match: 4.1 & 4.2 store/load |
| if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst)) |
| return nullptr; |
| |
| // match: 4 |
| if (Inst.getAlignment() != 8) |
| return nullptr; |
| |
| Value *Address = Inst.getPointerOperand(); |
| |
| const BitCastInst *Bitcast = nullptr; |
| // [match: 3] |
| if (auto *Slot = dyn_cast<GetElementPtrInst>(Address)) { |
| Value *TypedMem = Slot->getPointerOperand(); |
| // match: 2 |
| Bitcast = dyn_cast<BitCastInst>(TypedMem); |
| } else { |
| // match: 2 |
| Bitcast = dyn_cast<BitCastInst>(Address); |
| } |
| |
| if (!Bitcast) |
| return nullptr; |
| |
| auto *MallocMem = Bitcast->getOperand(0); |
| |
| // match: 1 |
| auto *MallocCall = dyn_cast<CallInst>(MallocMem); |
| if (!MallocCall) |
| return nullptr; |
| |
| Function *MallocFn = MallocCall->getCalledFunction(); |
| if (!(MallocFn && MallocFn->hasName() && MallocFn->getName() == "malloc")) |
| return nullptr; |
| |
| // Find all uses of the malloc'd memory. We are looking for a "store" into a |
| // struct with the type being the Fortran descriptor type. |
| for (auto user : MallocMem->users()) { |
| // match: 5 |
| auto *MallocStore = dyn_cast<StoreInst>(user); |
| if (!MallocStore) |
| continue; |
| |
| auto *DescriptorGEP = |
| dyn_cast<GEPOperator>(MallocStore->getPointerOperand()); |
| if (!DescriptorGEP) |
| continue; |
| |
| // match: 5 |
| auto DescriptorType = |
| dyn_cast<StructType>(DescriptorGEP->getSourceElementType()); |
| if (!(DescriptorType && DescriptorType->hasName())) |
| continue; |
| |
| Value *Descriptor = dyn_cast<Value>(DescriptorGEP->getPointerOperand()); |
| |
| if (!Descriptor) |
| continue; |
| |
| if (!isFortranArrayDescriptor(Descriptor)) |
| continue; |
| |
| return Descriptor; |
| } |
| |
| return nullptr; |
| } |
| |
| Value *ScopBuilder::findFADAllocationInvisible(MemAccInst Inst) { |
| // match: 3 |
| if (!isa<LoadInst>(Inst) && !isa<StoreInst>(Inst)) |
| return nullptr; |
| |
| Value *Slot = Inst.getPointerOperand(); |
| |
| LoadInst *MemLoad = nullptr; |
| // [match: 2] |
| if (auto *SlotGEP = dyn_cast<GetElementPtrInst>(Slot)) { |
| // match: 1 |
| MemLoad = dyn_cast<LoadInst>(SlotGEP->getPointerOperand()); |
| } else { |
| // match: 1 |
| MemLoad = dyn_cast<LoadInst>(Slot); |
| } |
| |
| if (!MemLoad) |
| return nullptr; |
| |
| auto *BitcastOperator = |
| dyn_cast<BitCastOperator>(MemLoad->getPointerOperand()); |
| if (!BitcastOperator) |
| return nullptr; |
| |
| Value *Descriptor = dyn_cast<Value>(BitcastOperator->getOperand(0)); |
| if (!Descriptor) |
| return nullptr; |
| |
| if (!isFortranArrayDescriptor(Descriptor)) |
| return nullptr; |
| |
| return Descriptor; |
| } |
| |
| bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, ScopStmt *Stmt) { |
| Value *Val = Inst.getValueOperand(); |
| Type *ElementType = Val->getType(); |
| Value *Address = Inst.getPointerOperand(); |
| const SCEV *AccessFunction = |
| SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); |
| const SCEVUnknown *BasePointer = |
| dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
| enum MemoryAccess::AccessType AccType = |
| isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; |
| |
| if (auto *BitCast = dyn_cast<BitCastInst>(Address)) { |
| auto *Src = BitCast->getOperand(0); |
| auto *SrcTy = Src->getType(); |
| auto *DstTy = BitCast->getType(); |
| // Do not try to delinearize non-sized (opaque) pointers. |
| if ((SrcTy->isPointerTy() && !SrcTy->getPointerElementType()->isSized()) || |
| (DstTy->isPointerTy() && !DstTy->getPointerElementType()->isSized())) { |
| return false; |
| } |
| if (SrcTy->isPointerTy() && DstTy->isPointerTy() && |
| DL.getTypeAllocSize(SrcTy->getPointerElementType()) == |
| DL.getTypeAllocSize(DstTy->getPointerElementType())) |
| Address = Src; |
| } |
| |
| auto *GEP = dyn_cast<GetElementPtrInst>(Address); |
| if (!GEP) |
| return false; |
| |
| std::vector<const SCEV *> Subscripts; |
| std::vector<int> Sizes; |
| std::tie(Subscripts, Sizes) = getIndexExpressionsFromGEP(GEP, SE); |
| auto *BasePtr = GEP->getOperand(0); |
| |
| if (auto *BasePtrCast = dyn_cast<BitCastInst>(BasePtr)) |
| BasePtr = BasePtrCast->getOperand(0); |
| |
| // Check for identical base pointers to ensure that we do not miss index |
| // offsets that have been added before this GEP is applied. |
| if (BasePtr != BasePointer->getValue()) |
| return false; |
| |
| std::vector<const SCEV *> SizesSCEV; |
| |
| const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); |
| |
| Loop *SurroundingLoop = Stmt->getSurroundingLoop(); |
| for (auto *Subscript : Subscripts) { |
| InvariantLoadsSetTy AccessILS; |
| if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE, |
| &AccessILS)) |
| return false; |
| |
| for (LoadInst *LInst : AccessILS) |
| if (!ScopRIL.count(LInst)) |
| return false; |
| } |
| |
| if (Sizes.empty()) |
| return false; |
| |
| SizesSCEV.push_back(nullptr); |
| |
| for (auto V : Sizes) |
| SizesSCEV.push_back(SE.getSCEV( |
| ConstantInt::get(IntegerType::getInt64Ty(BasePtr->getContext()), V))); |
| |
| addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, |
| true, Subscripts, SizesSCEV, Val); |
| return true; |
| } |
| |
| bool ScopBuilder::buildAccessMultiDimParam(MemAccInst Inst, ScopStmt *Stmt) { |
| if (!PollyDelinearize) |
| return false; |
| |
| Value *Address = Inst.getPointerOperand(); |
| Value *Val = Inst.getValueOperand(); |
| Type *ElementType = Val->getType(); |
| unsigned ElementSize = DL.getTypeAllocSize(ElementType); |
| enum MemoryAccess::AccessType AccType = |
| isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; |
| |
| const SCEV *AccessFunction = |
| SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); |
| const SCEVUnknown *BasePointer = |
| dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
| |
| assert(BasePointer && "Could not find base pointer"); |
| |
| auto &InsnToMemAcc = scop->getInsnToMemAccMap(); |
| auto AccItr = InsnToMemAcc.find(Inst); |
| if (AccItr == InsnToMemAcc.end()) |
| return false; |
| |
| std::vector<const SCEV *> Sizes = {nullptr}; |
| |
| Sizes.insert(Sizes.end(), AccItr->second.Shape->DelinearizedSizes.begin(), |
| AccItr->second.Shape->DelinearizedSizes.end()); |
| |
| // In case only the element size is contained in the 'Sizes' array, the |
| // access does not access a real multi-dimensional array. Hence, we allow |
| // the normal single-dimensional access construction to handle this. |
| if (Sizes.size() == 1) |
| return false; |
| |
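| // For example (hypothetical): for a parametric two-dimensional access A[i][j], |
| // the precomputed shape provides delinearized subscripts {i, j} and sizes that |
| // end in the element size (e.g. 8 for double); that trailing element size is |
| // stripped off below and checked against ElementSize. |
| |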
| // Remove the element size. This information is already provided by the |
| // ElementSize parameter. In case the element size of this access and the |
| // element size used for delinearization differ, the delinearization is |
| // incorrect. Hence, we invalidate the scop. |
| // |
| // TODO: Handle delinearization with differing element sizes. |
| auto DelinearizedSize = |
| cast<SCEVConstant>(Sizes.back())->getAPInt().getSExtValue(); |
| Sizes.pop_back(); |
| if (ElementSize != DelinearizedSize) |
| scop->invalidate(DELINEARIZATION, Inst->getDebugLoc(), Inst->getParent()); |
| |
| addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, |
| true, AccItr->second.DelinearizedSubscripts, Sizes, Val); |
| return true; |
| } |
| |
| bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, ScopStmt *Stmt) { |
| auto *MemIntr = dyn_cast_or_null<MemIntrinsic>(Inst); |
| |
| if (MemIntr == nullptr) |
| return false; |
| |
| auto *L = LI.getLoopFor(Inst->getParent()); |
| auto *LengthVal = SE.getSCEVAtScope(MemIntr->getLength(), L); |
| assert(LengthVal); |
| |
| // Check if the length val is actually affine or if we overapproximate it |
| InvariantLoadsSetTy AccessILS; |
| const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); |
| |
| Loop *SurroundingLoop = Stmt->getSurroundingLoop(); |
| bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop, |
| LengthVal, SE, &AccessILS); |
| for (LoadInst *LInst : AccessILS) |
| if (!ScopRIL.count(LInst)) |
| LengthIsAffine = false; |
| if (!LengthIsAffine) |
| LengthVal = nullptr; |
| |
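| // For illustration (hypothetical IR): a call such as |
| //   call void @llvm.memset.p0i8.i64(i8* %dst, i8 0, i64 %n, i1 false) |
| // is modeled below as a MUST_WRITE of %n bytes (element type i8) relative to |
| // the base pointer of %dst; for memcpy/memmove an additional READ of the |
| // source region is added further down. |
| |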
| auto *DestPtrVal = MemIntr->getDest(); |
| assert(DestPtrVal); |
| |
| auto *DestAccFunc = SE.getSCEVAtScope(DestPtrVal, L); |
| assert(DestAccFunc); |
| // Ignore accesses to "NULL". |
| // TODO: We could use this to optimize the region further, e.g., intersect |
| // the context with |
| // isl_set_complement(isl_set_params(getDomain())) |
| // as we know it would be undefined to execute this instruction anyway. |
| if (DestAccFunc->isZero()) |
| return true; |
| |
| auto *DestPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(DestAccFunc)); |
| assert(DestPtrSCEV); |
| DestAccFunc = SE.getMinusSCEV(DestAccFunc, DestPtrSCEV); |
| addArrayAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, DestPtrSCEV->getValue(), |
| IntegerType::getInt8Ty(DestPtrVal->getContext()), |
| LengthIsAffine, {DestAccFunc, LengthVal}, {nullptr}, |
| Inst.getValueOperand()); |
| |
| auto *MemTrans = dyn_cast<MemTransferInst>(MemIntr); |
| if (!MemTrans) |
| return true; |
| |
| auto *SrcPtrVal = MemTrans->getSource(); |
| assert(SrcPtrVal); |
| |
| auto *SrcAccFunc = SE.getSCEVAtScope(SrcPtrVal, L); |
| assert(SrcAccFunc); |
| // Ignore accesses to "NULL". |
| // TODO: See above TODO |
| if (SrcAccFunc->isZero()) |
| return true; |
| |
| auto *SrcPtrSCEV = dyn_cast<SCEVUnknown>(SE.getPointerBase(SrcAccFunc)); |
| assert(SrcPtrSCEV); |
| SrcAccFunc = SE.getMinusSCEV(SrcAccFunc, SrcPtrSCEV); |
| addArrayAccess(Stmt, Inst, MemoryAccess::READ, SrcPtrSCEV->getValue(), |
| IntegerType::getInt8Ty(SrcPtrVal->getContext()), |
| LengthIsAffine, {SrcAccFunc, LengthVal}, {nullptr}, |
| Inst.getValueOperand()); |
| |
| return true; |
| } |
| |
| bool ScopBuilder::buildAccessCallInst(MemAccInst Inst, ScopStmt *Stmt) { |
| auto *CI = dyn_cast_or_null<CallInst>(Inst); |
| |
| if (CI == nullptr) |
| return false; |
| |
| if (CI->doesNotAccessMemory() || isIgnoredIntrinsic(CI) || isDebugCall(CI)) |
| return true; |
| |
| bool ReadOnly = false; |
| auto *AF = SE.getConstant(IntegerType::getInt64Ty(CI->getContext()), 0); |
| auto *CalledFunction = CI->getCalledFunction(); |
| switch (AA.getModRefBehavior(CalledFunction)) { |
| case FMRB_UnknownModRefBehavior: |
| llvm_unreachable("Unknown mod ref behaviour cannot be represented."); |
| case FMRB_DoesNotAccessMemory: |
| return true; |
| case FMRB_DoesNotReadMemory: |
| case FMRB_OnlyAccessesInaccessibleMem: |
| case FMRB_OnlyAccessesInaccessibleOrArgMem: |
| return false; |
| case FMRB_OnlyReadsMemory: |
| GlobalReads.emplace_back(Stmt, CI); |
| return true; |
| case FMRB_OnlyReadsArgumentPointees: |
| ReadOnly = true; |
| // Fall through |
| case FMRB_OnlyAccessesArgumentPointees: { |
| auto AccType = ReadOnly ? MemoryAccess::READ : MemoryAccess::MAY_WRITE; |
| Loop *L = LI.getLoopFor(Inst->getParent()); |
| for (const auto &Arg : CI->arg_operands()) { |
| if (!Arg->getType()->isPointerTy()) |
| continue; |
| |
| auto *ArgSCEV = SE.getSCEVAtScope(Arg, L); |
| if (ArgSCEV->isZero()) |
| continue; |
| |
| auto *ArgBasePtr = cast<SCEVUnknown>(SE.getPointerBase(ArgSCEV)); |
| addArrayAccess(Stmt, Inst, AccType, ArgBasePtr->getValue(), |
| ArgBasePtr->getType(), false, {AF}, {nullptr}, CI); |
| } |
| return true; |
| } |
| } |
| |
| return true; |
| } |
| |
| void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, ScopStmt *Stmt) { |
| Value *Address = Inst.getPointerOperand(); |
| Value *Val = Inst.getValueOperand(); |
| Type *ElementType = Val->getType(); |
| enum MemoryAccess::AccessType AccType = |
| isa<LoadInst>(Inst) ? MemoryAccess::READ : MemoryAccess::MUST_WRITE; |
| |
| const SCEV *AccessFunction = |
| SE.getSCEVAtScope(Address, LI.getLoopFor(Inst->getParent())); |
| const SCEVUnknown *BasePointer = |
| dyn_cast<SCEVUnknown>(SE.getPointerBase(AccessFunction)); |
| |
| assert(BasePointer && "Could not find base pointer"); |
| AccessFunction = SE.getMinusSCEV(AccessFunction, BasePointer); |
| |
| // Check if the access depends on a loop contained in a non-affine subregion. |
| bool isVariantInNonAffineLoop = false; |
| SetVector<const Loop *> Loops; |
| findLoops(AccessFunction, Loops); |
| for (const Loop *L : Loops) |
| if (Stmt->contains(L)) { |
| isVariantInNonAffineLoop = true; |
| break; |
| } |
| |
| InvariantLoadsSetTy AccessILS; |
| |
| Loop *SurroundingLoop = Stmt->getSurroundingLoop(); |
| bool IsAffine = !isVariantInNonAffineLoop && |
| isAffineExpr(&scop->getRegion(), SurroundingLoop, |
| AccessFunction, SE, &AccessILS); |
| |
| const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads(); |
| for (LoadInst *LInst : AccessILS) |
| if (!ScopRIL.count(LInst)) |
| IsAffine = false; |
| |
| if (!IsAffine && AccType == MemoryAccess::MUST_WRITE) |
| AccType = MemoryAccess::MAY_WRITE; |
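| // For example, a store through a non-affine subscript such as A[i*i] cannot be |
| // represented exactly in the polyhedral model; since only an overapproximation |
| // of the accessed cells is known, the write is downgraded to a MAY_WRITE above. |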
| |
| addArrayAccess(Stmt, Inst, AccType, BasePointer->getValue(), ElementType, |
| IsAffine, {AccessFunction}, {nullptr}, Val); |
| } |
| |
| void ScopBuilder::buildMemoryAccess(MemAccInst Inst, ScopStmt *Stmt) { |
| if (buildAccessMemIntrinsic(Inst, Stmt)) |
| return; |
| |
| if (buildAccessCallInst(Inst, Stmt)) |
| return; |
| |
| if (buildAccessMultiDimFixed(Inst, Stmt)) |
| return; |
| |
| if (buildAccessMultiDimParam(Inst, Stmt)) |
| return; |
| |
| buildAccessSingleDim(Inst, Stmt); |
| } |
| |
| void ScopBuilder::buildAccessFunctions() { |
| for (auto &Stmt : *scop) { |
| if (Stmt.isBlockStmt()) { |
| buildAccessFunctions(&Stmt, *Stmt.getBasicBlock()); |
| continue; |
| } |
| |
| Region *R = Stmt.getRegion(); |
| for (BasicBlock *BB : R->blocks()) |
| buildAccessFunctions(&Stmt, *BB, R); |
| } |
| |
| // Build write accesses for values that are used after the SCoP. |
| // The instructions defining them might be synthesizable and therefore not |
| // contained in any statement, hence we iterate over the original instructions |
| // to identify all escaping values. |
| for (BasicBlock *BB : scop->getRegion().blocks()) { |
| for (Instruction &Inst : *BB) |
| buildEscapingDependences(&Inst); |
| } |
| } |
| |
| bool ScopBuilder::shouldModelInst(Instruction *Inst, Loop *L) { |
| return !isa<TerminatorInst>(Inst) && !isIgnoredIntrinsic(Inst) && |
| !canSynthesize(Inst, *scop, &SE, L); |
| } |
| |
| /// Generate a name for a statement. |
| /// |
| /// @param BB The basic block the statement will represent. |
| /// @param BBIdx The index of the @p BB relative to other BBs/regions. |
| /// @param Count The index of the created statement in @p BB. |
| /// @param IsMain Whether this is the main statement for @p BB. If true, |
| /// no suffix will be added. |
| /// @param IsLast Uses a special indicator for the last statement of a BB. |
| static std::string makeStmtName(BasicBlock *BB, long BBIdx, int Count, |
| bool IsMain, bool IsLast = false) { |
| std::string Suffix; |
| if (!IsMain) { |
| if (UseInstructionNames) |
| Suffix = '_'; |
| if (IsLast) |
| Suffix += "last"; |
| else if (Count < 26) |
| Suffix += 'a' + Count; |
| else |
| Suffix += std::to_string(Count); |
| } |
| return getIslCompatibleName("Stmt", BB, BBIdx, Suffix, UseInstructionNames); |
| } |
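| |
| // For illustration (hypothetical names): if a basic block is split into a main |
| // statement, two further statements and an epilogue, the generated names could |
| // look like Stmt_<bb>, Stmt_<bb>_b, Stmt_<bb>_c and Stmt_<bb>_last, where <bb> |
| // is derived from the block's name or index depending on UseInstructionNames. |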
| |
| /// Generate a name for a statement that represents a non-affine subregion. |
| /// |
| /// @param R The region the statement will represent. |
| /// @param RIdx The index of the @p R relative to other BBs/regions. |
| static std::string makeStmtName(Region *R, long RIdx) { |
| return getIslCompatibleName("Stmt", R->getNameStr(), RIdx, "", |
| UseInstructionNames); |
| } |
| |
| void ScopBuilder::buildSequentialBlockStmts(BasicBlock *BB, bool SplitOnStore) { |
| Loop *SurroundingLoop = LI.getLoopFor(BB); |
| |
| int Count = 0; |
| long BBIdx = scop->getNextStmtIdx(); |
| std::vector<Instruction *> Instructions; |
| for (Instruction &Inst : *BB) { |
| if (shouldModelInst(&Inst, SurroundingLoop)) |
| Instructions.push_back(&Inst); |
| if (Inst.getMetadata("polly_split_after") || |
| (SplitOnStore && isa<StoreInst>(Inst))) { |
| std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); |
| scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); |
| Count++; |
| Instructions.clear(); |
| } |
| } |
| |
| std::string Name = makeStmtName(BB, BBIdx, Count, Count == 0); |
| scop->addScopStmt(BB, Name, SurroundingLoop, Instructions); |
| } |
| |
| /// Is @p Inst an ordered instruction? |
| /// |
| /// An unordered instruction is an instruction such that a sequence of |
| /// unordered instructions can be permuted without changing semantics. Any |
| /// instruction for which this is not always the case is ordered. |
| static bool isOrderedInstruction(Instruction *Inst) { |
| return Inst->mayHaveSideEffects() || Inst->mayReadOrWriteMemory(); |
| } |
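| |
| // For example, loads, stores and calls with side effects are ordered, whereas |
| // pure arithmetic such as add or mul is unordered and may be regrouped freely. |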
| |
| /// Join instructions into the same statement if one uses the scalar result of |
| /// the other. |
| static void joinOperandTree(EquivalenceClasses<Instruction *> &UnionFind, |
| ArrayRef<Instruction *> ModeledInsts) { |
| for (Instruction *Inst : ModeledInsts) { |
| if (isa<PHINode>(Inst)) |
| continue; |
| |
| for (Use &Op : Inst->operands()) { |
| Instruction *OpInst = dyn_cast<Instruction>(Op.get()); |
| if (!OpInst) |
| continue; |
| |
| // Check if OpInst is in the BB and is a modeled instruction. |
| auto OpVal = UnionFind.findValue(OpInst); |
| if (OpVal == UnionFind.end()) |
| continue; |
| |
| UnionFind.unionSets(Inst, OpInst); |
| } |
| } |
| } |
| |
| /// Ensure that the order of ordered instructions does not change. |
| /// |
| /// If we encounter an ordered instruction enclosed in instructions belonging to |
| /// a different statement (which might as well contain ordered instructions, but |
| /// this is not tested here), join them. |
| static void |
| joinOrderedInstructions(EquivalenceClasses<Instruction *> &UnionFind, |
| ArrayRef<Instruction *> ModeledInsts) { |
| SetVector<Instruction *> SeenLeaders; |
| for (Instruction *Inst : ModeledInsts) { |
| if (!isOrderedInstruction(Inst)) |
| continue; |
| |
| Instruction *Leader = UnionFind.getLeaderValue(Inst); |
| bool Inserted = SeenLeaders.insert(Leader); |
| if (Inserted) |
| continue; |
| |
| // Merge statements to close holes. Say, we have already seen statements A |
| // and B, in this order. Then we see an instruction of A again and we would |
| // see the pattern "A B A". This function joins all statements until the |
| // only seen occurrence of A. |
| for (Instruction *Prev : reverse(SeenLeaders)) { |
| // Items added to 'SeenLeaders' are leaders, but may have lost their |
| // leadership status when merged into another statement. |
| Instruction *PrevLeader = UnionFind.getLeaderValue(SeenLeaders.back()); |
| if (PrevLeader == Leader) |
| break; |
| UnionFind.unionSets(Prev, Leader); |
| } |
| } |
| } |
| |
| /// If the BasicBlock has an edge from itself, ensure that the PHI WRITEs for |
| /// the incoming values from this block are executed after the PHI READ. |
| /// |
| /// Otherwise it could overwrite the incoming value from before the BB with the |
| /// value for the next execution. This can happen if the PHI WRITE is added to |
| /// the statement with the instruction that defines the incoming value (instead |
| /// of the last statement of the same BB). To ensure that the PHI READ and WRITE |
| /// are in order, we put both into the statement. PHI WRITEs are always executed |
| /// after PHI READs when they are in the same statement. |
| /// |
| /// TODO: This is an overpessimization. We only have to ensure that the PHI |
| /// WRITE is not put into a statement containing the PHI itself. That could also |
| /// be done by |
| /// - having all (strongly connected) PHIs in a single statement, |
| /// - unite only the PHIs in the operand tree of the PHI WRITE (because it only |
| /// has a chance of being lifted before a PHI by being in a statement with a |
| /// PHI that comes before in the basic block), or |
| /// - when uniting statements, ensure that no (relevant) PHIs are overtaken. |
| static void joinOrderedPHIs(EquivalenceClasses<Instruction *> &UnionFind, |
| ArrayRef<Instruction *> ModeledInsts) { |
| for (Instruction *Inst : ModeledInsts) { |
| PHINode *PHI = dyn_cast<PHINode>(Inst); |
| if (!PHI) |
| continue; |
| |
| int Idx = PHI->getBasicBlockIndex(PHI->getParent()); |
| if (Idx < 0) |
| continue; |
| |
| Instruction *IncomingVal = |
| dyn_cast<Instruction>(PHI->getIncomingValue(Idx)); |
| if (!IncomingVal) |
| continue; |
| |
| UnionFind.unionSets(PHI, IncomingVal); |
| } |
| } |
| |
| void ScopBuilder::buildEqivClassBlockStmts(BasicBlock *BB) { |
| Loop *L = LI.getLoopFor(BB); |
| |
| // Extracting out modeled instructions saves us from checking |
| // shouldModelInst() repeatedly. |
| SmallVector<Instruction *, 32> ModeledInsts; |
| EquivalenceClasses<Instruction *> UnionFind; |
| Instruction *MainInst = nullptr; |
| for (Instruction &Inst : *BB) { |
| if (!shouldModelInst(&Inst, L)) |
| continue; |
| ModeledInsts.push_back(&Inst); |
| UnionFind.insert(&Inst); |
| |
| // When a BB is split into multiple statements, the main statement is the |
| // one containing the 'main' instruction. We select the first instruction |
| // that is unlikely to be removed (because it has side-effects) as the main |
| // one. It is used to ensure that at least one statement from the bb has the |
| // same name as with -polly-stmt-granularity=bb. |
| if (!MainInst && (isa<StoreInst>(Inst) || |
| (isa<CallInst>(Inst) && !isa<IntrinsicInst>(Inst)))) |
| MainInst = &Inst; |
| } |
| |
| joinOperandTree(UnionFind, ModeledInsts); |
| joinOrderedInstructions(UnionFind, ModeledInsts); |
| joinOrderedPHIs(UnionFind, ModeledInsts); |
| |
| // The list of instructions per statement (each statement is represented by |
| // its leader instruction). The instructions are collected in reverse order, |
| // such that the epilogue comes first. This makes it easier to ensure that |
| // the epilogue is the last statement. |
| MapVector<Instruction *, std::vector<Instruction *>> LeaderToInstList; |
| |
| // Collect the instructions of all leaders. UnionFind's member iterators are |
| // unfortunately not in any specific order. |
| for (Instruction &Inst : reverse(*BB)) { |
| auto LeaderIt = UnionFind.findLeader(&Inst); |
| if (LeaderIt == UnionFind.member_end()) |
| continue; |
| |
| std::vector<Instruction *> &InstList = LeaderToInstList[*LeaderIt]; |
| InstList.push_back(&Inst); |
| } |
| |
| // Finally build the statements. |
| int Count = 0; |
| long BBIdx = scop->getNextStmtIdx(); |
| bool MainFound = false; |
| for (auto &Instructions : reverse(LeaderToInstList)) { |
| std::vector<Instruction *> &InstList = Instructions.second; |
| |
| // If there is no main instruction, make the first statement the main. |
| bool IsMain; |
| if (MainInst) |
| IsMain = std::find(InstList.begin(), InstList.end(), MainInst) != |
| InstList.end(); |
| else |
| IsMain = (Count == 0); |
| if (IsMain) |
| MainFound = true; |
| |
| std::reverse(InstList.begin(), InstList.end()); |
| std::string Name = makeStmtName(BB, BBIdx, Count, IsMain); |
| scop->addScopStmt(BB, Name, L, std::move(InstList)); |
| Count += 1; |
| } |
| |
| // Unconditionally add an epilogue (last statement). It contains no |
| // instructions, but holds the PHI write accesses for successor basic blocks, |
| // if the incoming value is not defined in another statement of the same BB. |
| // The epilogue will be removed if no PHIWrite is added to it. |
| std::string EpilogueName = makeStmtName(BB, BBIdx, Count, !MainFound, true); |
| scop->addScopStmt(BB, EpilogueName, L, {}); |
| } |
| |
| void ScopBuilder::buildStmts(Region &SR) { |
| if (scop->isNonAffineSubRegion(&SR)) { |
| std::vector<Instruction *> Instructions; |
| Loop *SurroundingLoop = |
| getFirstNonBoxedLoopFor(SR.getEntry(), LI, scop->getBoxedLoops()); |
| for (Instruction &Inst : *SR.getEntry()) |
| if (shouldModelInst(&Inst, SurroundingLoop)) |
| Instructions.push_back(&Inst); |
| long RIdx = scop->getNextStmtIdx(); |
| std::string Name = makeStmtName(&SR, RIdx); |
| scop->addScopStmt(&SR, Name, SurroundingLoop, Instructions); |
| return; |
| } |
| |
| for (auto I = SR.element_begin(), E = SR.element_end(); I != E; ++I) |
| if (I->isSubRegion()) |
| buildStmts(*I->getNodeAs<Region>()); |
| else { |
| BasicBlock *BB = I->getNodeAs<BasicBlock>(); |
| switch (StmtGranularity) { |
| case GranularityChoice::BasicBlocks: |
| buildSequentialBlockStmts(BB); |
| break; |
| case GranularityChoice::ScalarIndependence: |
| buildEqivClassBlockStmts(BB); |
| break; |
| case GranularityChoice::Stores: |
| buildSequentialBlockStmts(BB, true); |
| break; |
| } |
| } |
| } |
| |
| void ScopBuilder::buildAccessFunctions(ScopStmt *Stmt, BasicBlock &BB, |
| Region *NonAffineSubRegion) { |
| assert( |
| Stmt && |
| "The exit BB is the only one that cannot be represented by a statement"); |
| assert(Stmt->represents(&BB)); |
| |
| // We do not build access functions for error blocks, as they may contain |
| // instructions we can not model. |
| if (isErrorBlock(BB, scop->getRegion(), LI, DT)) |
| return; |
| |
| auto BuildAccessesForInst = [this, Stmt, |
| NonAffineSubRegion](Instruction *Inst) { |
| PHINode *PHI = dyn_cast<PHINode>(Inst); |
| if (PHI) |
| buildPHIAccesses(Stmt, PHI, NonAffineSubRegion, false); |
| |
| if (auto MemInst = MemAccInst::dyn_cast(*Inst)) { |
| assert(Stmt && "Cannot build access function in non-existing statement"); |
| buildMemoryAccess(MemInst, Stmt); |
| } |
| |
| // PHI nodes have already been modeled above and TerminatorInsts that are |
| // not part of a non-affine subregion are fully modeled and regenerated |
| // from the polyhedral domains. Hence, they do not need to be modeled as |
| // explicit data dependences. |
| if (!PHI) |
| buildScalarDependences(Stmt, Inst); |
| }; |
| |
| const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); |
| bool IsEntryBlock = (Stmt->getEntryBlock() == &BB); |
| if (IsEntryBlock) { |
| for (Instruction *Inst : Stmt->getInstructions()) |
| BuildAccessesForInst(Inst); |
| if (Stmt->isRegionStmt()) |
| BuildAccessesForInst(BB.getTerminator()); |
| } else { |
| for (Instruction &Inst : BB) { |
| if (isIgnoredIntrinsic(&Inst)) |
| continue; |
| |
| // Invariant loads already have been processed. |
| if (isa<LoadInst>(Inst) && RIL.count(cast<LoadInst>(&Inst))) |
| continue; |
| |
| BuildAccessesForInst(&Inst); |
| } |
| } |
| } |
| |
| MemoryAccess *ScopBuilder::addMemoryAccess( |
| ScopStmt *Stmt, Instruction *Inst, MemoryAccess::AccessType AccType, |
| Value *BaseAddress, Type *ElementType, bool Affine, Value *AccessValue, |
| ArrayRef<const SCEV *> Subscripts, ArrayRef<const SCEV *> Sizes, |
| MemoryKind Kind) { |
| bool isKnownMustAccess = false; |
| |
| // Accesses in single-basic block statements are always executed. |
| if (Stmt->isBlockStmt()) |
| isKnownMustAccess = true; |
| |
| if (Stmt->isRegionStmt()) { |
| // Accesses that dominate the exit block of a non-affine region are always |
| // executed. In non-affine regions there may exist accesses that do not |
| // dominate the exit. MemoryKind::Value writes always dominate the exit, |
| // whereas MemoryKind::PHI writes do so only if there is at most one |
| // PHI_WRITE in the non-affine region. |
| if (Inst && DT.dominates(Inst->getParent(), Stmt->getRegion()->getExit())) |
| isKnownMustAccess = true; |
| } |
| |
| // Non-affine PHI writes do not "happen" at a particular instruction, but |
| // after exiting the statement. Therefore they are guaranteed to execute and |
| // overwrite the old value. |
| if (Kind == MemoryKind::PHI || Kind == MemoryKind::ExitPHI) |
| isKnownMustAccess = true; |
| |
| if (!isKnownMustAccess && AccType == MemoryAccess::MUST_WRITE) |
| AccType = MemoryAccess::MAY_WRITE; |
| |
| auto *Access = new MemoryAccess(Stmt, Inst, AccType, BaseAddress, ElementType, |
| Affine, Subscripts, Sizes, AccessValue, Kind); |
| |
| scop->addAccessFunction(Access); |
| Stmt->addAccess(Access); |
| return Access; |
| } |
| |
| void ScopBuilder::addArrayAccess(ScopStmt *Stmt, MemAccInst MemAccInst, |
| MemoryAccess::AccessType AccType, |
| Value *BaseAddress, Type *ElementType, |
| bool IsAffine, |
| ArrayRef<const SCEV *> Subscripts, |
| ArrayRef<const SCEV *> Sizes, |
| Value *AccessValue) { |
| ArrayBasePointers.insert(BaseAddress); |
| auto *MemAccess = addMemoryAccess(Stmt, MemAccInst, AccType, BaseAddress, |
| ElementType, IsAffine, AccessValue, |
| Subscripts, Sizes, MemoryKind::Array); |
| |
| if (!DetectFortranArrays) |
| return; |
| |
| if (Value *FAD = findFADAllocationInvisible(MemAccInst)) |
| MemAccess->setFortranArrayDescriptor(FAD); |
| else if (Value *FAD = findFADAllocationVisible(MemAccInst)) |
| MemAccess->setFortranArrayDescriptor(FAD); |
| } |
| |
| void ScopBuilder::ensureValueWrite(Instruction *Inst) { |
| // Find the statement that defines the value of Inst. That statement has to |
| // write the value to make it available to those statements that read it. |
| ScopStmt *Stmt = scop->getStmtFor(Inst); |
| |
| // It is possible that the value is synthesizable within a loop (such that it |
| // is not part of any statement), but not after the loop (where you need the |
| // number of loop round-trips to synthesize it). In LCSSA-form a PHI node will |
| // avoid this. In case the IR has no such PHI, use the last statement (where |
| // the value is synthesizable) to write the value. |
| if (!Stmt) |
| Stmt = scop->getLastStmtFor(Inst->getParent()); |
| |
| // Inst not defined within this SCoP. |
| if (!Stmt) |
| return; |
| |
| // Do not process further if the instruction is already written. |
| if (Stmt->lookupValueWriteOf(Inst)) |
| return; |
| |
| addMemoryAccess(Stmt, Inst, MemoryAccess::MUST_WRITE, Inst, Inst->getType(), |
| true, Inst, ArrayRef<const SCEV *>(), |
| ArrayRef<const SCEV *>(), MemoryKind::Value); |
| } |
| |
| void ScopBuilder::ensureValueRead(Value *V, ScopStmt *UserStmt) { |
| // TODO: Make ScopStmt::ensureValueRead(Value*) offer the same functionality |
| // to be able to replace this one. Currently, there is a split responsibility. |
| // In a first step, the MemoryAccess is created, but without the |
| // AccessRelation. In the second step by ScopStmt::buildAccessRelations(), the |
| // AccessRelation is created. At least for scalar accesses, there is no new |
| // information available at ScopStmt::buildAccessRelations(), so we could |
| // create the AccessRelation right away. This is what |
| // ScopStmt::ensureValueRead(Value*) does. |
| |
| auto *Scope = UserStmt->getSurroundingLoop(); |
| auto VUse = VirtualUse::create(scop.get(), UserStmt, Scope, V, false); |
| switch (VUse.getKind()) { |
| case VirtualUse::Constant: |
| case VirtualUse::Block: |
| case VirtualUse::Synthesizable: |
| case VirtualUse::Hoisted: |
| case VirtualUse::Intra: |
| // Uses of these kinds do not need a MemoryAccess. |
| break; |
| |
| case VirtualUse::ReadOnly: |
| // Add MemoryAccess for invariant values only if requested. |
| if (!ModelReadOnlyScalars) |
| break; |
| |
| LLVM_FALLTHROUGH; |
| case VirtualUse::Inter: |
| |
| // Do not create another MemoryAccess for reloading the value if one already |
| // exists. |
| if (UserStmt->lookupValueReadOf(V)) |
| break; |
| |
| addMemoryAccess(UserStmt, nullptr, MemoryAccess::READ, V, V->getType(), |
| true, V, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), |
| MemoryKind::Value); |
| |
| // Inter-statement uses need to write the value in their defining statement. |
| if (VUse.isInter()) |
| ensureValueWrite(cast<Instruction>(V)); |
| break; |
| } |
| } |
| |
| void ScopBuilder::ensurePHIWrite(PHINode *PHI, ScopStmt *IncomingStmt, |
| BasicBlock *IncomingBlock, |
| Value *IncomingValue, bool IsExitBlock) { |
| // As the incoming block might turn out to be an error statement, ensure we |
| // will create an exit PHI SAI object. It is needed during code generation |
| // and would be created later anyway. |
| if (IsExitBlock) |
| scop->getOrCreateScopArrayInfo(PHI, PHI->getType(), {}, |
| MemoryKind::ExitPHI); |
| |
| // This is possible if PHI is in the SCoP's entry block. The incoming blocks |
| // from outside the SCoP's region have no statement representation. |
| if (!IncomingStmt) |
| return; |
| |
| // Make sure the incoming value is available in the incoming block. This must |
| // be done before the check for multiple PHI writes because multiple exiting |
| // edges from a subregion can each be the effectively written value of the |
| // subregion. As such, all of them must be made available in the subregion |
| // statement. |
| ensureValueRead(IncomingValue, IncomingStmt); |
| |
| // Do not add more than one MemoryAccess per PHINode and ScopStmt. |
| if (MemoryAccess *Acc = IncomingStmt->lookupPHIWriteOf(PHI)) { |
| assert(Acc->getAccessInstruction() == PHI); |
| Acc->addIncoming(IncomingBlock, IncomingValue); |
| return; |
| } |
| |
| MemoryAccess *Acc = addMemoryAccess( |
| IncomingStmt, PHI, MemoryAccess::MUST_WRITE, PHI, PHI->getType(), true, |
| PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), |
| IsExitBlock ? MemoryKind::ExitPHI : MemoryKind::PHI); |
| assert(Acc); |
| Acc->addIncoming(IncomingBlock, IncomingValue); |
| } |
| |
| void ScopBuilder::addPHIReadAccess(ScopStmt *PHIStmt, PHINode *PHI) { |
| addMemoryAccess(PHIStmt, PHI, MemoryAccess::READ, PHI, PHI->getType(), true, |
| PHI, ArrayRef<const SCEV *>(), ArrayRef<const SCEV *>(), |
| MemoryKind::PHI); |
| } |
| |
| void ScopBuilder::buildDomain(ScopStmt &Stmt) { |
| isl::id Id = isl::id::alloc(scop->getIslCtx(), Stmt.getBaseName(), &Stmt); |
| |
| Stmt.Domain = scop->getDomainConditions(&Stmt); |
| Stmt.Domain = Stmt.Domain.set_tuple_id(Id); |
| } |
| |
| void ScopBuilder::collectSurroundingLoops(ScopStmt &Stmt) { |
| isl::set Domain = Stmt.getDomain(); |
| BasicBlock *BB = Stmt.getEntryBlock(); |
| |
| Loop *L = LI.getLoopFor(BB); |
| |
| while (L && Stmt.isRegionStmt() && Stmt.getRegion()->contains(L)) |
| L = L->getParentLoop(); |
| |
| SmallVector<llvm::Loop *, 8> Loops; |
| |
| while (L && Stmt.getParent()->getRegion().contains(L)) { |
| Loops.push_back(L); |
| L = L->getParentLoop(); |
| } |
| |
| Stmt.NestLoops.insert(Stmt.NestLoops.begin(), Loops.rbegin(), Loops.rend()); |
| } |
| |
| /// Return the reduction type for a given binary operator. |
| static MemoryAccess::ReductionType getReductionType(const BinaryOperator *BinOp, |
| const Instruction *Load) { |
| if (!BinOp) |
| return MemoryAccess::RT_NONE; |
| switch (BinOp->getOpcode()) { |
| case Instruction::FAdd: |
| if (!BinOp->isFast()) |
| return MemoryAccess::RT_NONE; |
| // Fall through |
| case Instruction::Add: |
| return MemoryAccess::RT_ADD; |
| case Instruction::Or: |
| return MemoryAccess::RT_BOR; |
| case Instruction::Xor: |
| return MemoryAccess::RT_BXOR; |
| case Instruction::And: |
| return MemoryAccess::RT_BAND; |
| case Instruction::FMul: |
| if (!BinOp->isFast()) |
| return MemoryAccess::RT_NONE; |
| // Fall through |
| case Instruction::Mul: |
| if (DisableMultiplicativeReductions) |
| return MemoryAccess::RT_NONE; |
| return MemoryAccess::RT_MUL; |
| default: |
| return MemoryAccess::RT_NONE; |
| } |
| } |
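| |
| // For illustration, a typical reduction candidate looks like (pseudo-C): |
| //   sum[c] = sum[c] + A[i]; |
| // i.e. a load of sum[c], exactly one commutative and associative binary |
| // operation, and a store back to the same location, with no other overlapping |
| // access in the statement. checkForReductions() below marks such load/store |
| // pairs as reduction-like. |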
| |
| void ScopBuilder::checkForReductions(ScopStmt &Stmt) { |
| SmallVector<MemoryAccess *, 2> Loads; |
| SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates; |
| |
| // First collect candidate load-store reduction chains by iterating over all |
| // stores and collecting possible reduction loads. |
| for (MemoryAccess *StoreMA : Stmt) { |
| if (StoreMA->isRead()) |
| continue; |
| |
| Loads.clear(); |
| collectCandidateReductionLoads(StoreMA, Loads); |
| for (MemoryAccess *LoadMA : Loads) |
| Candidates.push_back(std::make_pair(LoadMA, StoreMA)); |
| } |
| |
| // Then check each possible candidate pair. |
| for (const auto &CandidatePair : Candidates) { |
| bool Valid = true; |
| isl::map LoadAccs = CandidatePair.first->getAccessRelation(); |
| isl::map StoreAccs = CandidatePair.second->getAccessRelation(); |
| |
| // Skip those with obviously unequal base addresses. |
| if (!LoadAccs.has_equal_space(StoreAccs)) { |
| continue; |
| } |
| |
| // Then check the candidate pair for overlap with other memory accesses. |
| isl::map AllAccsRel = LoadAccs.unite(StoreAccs); |
| AllAccsRel = AllAccsRel.intersect_domain(Stmt.getDomain()); |
| isl::set AllAccs = AllAccsRel.range(); |
| |
| for (MemoryAccess *MA : Stmt) { |
| if (MA == CandidatePair.first || MA == CandidatePair.second) |
| continue; |
| |
| isl::map AccRel = |
| MA->getAccessRelation().intersect_domain(Stmt.getDomain()); |
| isl::set Accs = AccRel.range(); |
| |
| if (AllAccs.has_equal_space(Accs)) { |
| isl::set OverlapAccs = Accs.intersect(AllAccs); |
| Valid = Valid && OverlapAccs.is_empty(); |
| } |
| } |
| |
| if (!Valid) |
| continue; |
| |
| const LoadInst *Load = |
| dyn_cast<const LoadInst>(CandidatePair.first->getAccessInstruction()); |
| MemoryAccess::ReductionType RT = |
| getReductionType(dyn_cast<BinaryOperator>(Load->user_back()), Load); |
| |
| // If no overlapping access was found, we mark the load and the store as |
| // reduction-like. |
| CandidatePair.first->markAsReductionLike(RT); |
| CandidatePair.second->markAsReductionLike(RT); |
| } |
| } |
| |
| void ScopBuilder::collectCandidateReductionLoads( |
| MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) { |
| ScopStmt *Stmt = StoreMA->getStatement(); |
| |
| auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction()); |
| if (!Store) |
| return; |
| |
| // Skip if there is not one binary operator between the load and the store |
| auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand()); |
| if (!BinOp) |
| return; |
| |
| // Skip if the binary operator has multiple uses |
| if (BinOp->getNumUses() != 1) |
| return; |
| |
| // Skip if the opcode of the binary operator is not commutative/associative |
| if (!BinOp->isCommutative() || !BinOp->isAssociative()) |
| return; |
| |
| // Skip if the binary operator is not in the same basic block as the store |
| if (BinOp->getParent() != Store->getParent()) |
| return; |
| |
| // Skip if it is a multiplicative reduction and we disabled them |
| if (DisableMultiplicativeReductions && |
| (BinOp->getOpcode() == Instruction::Mul || |
| BinOp->getOpcode() == Instruction::FMul)) |
| return; |
| |
| // Check the binary operator operands for a candidate load |
| auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0)); |
| auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1)); |
| if (!PossibleLoad0 && !PossibleLoad1) |
| return; |
| |
| // A load is only a candidate if it cannot escape (thus has only this use) |
| if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1) |
| if (PossibleLoad0->getParent() == Store->getParent()) |
| Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad0)); |
| if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1) |
| if (PossibleLoad1->getParent() == Store->getParent()) |
| Loads.push_back(&Stmt->getArrayAccessFor(PossibleLoad1)); |
| } |
| |
| void ScopBuilder::buildAccessRelations(ScopStmt &Stmt) { |
| for (MemoryAccess *Access : Stmt.MemAccs) { |
| Type *ElementType = Access->getElementType(); |
| |
| MemoryKind Ty; |
| if (Access->isPHIKind()) |
| Ty = MemoryKind::PHI; |
| else if (Access->isExitPHIKind()) |
| Ty = MemoryKind::ExitPHI; |
| else if (Access->isValueKind()) |
| Ty = MemoryKind::Value; |
| else |
| Ty = MemoryKind::Array; |
| |
| auto *SAI = scop->getOrCreateScopArrayInfo(Access->getOriginalBaseAddr(), |
| ElementType, Access->Sizes, Ty); |
| Access->buildAccessRelation(SAI); |
| scop->addAccessData(Access); |
| } |
| } |
| |
| #ifndef NDEBUG |
| static void verifyUse(Scop *S, Use &Op, LoopInfo &LI) { |
| auto PhysUse = VirtualUse::create(S, Op, &LI, false); |
| auto VirtUse = VirtualUse::create(S, Op, &LI, true); |
| assert(PhysUse.getKind() == VirtUse.getKind()); |
| } |
| |
| /// Check the consistency of every statement's MemoryAccesses. |
| /// |
| /// The check is carried out by expecting the "physical" kind of use (derived |
| /// from the basic block an instruction resides in) to be the same as the |
| /// "virtual" kind of use (derived from a statement's MemoryAccess). |
| /// |
| /// The "physical" uses are taken by ensureValueRead to determine whether to |
| /// create MemoryAccesses. When done, the kind of scalar access should be the |
| /// same no matter which way it was derived. |
| /// |
| /// The MemoryAccesses might be changed by later SCoP-modifying passes and hence |
| /// can intentionally change the kind of uses (no longer corresponding to the |
| /// "physical" ones, hence called "virtual"). The CodeGenerator therefore has to |
| /// pick up the virtual uses. But here in the ScopBuilder, this has not happened |
| /// yet, such that virtual and physical uses are equivalent. |
| static void verifyUses(Scop *S, LoopInfo &LI, DominatorTree &DT) { |
| for (auto *BB : S->getRegion().blocks()) { |
| for (auto &Inst : *BB) { |
| auto *Stmt = S->getStmtFor(&Inst); |
| if (!Stmt) |
| continue; |
| |
| if (isIgnoredIntrinsic(&Inst)) |
| continue; |
| |
| // Branch conditions are encoded in the statement domains. |
| if (isa<TerminatorInst>(&Inst) && Stmt->isBlockStmt()) |
| continue; |
| |
| // Verify all uses. |
| for (auto &Op : Inst.operands()) |
| verifyUse(S, Op, LI); |
| |
| // Stores do not produce values used by other statements. |
| if (isa<StoreInst>(Inst)) |
| continue; |
| |
| // For every value defined in the block, also check that a use of that |
| // value in the same statement would not be an inter-statement use. It can |
| // still be synthesizable or load-hoisted, but these kinds of instructions |
| // are not directly copied in code-generation. |
| auto VirtDef = |
| VirtualUse::create(S, Stmt, Stmt->getSurroundingLoop(), &Inst, true); |
| assert(VirtDef.getKind() == VirtualUse::Synthesizable || |
| VirtDef.getKind() == VirtualUse::Intra || |
| VirtDef.getKind() == VirtualUse::Hoisted); |
| } |
| } |
| |
| if (S->hasSingleExitEdge()) |
| return; |
| |
| // PHINodes in the SCoP region's exit block are also uses to be checked. |
| if (!S->getRegion().isTopLevelRegion()) { |
| for (auto &Inst : *S->getRegion().getExit()) { |
| if (!isa<PHINode>(Inst)) |
| break; |
| |
| for (auto &Op : Inst.operands()) |
| verifyUse(S, Op, LI); |
| } |
| } |
| } |
| #endif |
| |
| /// Return the block that is the representing block for @p RN. |
| static inline BasicBlock *getRegionNodeBasicBlock(RegionNode *RN) { |
| return RN->isSubRegion() ? RN->getNodeAs<Region>()->getEntry() |
| : RN->getNodeAs<BasicBlock>(); |
| } |
| |
| void ScopBuilder::buildScop(Region &R, AssumptionCache &AC, |
| OptimizationRemarkEmitter &ORE) { |
| scop.reset(new Scop(R, SE, LI, DT, *SD.getDetectionContext(&R), ORE)); |
| |
| buildStmts(R); |
| |
| // Create all invariant load instructions first. These are categorized as |
| // 'synthesizable' and therefore are not part of any ScopStmt, but their |
| // memory accesses still need to be created somewhere. |
| const InvariantLoadsSetTy &RIL = scop->getRequiredInvariantLoads(); |
| for (BasicBlock *BB : scop->getRegion().blocks()) { |
| if (isErrorBlock(*BB, scop->getRegion(), LI, DT)) |
| continue; |
| |
| for (Instruction &Inst : *BB) { |
| LoadInst *Load = dyn_cast<LoadInst>(&Inst); |
| if (!Load) |
| continue; |
| |
| if (!RIL.count(Load)) |
| continue; |
| |
| // Invariant loads require a MemoryAccess to be created in some statement. |
| // It is not important to which statement the MemoryAccess is added |
| // because it will later be removed from the ScopStmt again. We choose the |
| // first statement of the basic block the LoadInst is in. |
| ArrayRef<ScopStmt *> List = scop->getStmtListFor(BB); |
| assert(!List.empty()); |
| ScopStmt *RILStmt = List.front(); |
| buildMemoryAccess(Load, RILStmt); |
| } |
| } |
| buildAccessFunctions(); |
| |
| // In case the region does not have an exiting block we will later (during |
| // code generation) split the exit block. This will move potential PHI nodes |
| // from the current exit block into the new region exiting block. Hence, PHI |
| // nodes that are at this point not part of the region will be afterwards. |
| // To handle these PHI nodes later we will now model their operands as scalar |
| // accesses. Note that we do not model anything in the exit block if we have |
| // an exiting block in the region, as there will not be any splitting later. |
| if (!R.isTopLevelRegion() && !scop->hasSingleExitEdge()) { |
| for (Instruction &Inst : *R.getExit()) { |
| PHINode *PHI = dyn_cast<PHINode>(&Inst); |
| if (!PHI) |
| break; |
| |
| buildPHIAccesses(nullptr, PHI, nullptr, true); |
| } |
| } |
| |
| // Create memory accesses for global reads since all arrays are now known. |
| auto *AF = SE.getConstant(IntegerType::getInt64Ty(SE.getContext()), 0); |
| for (auto GlobalReadPair : GlobalReads) { |
| ScopStmt *GlobalReadStmt = GlobalReadPair.first; |
| Instruction *GlobalRead = GlobalReadPair.second; |
| for (auto *BP : ArrayBasePointers) |
| addArrayAccess(GlobalReadStmt, MemAccInst(GlobalRead), MemoryAccess::READ, |
| BP, BP->getType(), false, {AF}, {nullptr}, GlobalRead); |
| } |
| |
| scop->buildInvariantEquivalenceClasses(); |
| |
| // A map from basic blocks to their invalid domains. |
| DenseMap<BasicBlock *, isl::set> InvalidDomainMap; |
| |
| if (!scop->buildDomains(&R, DT, LI, InvalidDomainMap)) { |
| LLVM_DEBUG( |
| dbgs() << "Bailing-out because buildDomains encountered problems\n"); |
| return; |
| } |
| |
| scop->addUserAssumptions(AC, DT, LI, InvalidDomainMap); |
| |
| // Initialize the invalid domain. |
| for (ScopStmt &Stmt : scop->Stmts) |
| if (Stmt.isBlockStmt()) |
| Stmt.setInvalidDomain(InvalidDomainMap[Stmt.getEntryBlock()]); |
| else |
| Stmt.setInvalidDomain(InvalidDomainMap[getRegionNodeBasicBlock( |
| Stmt.getRegion()->getNode())]); |
| |
| // Remove empty statements. |
| // Exit early in case there are no executable statements left in this scop. |
| scop->removeStmtNotInDomainMap(); |
| scop->simplifySCoP(false); |
| if (scop->isEmpty()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because SCoP is empty\n"); |
| return; |
| } |
| |
| // The ScopStmts now have enough information to initialize themselves. |
| for (ScopStmt &Stmt : *scop) { |
| collectSurroundingLoops(Stmt); |
| |
| buildDomain(Stmt); |
| buildAccessRelations(Stmt); |
| |
| if (DetectReductions) |
| checkForReductions(Stmt); |
| } |
| |
| // Check early for a feasible runtime context. |
| if (!scop->hasFeasibleRuntimeContext()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (early)\n"); |
| return; |
| } |
| |
| // Check early for profitability. Afterwards it cannot change anymore, |
| // only the runtime context could become infeasible. |
| if (!scop->isProfitable(UnprofitableScalarAccs)) { |
| scop->invalidate(PROFITABLE, DebugLoc()); |
| LLVM_DEBUG( |
| dbgs() << "Bailing-out because SCoP is not considered profitable\n"); |
| return; |
| } |
| |
| scop->buildSchedule(LI); |
| |
| scop->finalizeAccesses(); |
| |
| scop->realignParams(); |
| scop->addUserContext(); |
| |
| // After the context was fully constructed, thus all our knowledge about |
| // the parameters is in there, we add all recorded assumptions to the |
| // assumed/invalid context. |
| scop->addRecordedAssumptions(); |
| |
| scop->simplifyContexts(); |
| if (!scop->buildAliasChecks(AA)) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because could not build alias checks\n"); |
| return; |
| } |
| |
| scop->hoistInvariantLoads(); |
| scop->canonicalizeDynamicBasePtrs(); |
| scop->verifyInvariantLoads(); |
| scop->simplifySCoP(true); |
| |
| // Check late for a feasible runtime context because profitability did not |
| // change. |
| if (!scop->hasFeasibleRuntimeContext()) { |
| LLVM_DEBUG(dbgs() << "Bailing-out because of unfeasible context (late)\n"); |
| return; |
| } |
| |
| #ifndef NDEBUG |
| verifyUses(scop.get(), LI, DT); |
| #endif |
| } |
| |
| ScopBuilder::ScopBuilder(Region *R, AssumptionCache &AC, AliasAnalysis &AA, |
| const DataLayout &DL, DominatorTree &DT, LoopInfo &LI, |
| ScopDetection &SD, ScalarEvolution &SE, |
| OptimizationRemarkEmitter &ORE) |
| : AA(AA), DL(DL), DT(DT), LI(LI), SD(SD), SE(SE) { |
| DebugLoc Beg, End; |
| auto P = getBBPairForRegion(R); |
| getDebugLocations(P, Beg, End); |
| |
| std::string Msg = "SCoP begins here."; |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEntry", Beg, P.first) |
| << Msg); |
| |
| buildScop(*R, AC, ORE); |
| |
| LLVM_DEBUG(dbgs() << *scop); |
| |
| if (!scop->hasFeasibleRuntimeContext()) { |
| InfeasibleScops++; |
| Msg = "SCoP ends here but was dismissed."; |
| LLVM_DEBUG(dbgs() << "SCoP detected but dismissed\n"); |
| scop.reset(); |
| } else { |
| Msg = "SCoP ends here."; |
| ++ScopFound; |
| if (scop->getMaxLoopDepth() > 0) |
| ++RichScopFound; |
| } |
| |
| if (R->isTopLevelRegion()) |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.first) |
| << Msg); |
| else |
| ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ScopEnd", End, P.second) |
| << Msg); |
| } |