1706b4fc4SDimitry Andric //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2eb11fae6SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6eb11fae6SDimitry Andric //
7eb11fae6SDimitry Andric //===----------------------------------------------------------------------===//
8eb11fae6SDimitry Andric ///
9eb11fae6SDimitry Andric /// \file
10eb11fae6SDimitry Andric /// This file implements a set of utility VPlan to VPlan transformations.
11eb11fae6SDimitry Andric ///
12eb11fae6SDimitry Andric //===----------------------------------------------------------------------===//
13eb11fae6SDimitry Andric
14706b4fc4SDimitry Andric #include "VPlanTransforms.h"
157fa27ce4SDimitry Andric #include "VPRecipeBuilder.h"
16b1c73532SDimitry Andric #include "VPlanAnalysis.h"
17e3b55780SDimitry Andric #include "VPlanCFG.h"
18b1c73532SDimitry Andric #include "VPlanDominatorTree.h"
19ac9a064cSDimitry Andric #include "VPlanPatternMatch.h"
20eb11fae6SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
21b1c73532SDimitry Andric #include "llvm/ADT/STLExtras.h"
22145449b1SDimitry Andric #include "llvm/ADT/SetVector.h"
23145449b1SDimitry Andric #include "llvm/Analysis/IVDescriptors.h"
24e3b55780SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
25e3b55780SDimitry Andric #include "llvm/IR/Intrinsics.h"
26b1c73532SDimitry Andric #include "llvm/IR/PatternMatch.h"
27eb11fae6SDimitry Andric
28eb11fae6SDimitry Andric using namespace llvm;
29eb11fae6SDimitry Andric
VPInstructionsToVPRecipes(VPlanPtr & Plan,function_ref<const InductionDescriptor * (PHINode *)> GetIntOrFpInductionDescriptor,ScalarEvolution & SE,const TargetLibraryInfo & TLI)30706b4fc4SDimitry Andric void VPlanTransforms::VPInstructionsToVPRecipes(
317fa27ce4SDimitry Andric VPlanPtr &Plan,
3277fc4c14SDimitry Andric function_ref<const InductionDescriptor *(PHINode *)>
3377fc4c14SDimitry Andric GetIntOrFpInductionDescriptor,
347fa27ce4SDimitry Andric ScalarEvolution &SE, const TargetLibraryInfo &TLI) {
35eb11fae6SDimitry Andric
36e3b55780SDimitry Andric ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
37ac9a064cSDimitry Andric Plan->getVectorLoopRegion());
38145449b1SDimitry Andric for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
39ac9a064cSDimitry Andric // Skip blocks outside region
40ac9a064cSDimitry Andric if (!VPBB->getParent())
41ac9a064cSDimitry Andric break;
42145449b1SDimitry Andric VPRecipeBase *Term = VPBB->getTerminator();
43145449b1SDimitry Andric auto EndIter = Term ? Term->getIterator() : VPBB->end();
44eb11fae6SDimitry Andric // Introduce each ingredient into VPlan.
45145449b1SDimitry Andric for (VPRecipeBase &Ingredient :
46145449b1SDimitry Andric make_early_inc_range(make_range(VPBB->begin(), EndIter))) {
47145449b1SDimitry Andric
48c0981da4SDimitry Andric VPValue *VPV = Ingredient.getVPSingleValue();
49344a3780SDimitry Andric Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());
50eb11fae6SDimitry Andric
51eb11fae6SDimitry Andric VPRecipeBase *NewRecipe = nullptr;
52c0981da4SDimitry Andric if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
53344a3780SDimitry Andric auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
54ac9a064cSDimitry Andric const auto *II = GetIntOrFpInductionDescriptor(Phi);
55ac9a064cSDimitry Andric if (!II)
56ac9a064cSDimitry Andric continue;
57ac9a064cSDimitry Andric
58ac9a064cSDimitry Andric VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
59145449b1SDimitry Andric VPValue *Step =
60145449b1SDimitry Andric vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
617fa27ce4SDimitry Andric NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
62344a3780SDimitry Andric } else {
63c0981da4SDimitry Andric assert(isa<VPInstruction>(&Ingredient) &&
64344a3780SDimitry Andric "only VPInstructions expected here");
65344a3780SDimitry Andric assert(!isa<PHINode>(Inst) && "phis should be handled above");
66ac9a064cSDimitry Andric // Create VPWidenMemoryRecipe for loads and stores.
67344a3780SDimitry Andric if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
68ac9a064cSDimitry Andric NewRecipe = new VPWidenLoadRecipe(
697fa27ce4SDimitry Andric *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
70ac9a064cSDimitry Andric false /*Consecutive*/, false /*Reverse*/,
71ac9a064cSDimitry Andric Ingredient.getDebugLoc());
72344a3780SDimitry Andric } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
73ac9a064cSDimitry Andric NewRecipe = new VPWidenStoreRecipe(
747fa27ce4SDimitry Andric *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
75ac9a064cSDimitry Andric nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
76ac9a064cSDimitry Andric Ingredient.getDebugLoc());
77706b4fc4SDimitry Andric } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
787fa27ce4SDimitry Andric NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
79344a3780SDimitry Andric } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
804df029ccSDimitry Andric NewRecipe = new VPWidenCallRecipe(
81ac9a064cSDimitry Andric CI, Ingredient.operands(), getVectorIntrinsicIDForCall(CI, &TLI),
82ac9a064cSDimitry Andric CI->getDebugLoc());
83344a3780SDimitry Andric } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
847fa27ce4SDimitry Andric NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
857fa27ce4SDimitry Andric } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
867fa27ce4SDimitry Andric NewRecipe = new VPWidenCastRecipe(
87b1c73532SDimitry Andric CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
88344a3780SDimitry Andric } else {
897fa27ce4SDimitry Andric NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
90344a3780SDimitry Andric }
91344a3780SDimitry Andric }
92eb11fae6SDimitry Andric
93c0981da4SDimitry Andric NewRecipe->insertBefore(&Ingredient);
94b60736ecSDimitry Andric if (NewRecipe->getNumDefinedValues() == 1)
95344a3780SDimitry Andric VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
96b60736ecSDimitry Andric else
97b60736ecSDimitry Andric assert(NewRecipe->getNumDefinedValues() == 0 &&
98b60736ecSDimitry Andric "Only recpies with zero or one defined values expected");
99c0981da4SDimitry Andric Ingredient.eraseFromParent();
100eb11fae6SDimitry Andric }
101eb11fae6SDimitry Andric }
102344a3780SDimitry Andric }
103344a3780SDimitry Andric
/// Sink recipes into the 'then' block of the replicate region that uses them,
/// so they only execute when the region's predicate holds. Returns true if
/// any recipe was moved or duplicated.
static bool sinkScalarOperands(VPlan &Plan) {
  auto Iter = vp_depth_first_deep(Plan.getEntry());
  bool Changed = false;
  // First, collect the operands of all recipes in replicate blocks as seeds for
  // sinking.
  SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
  for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
    VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
    // Only triangle-shaped replicate regions are considered: a two-successor
    // entry whose first successor jumps straight to the exiting block.
    if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
      continue;
    VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
    if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
      continue;
    for (auto &Recipe : *VPBB) {
      for (VPValue *Op : Recipe.operands())
        if (auto *Def =
                dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
          WorkList.insert(std::make_pair(VPBB, Def));
    }
  }

  bool ScalarVFOnly = Plan.hasScalarVFOnly();
  // Try to sink each replicate or scalar IV steps recipe in the worklist.
  // Note: the worklist may grow while iterating (operands of sunk recipes are
  // appended below), so WorkList.size() is re-read every round.
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    VPBasicBlock *SinkTo;
    VPSingleDefRecipe *SinkCandidate;
    std::tie(SinkTo, SinkCandidate) = WorkList[I];
    // Never move recipes with side effects or memory accesses across the
    // predicate; already-placed recipes need no work.
    if (SinkCandidate->getParent() == SinkTo ||
        SinkCandidate->mayHaveSideEffects() ||
        SinkCandidate->mayReadOrWriteMemory())
      continue;
    if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
      // Uniform replicates are only worth sinking when just scalar VFs are
      // in play.
      if (!ScalarVFOnly && RepR->isUniform())
        continue;
    } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
      continue;

    bool NeedsDuplicating = false;
    // All recipe users of the sink candidate must be in the same block SinkTo
    // or all users outside of SinkTo must be uniform-after-vectorization (
    // i.e., only first lane is used) . In the latter case, we need to duplicate
    // SinkCandidate.
    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
                            SinkCandidate](VPUser *U) {
      auto *UI = dyn_cast<VPRecipeBase>(U);
      if (!UI)
        return false;
      if (UI->getParent() == SinkTo)
        return true;
      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
      // We only know how to duplicate VPRecipeRecipes for now.
      return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
    };
    if (!all_of(SinkCandidate->users(), CanSinkWithUser))
      continue;

    if (NeedsDuplicating) {
      if (ScalarVFOnly)
        continue;
      Instruction *I = SinkCandidate->getUnderlyingInstr();
      // The clone stays behind for the first-lane-only users outside SinkTo;
      // it is created as uniform (last argument true).
      auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
      // TODO: add ".cloned" suffix to name of Clone's VPValue.

      Clone->insertBefore(SinkCandidate);
      SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
        return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
      });
    }
    SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
    // The sunk recipe's own operands become new sinking candidates.
    for (VPValue *Op : SinkCandidate->operands())
      if (auto *Def =
              dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
        WorkList.insert(std::make_pair(SinkTo, Def));
    Changed = true;
  }
  return Changed;
}
181344a3780SDimitry Andric
182344a3780SDimitry Andric /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
183344a3780SDimitry Andric /// the mask.
getPredicatedMask(VPRegionBlock * R)184344a3780SDimitry Andric VPValue *getPredicatedMask(VPRegionBlock *R) {
185344a3780SDimitry Andric auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
186344a3780SDimitry Andric if (!EntryBB || EntryBB->size() != 1 ||
187344a3780SDimitry Andric !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
188344a3780SDimitry Andric return nullptr;
189344a3780SDimitry Andric
190344a3780SDimitry Andric return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
191344a3780SDimitry Andric }
192344a3780SDimitry Andric
193344a3780SDimitry Andric /// If \p R is a triangle region, return the 'then' block of the triangle.
getPredicatedThenBlock(VPRegionBlock * R)194344a3780SDimitry Andric static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
195344a3780SDimitry Andric auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
196344a3780SDimitry Andric if (EntryBB->getNumSuccessors() != 2)
197344a3780SDimitry Andric return nullptr;
198344a3780SDimitry Andric
199344a3780SDimitry Andric auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
200344a3780SDimitry Andric auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
201344a3780SDimitry Andric if (!Succ0 || !Succ1)
202344a3780SDimitry Andric return nullptr;
203344a3780SDimitry Andric
204344a3780SDimitry Andric if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
205344a3780SDimitry Andric return nullptr;
206344a3780SDimitry Andric if (Succ0->getSingleSuccessor() == Succ1)
207344a3780SDimitry Andric return Succ0;
208344a3780SDimitry Andric if (Succ1->getSingleSuccessor() == Succ0)
209344a3780SDimitry Andric return Succ1;
210344a3780SDimitry Andric return nullptr;
211344a3780SDimitry Andric }
212344a3780SDimitry Andric
2137fa27ce4SDimitry Andric // Merge replicate regions in their successor region, if a replicate region
2147fa27ce4SDimitry Andric // is connected to a successor replicate region with the same predicate by a
2157fa27ce4SDimitry Andric // single, empty VPBasicBlock.
mergeReplicateRegionsIntoSuccessors(VPlan & Plan)2167fa27ce4SDimitry Andric static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
217344a3780SDimitry Andric SetVector<VPRegionBlock *> DeletedRegions;
218344a3780SDimitry Andric
219e3b55780SDimitry Andric // Collect replicate regions followed by an empty block, followed by another
220e3b55780SDimitry Andric // replicate region with matching masks to process front. This is to avoid
221e3b55780SDimitry Andric // iterator invalidation issues while merging regions.
222e3b55780SDimitry Andric SmallVector<VPRegionBlock *, 8> WorkList;
223e3b55780SDimitry Andric for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
224e3b55780SDimitry Andric vp_depth_first_deep(Plan.getEntry()))) {
225e3b55780SDimitry Andric if (!Region1->isReplicator())
226344a3780SDimitry Andric continue;
227344a3780SDimitry Andric auto *MiddleBasicBlock =
228344a3780SDimitry Andric dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
229344a3780SDimitry Andric if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
230344a3780SDimitry Andric continue;
231344a3780SDimitry Andric
232344a3780SDimitry Andric auto *Region2 =
233344a3780SDimitry Andric dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
234e3b55780SDimitry Andric if (!Region2 || !Region2->isReplicator())
235344a3780SDimitry Andric continue;
236344a3780SDimitry Andric
237344a3780SDimitry Andric VPValue *Mask1 = getPredicatedMask(Region1);
238344a3780SDimitry Andric VPValue *Mask2 = getPredicatedMask(Region2);
239344a3780SDimitry Andric if (!Mask1 || Mask1 != Mask2)
240344a3780SDimitry Andric continue;
241e3b55780SDimitry Andric
242e3b55780SDimitry Andric assert(Mask1 && Mask2 && "both region must have conditions");
243e3b55780SDimitry Andric WorkList.push_back(Region1);
244e3b55780SDimitry Andric }
245e3b55780SDimitry Andric
246e3b55780SDimitry Andric // Move recipes from Region1 to its successor region, if both are triangles.
247e3b55780SDimitry Andric for (VPRegionBlock *Region1 : WorkList) {
248e3b55780SDimitry Andric if (DeletedRegions.contains(Region1))
249e3b55780SDimitry Andric continue;
250e3b55780SDimitry Andric auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
251e3b55780SDimitry Andric auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
252e3b55780SDimitry Andric
253344a3780SDimitry Andric VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
254344a3780SDimitry Andric VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
255344a3780SDimitry Andric if (!Then1 || !Then2)
256344a3780SDimitry Andric continue;
257344a3780SDimitry Andric
258344a3780SDimitry Andric // Note: No fusion-preventing memory dependencies are expected in either
259344a3780SDimitry Andric // region. Such dependencies should be rejected during earlier dependence
260344a3780SDimitry Andric // checks, which guarantee accesses can be re-ordered for vectorization.
261344a3780SDimitry Andric //
262344a3780SDimitry Andric // Move recipes to the successor region.
263344a3780SDimitry Andric for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
264344a3780SDimitry Andric ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());
265344a3780SDimitry Andric
266344a3780SDimitry Andric auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
267344a3780SDimitry Andric auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());
268344a3780SDimitry Andric
269344a3780SDimitry Andric // Move VPPredInstPHIRecipes from the merge block to the successor region's
270344a3780SDimitry Andric // merge block. Update all users inside the successor region to use the
271344a3780SDimitry Andric // original values.
272344a3780SDimitry Andric for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
273344a3780SDimitry Andric VPValue *PredInst1 =
274344a3780SDimitry Andric cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
275c0981da4SDimitry Andric VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
276b1c73532SDimitry Andric Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
277b1c73532SDimitry Andric auto *UI = dyn_cast<VPRecipeBase>(&U);
278b1c73532SDimitry Andric return UI && UI->getParent() == Then2;
279b1c73532SDimitry Andric });
280344a3780SDimitry Andric
281ac9a064cSDimitry Andric // Remove phi recipes that are unused after merging the regions.
282ac9a064cSDimitry Andric if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
283ac9a064cSDimitry Andric Phi1ToMove.eraseFromParent();
284ac9a064cSDimitry Andric continue;
285ac9a064cSDimitry Andric }
286344a3780SDimitry Andric Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
287344a3780SDimitry Andric }
288344a3780SDimitry Andric
289344a3780SDimitry Andric // Finally, remove the first region.
290344a3780SDimitry Andric for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
291344a3780SDimitry Andric VPBlockUtils::disconnectBlocks(Pred, Region1);
292344a3780SDimitry Andric VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
293344a3780SDimitry Andric }
294344a3780SDimitry Andric VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
295344a3780SDimitry Andric DeletedRegions.insert(Region1);
296344a3780SDimitry Andric }
297344a3780SDimitry Andric
298344a3780SDimitry Andric for (VPRegionBlock *ToDelete : DeletedRegions)
299344a3780SDimitry Andric delete ToDelete;
300e3b55780SDimitry Andric return !DeletedRegions.empty();
301e3b55780SDimitry Andric }
302e3b55780SDimitry Andric
/// Wrap the predicated replicate recipe \p PredRecipe in a new triangular
/// if-then replicate region guarded by its mask, and return that region.
/// \p PredRecipe is destroyed; users are redirected through a
/// VPPredInstPHIRecipe in the region's continue block.
/// NOTE(review): \p Plan is currently unused by this helper — confirm whether
/// it can be dropped or is kept for interface symmetry.
static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
                                            VPlan &Plan) {
  Instruction *Instr = PredRecipe->getUnderlyingInstr();
  // Build the triangular if-then region.
  std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
  assert(Instr->getParent() && "Predicated instruction not in any basic block");
  // Entry block: branch on the recipe's mask.
  auto *BlockInMask = PredRecipe->getMask();
  auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
  auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);

  // Replace predicated replicate recipe with a replicate recipe without a
  // mask but in the replicate region. The mask is the last operand and is
  // dropped via std::prev on the operand range.
  auto *RecipeWithoutMask = new VPReplicateRecipe(
      PredRecipe->getUnderlyingInstr(),
      make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
      PredRecipe->isUniform());
  auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);

  // Only create a merge phi when the predicated value has users.
  VPPredInstPHIRecipe *PHIRecipe = nullptr;
  if (PredRecipe->getNumUsers() != 0) {
    PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
    PredRecipe->replaceAllUsesWith(PHIRecipe);
    // Restore the phi's operand, which replaceAllUsesWith also rewrote.
    PHIRecipe->setOperand(0, RecipeWithoutMask);
  }
  PredRecipe->eraseFromParent();
  auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
  VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);

  // Note: first set Entry as region entry and then connect successors starting
  // from it in order, to propagate the "parent" of each VPBasicBlock.
  VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
  VPBlockUtils::connectBlocks(Pred, Exiting);

  return Region;
}
3387fa27ce4SDimitry Andric
addReplicateRegions(VPlan & Plan)3397fa27ce4SDimitry Andric static void addReplicateRegions(VPlan &Plan) {
3407fa27ce4SDimitry Andric SmallVector<VPReplicateRecipe *> WorkList;
3417fa27ce4SDimitry Andric for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3427fa27ce4SDimitry Andric vp_depth_first_deep(Plan.getEntry()))) {
3437fa27ce4SDimitry Andric for (VPRecipeBase &R : *VPBB)
3447fa27ce4SDimitry Andric if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
3457fa27ce4SDimitry Andric if (RepR->isPredicated())
3467fa27ce4SDimitry Andric WorkList.push_back(RepR);
3477fa27ce4SDimitry Andric }
3487fa27ce4SDimitry Andric }
3497fa27ce4SDimitry Andric
3507fa27ce4SDimitry Andric unsigned BBNum = 0;
3517fa27ce4SDimitry Andric for (VPReplicateRecipe *RepR : WorkList) {
3527fa27ce4SDimitry Andric VPBasicBlock *CurrentBlock = RepR->getParent();
3537fa27ce4SDimitry Andric VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
3547fa27ce4SDimitry Andric
3557fa27ce4SDimitry Andric BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
3567fa27ce4SDimitry Andric SplitBlock->setName(
3577fa27ce4SDimitry Andric OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
3587fa27ce4SDimitry Andric // Record predicated instructions for above packing optimizations.
3597fa27ce4SDimitry Andric VPBlockBase *Region = createReplicateRegion(RepR, Plan);
3607fa27ce4SDimitry Andric Region->setParent(CurrentBlock->getParent());
3617fa27ce4SDimitry Andric VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
3627fa27ce4SDimitry Andric VPBlockUtils::connectBlocks(CurrentBlock, Region);
3637fa27ce4SDimitry Andric VPBlockUtils::connectBlocks(Region, SplitBlock);
3647fa27ce4SDimitry Andric }
3657fa27ce4SDimitry Andric }
3667fa27ce4SDimitry Andric
/// Remove redundant VPBasicBlocks by merging them into their predecessor if
/// the predecessor has a single successor. Returns true if any block was
/// merged.
static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
  // Collect candidates first; merging mutates the CFG being traversed.
  SmallVector<VPBasicBlock *> WorkList;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    // Don't fold the exit block of the Plan into its single predecessor for
    // now.
    // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
    if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
      continue;
    auto *PredVPBB =
        dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
    if (!PredVPBB || PredVPBB->getNumSuccessors() != 1)
      continue;
    WorkList.push_back(VPBB);
  }

  for (VPBasicBlock *VPBB : WorkList) {
    VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
    // Move all recipes into the predecessor, preserving their order.
    for (VPRecipeBase &R : make_early_inc_range(*VPBB))
      R.moveBefore(*PredVPBB, PredVPBB->end());
    VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
    // If VPBB was its region's exiting block, the predecessor takes over
    // that role.
    auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
    if (ParentRegion && ParentRegion->getExiting() == VPBB)
      ParentRegion->setExiting(PredVPBB);
    // Re-route VPBB's successor edges to the predecessor (successor list is
    // copied since disconnecting mutates it).
    for (auto *Succ : to_vector(VPBB->successors())) {
      VPBlockUtils::disconnectBlocks(VPBB, Succ);
      VPBlockUtils::connectBlocks(PredVPBB, Succ);
    }
    delete VPBB;
  }
  return !WorkList.empty();
}
40177fc4c14SDimitry Andric
createAndOptimizeReplicateRegions(VPlan & Plan)402ac9a064cSDimitry Andric void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
403ac9a064cSDimitry Andric // Convert masked VPReplicateRecipes to if-then region blocks.
404ac9a064cSDimitry Andric addReplicateRegions(Plan);
405ac9a064cSDimitry Andric
406ac9a064cSDimitry Andric bool ShouldSimplify = true;
407ac9a064cSDimitry Andric while (ShouldSimplify) {
408ac9a064cSDimitry Andric ShouldSimplify = sinkScalarOperands(Plan);
409ac9a064cSDimitry Andric ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
410ac9a064cSDimitry Andric ShouldSimplify |= mergeBlocksIntoPredecessors(Plan);
411ac9a064cSDimitry Andric }
412ac9a064cSDimitry Andric }
413ac9a064cSDimitry Andric
/// Remove redundant casts of inductions.
///
/// Such redundant casts are casts of induction variables that can be ignored,
/// because we already proved that the casted phi is equal to the uncasted phi
/// in the vectorized loop. There is no need to vectorize the cast - the same
/// value can be used for both the phi and casts in the vector loop.
static void removeRedundantInductionCasts(VPlan &Plan) {
  for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    // Only untruncated int/FP induction phis carry a cast chain to bypass.
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!IV || IV->getTruncInst())
      continue;

    // A sequence of IR Casts has potentially been recorded for IV, which
    // *must be bypassed* when the IV is vectorized, because the vectorized IV
    // will produce the desired casted value. This sequence forms a def-use
    // chain and is provided in reverse order, ending with the cast that uses
    // the IV phi. Search for the recipe of the last cast in the chain and
    // replace it with the original IV. Note that only the final cast is
    // expected to have users outside the cast-chain and the dead casts left
    // over will be cleaned up later.
    auto &Casts = IV->getInductionDescriptor().getCastInsts();
    VPValue *FindMyCast = IV;
    for (Instruction *IRCast : reverse(Casts)) {
      // Find the recipe among FindMyCast's users that wraps IRCast.
      VPSingleDefRecipe *FoundUserCast = nullptr;
      for (auto *U : FindMyCast->users()) {
        auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
        if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
          FoundUserCast = UserCast;
          break;
        }
      }
      // NOTE(review): assumes every cast in the chain has a matching user
      // recipe; if none is found, FindMyCast becomes null and the
      // replaceAllUsesWith below would dereference it — confirm the
      // descriptor guarantees the chain is fully materialized.
      FindMyCast = FoundUserCast;
    }
    FindMyCast->replaceAllUsesWith(IV);
  }
}
4506f8fc217SDimitry Andric
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
  // Find any widened user of the canonical IV; at most one is handled.
  VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
  for (VPUser *U : CanonicalIV->users()) {
    WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
    if (WidenNewIV)
      break;
  }

  if (!WidenNewIV)
    return;

  // Look for a canonical widened induction among the header phis to reuse.
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);

    if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
      continue;

    // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
    // everything WidenNewIV's users need. That is, WidenOriginalIV will
    // generate a vector phi or all users of WidenNewIV demand the first lane
    // only.
    if (any_of(WidenOriginalIV->users(),
               [WidenOriginalIV](VPUser *U) {
                 return !U->usesScalars(WidenOriginalIV);
               }) ||
        vputils::onlyFirstLaneUsed(WidenNewIV)) {
      WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
      WidenNewIV->eraseFromParent();
      return;
    }
  }
}
487145449b1SDimitry Andric
488ac9a064cSDimitry Andric /// Returns true if \p R is dead and can be removed.
isDeadRecipe(VPRecipeBase & R)489ac9a064cSDimitry Andric static bool isDeadRecipe(VPRecipeBase &R) {
490ac9a064cSDimitry Andric using namespace llvm::PatternMatch;
491ac9a064cSDimitry Andric // Do remove conditional assume instructions as their conditions may be
492ac9a064cSDimitry Andric // flattened.
493ac9a064cSDimitry Andric auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
494ac9a064cSDimitry Andric bool IsConditionalAssume =
495ac9a064cSDimitry Andric RepR && RepR->isPredicated() &&
496ac9a064cSDimitry Andric match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>());
497ac9a064cSDimitry Andric if (IsConditionalAssume)
498ac9a064cSDimitry Andric return true;
499ac9a064cSDimitry Andric
500ac9a064cSDimitry Andric if (R.mayHaveSideEffects())
501ac9a064cSDimitry Andric return false;
502ac9a064cSDimitry Andric
503ac9a064cSDimitry Andric // Recipe is dead if no user keeps the recipe alive.
504ac9a064cSDimitry Andric return all_of(R.definedValues(),
505ac9a064cSDimitry Andric [](VPValue *V) { return V->getNumUsers() == 0; });
506ac9a064cSDimitry Andric }
507ac9a064cSDimitry Andric
removeDeadRecipes(VPlan & Plan)508ac9a064cSDimitry Andric static void removeDeadRecipes(VPlan &Plan) {
509e3b55780SDimitry Andric ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
510e3b55780SDimitry Andric Plan.getEntry());
511145449b1SDimitry Andric
512145449b1SDimitry Andric for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
513145449b1SDimitry Andric // The recipes in the block are processed in reverse order, to catch chains
514145449b1SDimitry Andric // of dead recipes.
515145449b1SDimitry Andric for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
516ac9a064cSDimitry Andric if (isDeadRecipe(R))
517145449b1SDimitry Andric R.eraseFromParent();
518145449b1SDimitry Andric }
519145449b1SDimitry Andric }
520145449b1SDimitry Andric }
521145449b1SDimitry Andric
/// Creates a VPScalarIVStepsRecipe producing the scalar steps of the induction
/// described by (\p Kind, \p InductionOpcode, \p FPBinOp, \p StartV, \p Step),
/// based on \p Plan's canonical induction variable. If the canonical IV does
/// not directly match the induction, a VPDerivedIVRecipe is created first. If
/// \p TruncI is non-null, the base induction is truncated to \p TruncI's type;
/// the step is likewise truncated when its type is wider than the result type.
/// New header recipes are inserted at \p IP; a truncated step is materialized
/// in the vector preheader. \returns the recipe producing the scalar steps.
static VPScalarIVStepsRecipe *
createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                    Instruction::BinaryOps InductionOpcode,
                    FPMathOperator *FPBinOp, ScalarEvolution &SE,
                    Instruction *TruncI, VPValue *StartV, VPValue *Step,
                    VPBasicBlock::iterator IP) {
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
  VPSingleDefRecipe *BaseIV = CanonicalIV;
  // If the canonical IV does not already match (Kind, StartV, Step), derive
  // the requested induction from it.
  if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
    BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
    HeaderVPBB->insert(BaseIV, IP);
  }

  // Truncate base induction if needed.
  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
                          SE.getContext());
  Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
  if (TruncI) {
    Type *TruncTy = TruncI->getType();
    assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
    BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
    HeaderVPBB->insert(BaseIV, IP);
    ResultTy = TruncTy;
  }

  // Truncate step if needed.
  Type *StepTy = TypeInfo.inferScalarType(Step);
  if (ResultTy != StepTy) {
    assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
    Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
    // The truncated step is loop-invariant, so materialize it in the vector
    // preheader rather than the header.
    auto *VecPreheader =
        cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
    VecPreheader->appendRecipe(Step->getDefiningRecipe());
  }

  VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
      BaseIV, Step, InductionOpcode,
      FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
  HeaderVPBB->insert(Steps, IP);
  return Steps;
}
568b1c73532SDimitry Andric
/// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
/// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
/// VPWidenPointerInductionRecipe will generate vectors only. If some users
/// require vectors while other require scalars, the scalar uses need to extract
/// the scalars from the generated vectors (Note that this is different to how
/// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
/// if any of its users needs scalar values, by providing them scalar steps
/// built on the canonical scalar IV and update the original IV's users. This is
/// an optional optimization to reduce the needs of vector extracts.
static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
  SmallVector<VPRecipeBase *> ToRemove;
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
  // All newly created recipes go right after the header phis.
  VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    // Replace wide pointer inductions which have only their scalars used by
    // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
    if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
      if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
        continue;

      // Build scalar steps of an integer induction starting at 0 with the
      // pointer induction's step, then add them to its start pointer.
      const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
      VPValue *StartV =
          Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
      VPValue *StepV = PtrIV->getOperand(1);
      VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
          Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
          SE, nullptr, StartV, StepV, InsertPt);

      auto *Recipe = new VPInstruction(VPInstruction::PtrAdd,
                                       {PtrIV->getStartValue(), Steps},
                                       PtrIV->getDebugLoc(), "next.gep");

      Recipe->insertAfter(Steps);
      PtrIV->replaceAllUsesWith(Recipe);
      continue;
    }

    // Replace widened induction with scalar steps for users that only use
    // scalars.
    auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!WideIV)
      continue;
    // If only vector VFs are used, skip inductions with no scalar users; with
    // a scalar VF in the plan, scalar steps are needed unconditionally.
    if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
          return U->usesScalars(WideIV);
        }))
      continue;

    const InductionDescriptor &ID = WideIV->getInductionDescriptor();
    VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
        Plan, ID.getKind(), ID.getInductionOpcode(),
        dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
        WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
        InsertPt);

    // Update scalar users of IV to use Step instead.
    if (!HasOnlyVectorVFs)
      WideIV->replaceAllUsesWith(Steps);
    else
      WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
        return U.usesScalars(WideIV);
      });
  }
}
633145449b1SDimitry Andric
634ac9a064cSDimitry Andric /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
635ac9a064cSDimitry Andric /// them with already existing recipes expanding the same SCEV expression.
removeRedundantExpandSCEVRecipes(VPlan & Plan)636ac9a064cSDimitry Andric static void removeRedundantExpandSCEVRecipes(VPlan &Plan) {
637145449b1SDimitry Andric DenseMap<const SCEV *, VPValue *> SCEV2VPV;
638145449b1SDimitry Andric
639145449b1SDimitry Andric for (VPRecipeBase &R :
640145449b1SDimitry Andric make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
641145449b1SDimitry Andric auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
642145449b1SDimitry Andric if (!ExpR)
643145449b1SDimitry Andric continue;
644145449b1SDimitry Andric
645145449b1SDimitry Andric auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
646145449b1SDimitry Andric if (I.second)
647145449b1SDimitry Andric continue;
648145449b1SDimitry Andric ExpR->replaceAllUsesWith(I.first->second);
649145449b1SDimitry Andric ExpR->eraseFromParent();
650145449b1SDimitry Andric }
651145449b1SDimitry Andric }
652e3b55780SDimitry Andric
recursivelyDeleteDeadRecipes(VPValue * V)653ac9a064cSDimitry Andric static void recursivelyDeleteDeadRecipes(VPValue *V) {
654ac9a064cSDimitry Andric SmallVector<VPValue *> WorkList;
655ac9a064cSDimitry Andric SmallPtrSet<VPValue *, 8> Seen;
656ac9a064cSDimitry Andric WorkList.push_back(V);
657e3b55780SDimitry Andric
658ac9a064cSDimitry Andric while (!WorkList.empty()) {
659ac9a064cSDimitry Andric VPValue *Cur = WorkList.pop_back_val();
660ac9a064cSDimitry Andric if (!Seen.insert(Cur).second)
661ac9a064cSDimitry Andric continue;
662ac9a064cSDimitry Andric VPRecipeBase *R = Cur->getDefiningRecipe();
663ac9a064cSDimitry Andric if (!R)
664ac9a064cSDimitry Andric continue;
665ac9a064cSDimitry Andric if (!isDeadRecipe(*R))
666ac9a064cSDimitry Andric continue;
667ac9a064cSDimitry Andric WorkList.append(R->op_begin(), R->op_end());
668ac9a064cSDimitry Andric R->eraseFromParent();
669ac9a064cSDimitry Andric }
670e3b55780SDimitry Andric }
671e3b55780SDimitry Andric
/// Given the chosen \p BestVF and \p BestUF, try to prove the trip count is no
/// larger than BestVF * BestUF, in which case the vector loop's conditional
/// terminator can be replaced by a branch on a constant true condition (the
/// loop body then runs exactly one vector iteration). Also records BestVF and
/// BestUF in \p Plan.
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                         unsigned BestUF,
                                         PredicatedScalarEvolution &PSE) {
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  VPBasicBlock *ExitingVPBB =
      Plan.getVectorLoopRegion()->getExitingBasicBlock();
  auto *Term = &ExitingVPBB->back();
  // Try to simplify the branch condition if TC <= VF * UF when preparing to
  // execute the plan for the main vector loop. We only do this if the
  // terminator is:
  //  1. BranchOnCount, or
  //  2. BranchOnCond where the input is Not(ActiveLaneMask).
  using namespace llvm::VPlanPatternMatch;
  if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
      !match(Term,
             m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
    return;

  // Bail out unless SE can prove TC is non-zero and at most VF * UF.
  Type *IdxTy =
      Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
  const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
  ScalarEvolution &SE = *PSE.getSE();
  ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
  const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
  if (TripCount->isZero() ||
      !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
    return;

  // Replace the terminator with a branch on a constant true condition, and
  // clean up any recipes that only computed the old branch condition.
  LLVMContext &Ctx = SE.getContext();
  auto *BOC =
      new VPInstruction(VPInstruction::BranchOnCond,
                        {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});

  // Snapshot the operands before erasing Term, then check each for deadness.
  SmallVector<VPValue *> PossiblyDead(Term->operands());
  Term->eraseFromParent();
  for (VPValue *Op : PossiblyDead)
    recursivelyDeleteDeadRecipes(Op);
  ExitingVPBB->appendRecipe(BOC);
  Plan.setVF(BestVF);
  Plan.setUF(BestUF);
  // TODO: Further simplifications are possible
  //      1. Replace inductions with constants.
  //      2. Replace vector loop region with VPBasicBlock.
}
7177fa27ce4SDimitry Andric
7187fa27ce4SDimitry Andric #ifndef NDEBUG
GetReplicateRegion(VPRecipeBase * R)7197fa27ce4SDimitry Andric static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
7207fa27ce4SDimitry Andric auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
7217fa27ce4SDimitry Andric if (Region && Region->isReplicator()) {
7227fa27ce4SDimitry Andric assert(Region->getNumSuccessors() == 1 &&
7237fa27ce4SDimitry Andric Region->getNumPredecessors() == 1 && "Expected SESE region!");
7247fa27ce4SDimitry Andric assert(R->getParent()->size() == 1 &&
7257fa27ce4SDimitry Andric "A recipe in an original replicator region must be the only "
7267fa27ce4SDimitry Andric "recipe in its block");
7277fa27ce4SDimitry Andric return Region;
7287fa27ce4SDimitry Andric }
7297fa27ce4SDimitry Andric return nullptr;
7307fa27ce4SDimitry Andric }
7317fa27ce4SDimitry Andric #endif
7327fa27ce4SDimitry Andric
properlyDominates(const VPRecipeBase * A,const VPRecipeBase * B,VPDominatorTree & VPDT)7337fa27ce4SDimitry Andric static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
7347fa27ce4SDimitry Andric VPDominatorTree &VPDT) {
7357fa27ce4SDimitry Andric if (A == B)
7367fa27ce4SDimitry Andric return false;
7377fa27ce4SDimitry Andric
7387fa27ce4SDimitry Andric auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
7397fa27ce4SDimitry Andric for (auto &R : *A->getParent()) {
7407fa27ce4SDimitry Andric if (&R == A)
7417fa27ce4SDimitry Andric return true;
7427fa27ce4SDimitry Andric if (&R == B)
7437fa27ce4SDimitry Andric return false;
7447fa27ce4SDimitry Andric }
7457fa27ce4SDimitry Andric llvm_unreachable("recipe not found");
7467fa27ce4SDimitry Andric };
7477fa27ce4SDimitry Andric const VPBlockBase *ParentA = A->getParent();
7487fa27ce4SDimitry Andric const VPBlockBase *ParentB = B->getParent();
7497fa27ce4SDimitry Andric if (ParentA == ParentB)
7507fa27ce4SDimitry Andric return LocalComesBefore(A, B);
7517fa27ce4SDimitry Andric
7527fa27ce4SDimitry Andric assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
7537fa27ce4SDimitry Andric "No replicate regions expected at this point");
7547fa27ce4SDimitry Andric assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
7557fa27ce4SDimitry Andric "No replicate regions expected at this point");
7567fa27ce4SDimitry Andric return VPDT.properlyDominates(ParentA, ParentB);
7577fa27ce4SDimitry Andric }
7587fa27ce4SDimitry Andric
/// Sink users of \p FOR after the recipe defining the previous value \p
/// Previous of the recurrence. \returns true if all users of \p FOR could be
/// re-arranged as needed or false if it is not possible.
static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
                                 VPRecipeBase *Previous,
                                 VPDominatorTree &VPDT) {
  // Collect recipes that need sinking.
  SmallVector<VPRecipeBase *> WorkList;
  SmallPtrSet<VPRecipeBase *, 8> Seen;
  Seen.insert(Previous);
  // Returns false if \p SinkCandidate blocks the transformation; otherwise
  // queues it for sinking, unless it is already placed after Previous, has
  // been seen before, or is a header phi (which is left in place).
  auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
    // The previous value must not depend on the users of the recurrence phi. In
    // that case, FOR is not a fixed order recurrence.
    if (SinkCandidate == Previous)
      return false;

    if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
        !Seen.insert(SinkCandidate).second ||
        properlyDominates(Previous, SinkCandidate, VPDT))
      return true;

    // Recipes with side effects cannot be moved.
    if (SinkCandidate->mayHaveSideEffects())
      return false;

    WorkList.push_back(SinkCandidate);
    return true;
  };

  // Recursively sink users of FOR after Previous.
  // NOTE: WorkList grows while iterating, so an index-based loop is required.
  WorkList.push_back(FOR);
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    VPRecipeBase *Current = WorkList[I];
    assert(Current->getNumDefinedValues() == 1 &&
           "only recipes with a single defined value expected");

    for (VPUser *User : Current->getVPSingleValue()->users()) {
      if (auto *R = dyn_cast<VPRecipeBase>(User))
        if (!TryToPushSinkCandidate(R))
          return false;
    }
  }

  // Keep recipes to sink ordered by dominance so earlier instructions are
  // processed first.
  sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
    return properlyDominates(A, B, VPDT);
  });

  // Move each candidate directly after the previously moved one, starting at
  // Previous, so the dominance order established above is preserved.
  for (VPRecipeBase *SinkCandidate : WorkList) {
    if (SinkCandidate == FOR)
      continue;

    SinkCandidate->moveAfter(Previous);
    Previous = SinkCandidate;
  }
  return true;
}
8177fa27ce4SDimitry Andric
/// Sink users of each fixed-order recurrence phi past the recipe defining its
/// previous value, then introduce a FirstOrderRecurrenceSplice recipe
/// combining the phi and its previous value, and an ExtractFromEnd recipe in
/// the middle block providing the penultimate value to VPLiveOut users.
/// \returns true on success, or false if sinking was not possible.
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                  VPBuilder &LoopBuilder) {
  VPDominatorTree VPDT;
  VPDT.recalculate(Plan);

  // Collect all fixed-order recurrence phis in the loop header.
  SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
  for (VPRecipeBase &R :
       Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
    if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
      RecurrencePhis.push_back(FOR);

  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  VPBuilder MiddleBuilder;
  // Set insert point so new recipes are inserted before terminator and
  // condition, if there is either the former or both.
  if (auto *Term =
          dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator())) {
    if (auto *Cmp = dyn_cast<VPInstruction>(Term->getOperand(0)))
      MiddleBuilder.setInsertPoint(Cmp);
    else
      MiddleBuilder.setInsertPoint(Term);
  } else
    MiddleBuilder.setInsertPoint(MiddleVPBB);

  for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
    SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
    VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
    // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
    // to terminate.
    while (auto *PrevPhi =
               dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
      assert(PrevPhi->getParent() == FOR->getParent());
      assert(SeenPhis.insert(PrevPhi).second);
      Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
    }

    if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
      return false;

    // Introduce a recipe to combine the incoming and previous values of a
    // fixed-order recurrence.
    VPBasicBlock *InsertBlock = Previous->getParent();
    if (isa<VPHeaderPHIRecipe>(Previous))
      LoopBuilder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
    else
      LoopBuilder.setInsertPoint(InsertBlock,
                                 std::next(Previous->getIterator()));

    auto *RecurSplice = cast<VPInstruction>(
        LoopBuilder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
                                 {FOR, FOR->getBackedgeValue()}));

    FOR->replaceAllUsesWith(RecurSplice);
    // Set the first operand of RecurSplice to FOR again, after replacing
    // all users.
    RecurSplice->setOperand(0, FOR);

    // This is the second phase of vectorizing first-order recurrences. An
    // overview of the transformation is described below. Suppose we have the
    // following loop with some use after the loop of the last a[i-1],
    //
    //   for (int i = 0; i < n; ++i) {
    //     t = a[i - 1];
    //     b[i] = a[i] - t;
    //   }
    //   use t;
    //
    // There is a first-order recurrence on "a". For this loop, the shorthand
    // scalar IR looks like:
    //
    //   scalar.ph:
    //     s_init = a[-1]
    //     br scalar.body
    //
    //   scalar.body:
    //     i = phi [0, scalar.ph], [i+1, scalar.body]
    //     s1 = phi [s_init, scalar.ph], [s2, scalar.body]
    //     s2 = a[i]
    //     b[i] = s2 - s1
    //     br cond, scalar.body, exit.block
    //
    //   exit.block:
    //     use = lcssa.phi [s1, scalar.body]
    //
    // In this example, s1 is a recurrence because it's value depends on the
    // previous iteration. In the first phase of vectorization, we created a
    // vector phi v1 for s1. We now complete the vectorization and produce the
    // shorthand vector IR shown below (for VF = 4, UF = 1).
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))
    //     b[i, i+1, i+2, i+3] = v2 - v3
    //     br cond, vector.body, middle.block
    //
    //   middle.block:
    //     s_penultimate = v2(2) = v3(3)
    //     s_resume = v2(3)
    //     br cond, scalar.ph, exit.block
    //
    //   scalar.ph:
    //     s_init' = phi [s_resume, middle.block], [s_init, otherwise]
    //     br scalar.body
    //
    //   scalar.body:
    //     i = phi [0, scalar.ph], [i+1, scalar.body]
    //     s1 = phi [s_init', scalar.ph], [s2, scalar.body]
    //     s2 = a[i]
    //     b[i] = s2 - s1
    //     br cond, scalar.body, exit.block
    //
    //   exit.block:
    //     lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block]
    //
    // After execution completes the vector loop, we extract the next value of
    // the recurrence (x) to use as the initial value in the scalar loop. This
    // is modeled by ExtractFromEnd.
    Type *IntTy = Plan.getCanonicalIV()->getScalarType();

    // Extract the penultimate value of the recurrence and update VPLiveOut
    // users of the recurrence splice. Note that the extract of the final value
    // used to resume in the scalar loop is created earlier during VPlan
    // construction.
    auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp(
        VPInstruction::ExtractFromEnd,
        {FOR->getBackedgeValue(),
         Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))},
        {}, "vector.recur.extract.for.phi"));
    RecurSplice->replaceUsesWithIf(
        Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
  }
  return true;
}
9587fa27ce4SDimitry Andric
collectUsersRecursively(VPValue * V)959ac9a064cSDimitry Andric static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
960ac9a064cSDimitry Andric SetVector<VPUser *> Users(V->user_begin(), V->user_end());
961ac9a064cSDimitry Andric for (unsigned I = 0; I != Users.size(); ++I) {
962ac9a064cSDimitry Andric VPRecipeBase *Cur = dyn_cast<VPRecipeBase>(Users[I]);
963ac9a064cSDimitry Andric if (!Cur || isa<VPHeaderPHIRecipe>(Cur))
964ac9a064cSDimitry Andric continue;
965ac9a064cSDimitry Andric for (VPValue *V : Cur->definedValues())
966ac9a064cSDimitry Andric Users.insert(V->user_begin(), V->user_end());
967ac9a064cSDimitry Andric }
968ac9a064cSDimitry Andric return Users.takeVector();
969ac9a064cSDimitry Andric }
970ac9a064cSDimitry Andric
clearReductionWrapFlags(VPlan & Plan)9717fa27ce4SDimitry Andric void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
9727fa27ce4SDimitry Andric for (VPRecipeBase &R :
9737fa27ce4SDimitry Andric Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9747fa27ce4SDimitry Andric auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9757fa27ce4SDimitry Andric if (!PhiR)
9767fa27ce4SDimitry Andric continue;
9777fa27ce4SDimitry Andric const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9787fa27ce4SDimitry Andric RecurKind RK = RdxDesc.getRecurrenceKind();
9797fa27ce4SDimitry Andric if (RK != RecurKind::Add && RK != RecurKind::Mul)
9807fa27ce4SDimitry Andric continue;
9817fa27ce4SDimitry Andric
982ac9a064cSDimitry Andric for (VPUser *U : collectUsersRecursively(PhiR))
983ac9a064cSDimitry Andric if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) {
9847fa27ce4SDimitry Andric RecWithFlags->dropPoisonGeneratingFlags();
9857fa27ce4SDimitry Andric }
9867fa27ce4SDimitry Andric }
9877fa27ce4SDimitry Andric }
988b1c73532SDimitry Andric
/// Try to simplify recipe \p R. Currently handled simplifications:
///  * redundant blends whose incoming values are all the same (or whose other
///    incoming values are dead because their mask is false),
///  * trunc (zext/sext A) chains, folded to A or to a single narrower cast,
///  * (X && Y) || (X && !Y) -> X,
///  * X * 1 -> X.
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
  using namespace llvm::VPlanPatternMatch;
  // Try to remove redundant blend recipes: if every incoming value either
  // equals incoming value 0 or is masked out by a false mask, the blend
  // always produces incoming value 0 and can be replaced by it.
  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
    VPValue *Inc0 = Blend->getIncomingValue(0);
    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
      if (Inc0 != Blend->getIncomingValue(I) &&
          !match(Blend->getMask(I), m_False()))
        return;
    Blend->replaceAllUsesWith(Inc0);
    Blend->eraseFromParent();
    return;
  }

  VPValue *A;
  if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
    VPValue *Trunc = R.getVPSingleValue();
    Type *TruncTy = TypeInfo.inferScalarType(Trunc);
    Type *ATy = TypeInfo.inferScalarType(A);
    if (TruncTy == ATy) {
      // trunc (ext A) back to A's own type is a no-op; use A directly.
      Trunc->replaceAllUsesWith(A);
    } else {
      // Don't replace a scalarizing recipe with a widened cast.
      if (isa<VPReplicateRecipe>(&R))
        return;
      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {

        // A is narrower than the trunc result: the ext/trunc pair can be
        // replaced by a single, narrower extend of A. Preserve the original
        // extension kind (sext vs zext) of the inner cast.
        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
                                 ? Instruction::SExt
                                 : Instruction::ZExt;
        auto *VPC =
            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
          // UnderlyingExt has distinct return type, used to retain legacy cost.
          VPC->setUnderlyingValue(UnderlyingExt);
        }
        VPC->insertBefore(&R);
        Trunc->replaceAllUsesWith(VPC);
      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
        // A is wider than the trunc result: a single trunc of A suffices.
        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
        VPC->insertBefore(&R);
        Trunc->replaceAllUsesWith(VPC);
      }
    }
#ifndef NDEBUG
    // Verify that the cached type info for both A and its users is still
    // accurate by comparing it to freshly computed types.
    VPTypeAnalysis TypeInfo2(
        R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
        TypeInfo.getContext());
    assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
    for (VPUser *U : A->users()) {
      auto *R = dyn_cast<VPRecipeBase>(U);
      if (!R)
        continue;
      for (VPValue *VPV : R->definedValues())
        assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
    }
#endif
  }

  // Simplify (X && Y) || (X && !Y) -> X.
  // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
  // && (Y || Z) and (X || !X) into true. This requires queuing newly created
  // recipes to be visited during simplification.
  VPValue *X, *Y, *X1, *Y1;
  if (match(&R,
            m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
                         m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
      X == X1 && Y == Y1) {
    R.getVPSingleValue()->replaceAllUsesWith(X);
    return;
  }

  // Simplify X * 1 -> X.
  if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
    return R.getVPSingleValue()->replaceAllUsesWith(A);
}
1067b1c73532SDimitry Andric
1068b1c73532SDimitry Andric /// Try to simplify the recipes in \p Plan.
simplifyRecipes(VPlan & Plan,LLVMContext & Ctx)1069b1c73532SDimitry Andric static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
1070b1c73532SDimitry Andric ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1071b1c73532SDimitry Andric Plan.getEntry());
1072ac9a064cSDimitry Andric VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
1073b1c73532SDimitry Andric for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1074b1c73532SDimitry Andric for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1075b1c73532SDimitry Andric simplifyRecipe(R, TypeInfo);
1076b1c73532SDimitry Andric }
1077b1c73532SDimitry Andric }
1078b1c73532SDimitry Andric }
1079b1c73532SDimitry Andric
/// Shrink integer operations in \p Plan to the minimal bitwidths recorded in
/// \p MinBWs (keyed by the recipes' underlying IR instructions): operands are
/// truncated to the narrow type and, where needed, the narrow result is
/// zero-extended back to the original width so users are unaffected.
void VPlanTransforms::truncateToMinimalBitwidths(
    VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs,
    LLVMContext &Ctx) {
#ifndef NDEBUG
  // Count the processed recipes and cross check the count later with MinBWs
  // size, to make sure all entries in MinBWs have been handled.
  unsigned NumProcessedRecipes = 0;
#endif
  // Keep track of created truncates, so they can be re-used. Note that we
  // cannot use RAUW after creating a new truncate, as this could make
  // other uses have different types for their operands, making them invalidly
  // typed.
  DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
  VPBasicBlock *PH = Plan.getEntry();
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      // Only these recipe kinds are candidates for narrowing.
      if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
               VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
        continue;

      // A zero lookup result means the underlying instruction (or a recipe
      // without one) has no minimal-bitwidth entry and is left untouched.
      VPValue *ResultVPV = R.getVPSingleValue();
      auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
      unsigned NewResSizeInBits = MinBWs.lookup(UI);
      if (!NewResSizeInBits)
        continue;

#ifndef NDEBUG
      NumProcessedRecipes++;
#endif
      // If the value wasn't vectorized, we must maintain the original scalar
      // type. Skip those here, after incrementing NumProcessedRecipes. Also
      // skip casts which do not need to be handled explicitly here, as
      // redundant casts will be removed during recipe simplification.
      if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
#ifndef NDEBUG
        // If any of the operands is a live-in and not used by VPWidenRecipe or
        // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
        // processed as well. When MinBWs is currently constructed, there is no
        // information about whether recipes are widened or replicated and in
        // case they are replicated the operands are not truncated. Counting
        // them here ensures we do not miss any recipes in MinBWs.
        // TODO: Remove once the analysis is done on VPlan.
        for (VPValue *Op : R.operands()) {
          if (!Op->isLiveIn())
            continue;
          auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
          if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
              all_of(Op->users(), [](VPUser *U) {
                return !isa<VPWidenRecipe, VPWidenSelectRecipe>(U);
              })) {
            // Add an entry to ProcessedTruncs to avoid counting the same
            // operand multiple times.
            ProcessedTruncs[Op] = nullptr;
            NumProcessedRecipes += 1;
          }
        }
#endif
        continue;
      }

      Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
      unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
      assert(OldResTy->isIntegerTy() && "only integer types supported");
      (void)OldResSizeInBits;

      auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);

      // Any wrapping introduced by shrinking this operation shouldn't be
      // considered undefined behavior. So, we can't unconditionally copy
      // arithmetic wrapping flags to VPW.
      if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
        VPW->dropPoisonGeneratingFlags();

      using namespace llvm::VPlanPatternMatch;
      if (OldResSizeInBits != NewResSizeInBits &&
          !match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue()))) {
        // Extend result to original width. ICmps are excluded as they produce
        // an i1 result regardless of their operands' width.
        auto *Ext =
            new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
        Ext->insertAfter(&R);
        // RAUW first, then point the new extend back at the narrow result
        // (the RAUW also rewrote Ext's own operand).
        ResultVPV->replaceAllUsesWith(Ext);
        Ext->setOperand(0, ResultVPV);
        assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
      } else
        assert(
            match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
            "Only ICmps should not need extending the result.");

      assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
      if (isa<VPWidenLoadRecipe>(&R))
        continue;

      // Shrink operands by introducing truncates as needed.
      // Skip a select's condition operand (index 0): only its value operands
      // participate in the narrowed arithmetic.
      unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
      for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
        auto *Op = R.getOperand(Idx);
        unsigned OpSizeInBits =
            TypeInfo.inferScalarType(Op)->getScalarSizeInBits();
        if (OpSizeInBits == NewResSizeInBits)
          continue;
        assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
        // Re-use an existing truncate of this operand if one was created
        // earlier; otherwise create (and record) a new one.
        auto [ProcessedIter, IterIsEmpty] =
            ProcessedTruncs.insert({Op, nullptr});
        VPWidenCastRecipe *NewOp =
            IterIsEmpty
                ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
                : ProcessedIter->second;
        R.setOperand(Idx, NewOp);
        if (!IterIsEmpty)
          continue;
        ProcessedIter->second = NewOp;
        // Truncates of live-ins go in the preheader; others right before the
        // recipe that uses them.
        if (!Op->isLiveIn()) {
          NewOp->insertBefore(&R);
        } else {
          PH->appendRecipe(NewOp);
#ifndef NDEBUG
          auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue());
          bool IsContained = MinBWs.contains(OpInst);
          NumProcessedRecipes += IsContained;
#endif
        }
      }

    }
  }

  assert(MinBWs.size() == NumProcessedRecipes &&
         "some entries in MinBWs haven't been processed");
}
1211b1c73532SDimitry Andric
/// Run the VPlan-to-VPlan optimization pipeline on \p Plan.
/// NOTE(review): the pass order appears deliberate (e.g. recipes are
/// simplified before inductions are legalized, and dead recipes are removed
/// afterwards) — confirm dependencies before reordering.
void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
  removeRedundantCanonicalIVs(Plan);
  removeRedundantInductionCasts(Plan);

  simplifyRecipes(Plan, SE.getContext());
  legalizeAndOptimizeInductions(Plan, SE);
  removeDeadRecipes(Plan);

  createAndOptimizeReplicateRegions(Plan);

  removeRedundantExpandSCEVRecipes(Plan);
  mergeBlocksIntoPredecessors(Plan);
}
1225b1c73532SDimitry Andric
// Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace
// the loop terminator with a branch-on-cond recipe with the negated
// active-lane-mask as operand. Note that this turns the loop into an
// uncountable one. Only the existing terminator is replaced, all other existing
// recipes/users remain unchanged, except for poison-generating flags being
// dropped from the canonical IV increment. Return the created
// VPActiveLaneMaskPHIRecipe.
//
// The function uses the following definitions:
//
//  %TripCount = DataWithControlFlowWithoutRuntimeCheck ?
//    calculate-trip-count-minus-VF (original TC) : original TC
//  %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ?
//     CanonicalIVPhi : CanonicalIVIncrement
//  %StartV is the canonical induction start value.
//
// The function adds the following recipes:
//
// vector.ph:
//   %TripCount = calculate-trip-count-minus-VF (original TC)
//       [if DataWithControlFlowWithoutRuntimeCheck]
//   %EntryInc = canonical-iv-increment-for-part %StartV
//   %EntryALM = active-lane-mask %EntryInc, %TripCount
//
// vector.body:
//   ...
//   %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]
//   ...
//   %InLoopInc = canonical-iv-increment-for-part %IncrementValue
//   %ALM = active-lane-mask %InLoopInc, TripCount
//   %Negated = Not %ALM
//   branch-on-cond %Negated
//
static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
    VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
  VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
  VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
  auto *CanonicalIVPHI = Plan.getCanonicalIV();
  VPValue *StartV = CanonicalIVPHI->getStartValue();

  auto *CanonicalIVIncrement =
      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
  // TODO: Check if dropping the flags is needed if
  // !DataAndControlFlowWithoutRuntimeCheck.
  CanonicalIVIncrement->dropPoisonGeneratingFlags();
  DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
  // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
  // we have to take unrolling into account. Each part needs to start at
  //   Part * VF
  auto *VecPreheader = cast<VPBasicBlock>(TopRegion->getSinglePredecessor());
  VPBuilder Builder(VecPreheader);

  // Create the ActiveLaneMask instruction using the correct start values.
  VPValue *TC = Plan.getTripCount();

  VPValue *TripCount, *IncrementValue;
  if (!DataAndControlFlowWithoutRuntimeCheck) {
    // When the loop is guarded by a runtime overflow check for the loop
    // induction variable increment by VF, we can increment the value before
    // the get.active.lane mask and use the unmodified tripcount.
    IncrementValue = CanonicalIVIncrement;
    TripCount = TC;
  } else {
    // When avoiding a runtime check, the active.lane.mask inside the loop
    // uses a modified trip count and the induction variable increment is
    // done after the active.lane.mask intrinsic is called.
    IncrementValue = CanonicalIVPHI;
    TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
                                     {TC}, DL);
  }
  // Per-part start index for the entry mask; uses no-wrap flags set to false
  // since wrapping here must not be treated as poison.
  auto *EntryIncrement = Builder.createOverflowingOp(
      VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,
      "index.part.next");

  // Create the active lane mask instruction in the VPlan preheader.
  auto *EntryALM =
      Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},
                           DL, "active.lane.mask.entry");

  // Now create the ActiveLaneMaskPhi recipe in the main loop using the
  // preheader ActiveLaneMask instruction.
  auto LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
  LaneMaskPhi->insertAfter(CanonicalIVPHI);

  // Create the active lane mask for the next iteration of the loop before the
  // original terminator.
  VPRecipeBase *OriginalTerminator = EB->getTerminator();
  Builder.setInsertPoint(OriginalTerminator);
  auto *InLoopIncrement =
      Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
                                  {IncrementValue}, {false, false}, DL);
  auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
                                   {InLoopIncrement, TripCount}, DL,
                                   "active.lane.mask.next");
  // The backedge value of the phi is the mask computed for the next iteration.
  LaneMaskPhi->addOperand(ALM);

  // Replace the original terminator with BranchOnCond. We have to invert the
  // mask here because a true condition means jumping to the exit block.
  auto *NotMask = Builder.createNot(ALM, DL);
  Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
  OriginalTerminator->eraseFromParent();
  return LaneMaskPhi;
}
1329b1c73532SDimitry Andric
1330ac9a064cSDimitry Andric /// Collect all VPValues representing a header mask through the (ICMP_ULE,
1331ac9a064cSDimitry Andric /// WideCanonicalIV, backedge-taken-count) pattern.
1332ac9a064cSDimitry Andric /// TODO: Introduce explicit recipe for header-mask instead of searching
1333ac9a064cSDimitry Andric /// for the header-mask pattern manually.
collectAllHeaderMasks(VPlan & Plan)1334ac9a064cSDimitry Andric static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1335ac9a064cSDimitry Andric SmallVector<VPValue *> WideCanonicalIVs;
1336ac9a064cSDimitry Andric auto *FoundWidenCanonicalIVUser =
1337ac9a064cSDimitry Andric find_if(Plan.getCanonicalIV()->users(),
1338ac9a064cSDimitry Andric [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1339ac9a064cSDimitry Andric assert(count_if(Plan.getCanonicalIV()->users(),
1340ac9a064cSDimitry Andric [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1341ac9a064cSDimitry Andric 1 &&
1342ac9a064cSDimitry Andric "Must have at most one VPWideCanonicalIVRecipe");
1343ac9a064cSDimitry Andric if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
1344ac9a064cSDimitry Andric auto *WideCanonicalIV =
1345ac9a064cSDimitry Andric cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1346ac9a064cSDimitry Andric WideCanonicalIVs.push_back(WideCanonicalIV);
1347ac9a064cSDimitry Andric }
1348ac9a064cSDimitry Andric
1349ac9a064cSDimitry Andric // Also include VPWidenIntOrFpInductionRecipes that represent a widened
1350ac9a064cSDimitry Andric // version of the canonical induction.
1351ac9a064cSDimitry Andric VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1352ac9a064cSDimitry Andric for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
1353ac9a064cSDimitry Andric auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1354ac9a064cSDimitry Andric if (WidenOriginalIV && WidenOriginalIV->isCanonical())
1355ac9a064cSDimitry Andric WideCanonicalIVs.push_back(WidenOriginalIV);
1356ac9a064cSDimitry Andric }
1357ac9a064cSDimitry Andric
1358ac9a064cSDimitry Andric // Walk users of wide canonical IVs and collect to all compares of the form
1359ac9a064cSDimitry Andric // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1360ac9a064cSDimitry Andric SmallVector<VPValue *> HeaderMasks;
1361ac9a064cSDimitry Andric for (auto *Wide : WideCanonicalIVs) {
1362ac9a064cSDimitry Andric for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
1363ac9a064cSDimitry Andric auto *HeaderMask = dyn_cast<VPInstruction>(U);
1364ac9a064cSDimitry Andric if (!HeaderMask || !vputils::isHeaderMask(HeaderMask, Plan))
1365ac9a064cSDimitry Andric continue;
1366ac9a064cSDimitry Andric
1367ac9a064cSDimitry Andric assert(HeaderMask->getOperand(0) == Wide &&
1368ac9a064cSDimitry Andric "WidenCanonicalIV must be the first operand of the compare");
1369ac9a064cSDimitry Andric HeaderMasks.push_back(HeaderMask);
1370ac9a064cSDimitry Andric }
1371ac9a064cSDimitry Andric }
1372ac9a064cSDimitry Andric return HeaderMasks;
1373ac9a064cSDimitry Andric }
1374ac9a064cSDimitry Andric
addActiveLaneMask(VPlan & Plan,bool UseActiveLaneMaskForControlFlow,bool DataAndControlFlowWithoutRuntimeCheck)1375b1c73532SDimitry Andric void VPlanTransforms::addActiveLaneMask(
1376b1c73532SDimitry Andric VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
1377b1c73532SDimitry Andric bool DataAndControlFlowWithoutRuntimeCheck) {
1378b1c73532SDimitry Andric assert((!DataAndControlFlowWithoutRuntimeCheck ||
1379b1c73532SDimitry Andric UseActiveLaneMaskForControlFlow) &&
1380b1c73532SDimitry Andric "DataAndControlFlowWithoutRuntimeCheck implies "
1381b1c73532SDimitry Andric "UseActiveLaneMaskForControlFlow");
1382b1c73532SDimitry Andric
1383b1c73532SDimitry Andric auto FoundWidenCanonicalIVUser =
1384b1c73532SDimitry Andric find_if(Plan.getCanonicalIV()->users(),
1385b1c73532SDimitry Andric [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1386b1c73532SDimitry Andric assert(FoundWidenCanonicalIVUser &&
1387b1c73532SDimitry Andric "Must have widened canonical IV when tail folding!");
1388b1c73532SDimitry Andric auto *WideCanonicalIV =
1389b1c73532SDimitry Andric cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
13904df029ccSDimitry Andric VPSingleDefRecipe *LaneMask;
1391b1c73532SDimitry Andric if (UseActiveLaneMaskForControlFlow) {
1392b1c73532SDimitry Andric LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
1393b1c73532SDimitry Andric Plan, DataAndControlFlowWithoutRuntimeCheck);
1394b1c73532SDimitry Andric } else {
1395ac9a064cSDimitry Andric VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
1396ac9a064cSDimitry Andric LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
1397ac9a064cSDimitry Andric {WideCanonicalIV, Plan.getTripCount()}, nullptr,
1398ac9a064cSDimitry Andric "active.lane.mask");
1399b1c73532SDimitry Andric }
1400b1c73532SDimitry Andric
1401b1c73532SDimitry Andric // Walk users of WideCanonicalIV and replace all compares of the form
1402b1c73532SDimitry Andric // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
1403b1c73532SDimitry Andric // active-lane-mask.
1404ac9a064cSDimitry Andric for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
1405ac9a064cSDimitry Andric HeaderMask->replaceAllUsesWith(LaneMask);
1406ac9a064cSDimitry Andric }
1407ac9a064cSDimitry Andric
1408ac9a064cSDimitry Andric /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1409ac9a064cSDimitry Andric /// replaces all uses except the canonical IV increment of
1410ac9a064cSDimitry Andric /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
1411ac9a064cSDimitry Andric /// is used only for loop iterations counting after this transformation.
1412ac9a064cSDimitry Andric ///
1413ac9a064cSDimitry Andric /// The function uses the following definitions:
1414ac9a064cSDimitry Andric /// %StartV is the canonical induction start value.
1415ac9a064cSDimitry Andric ///
1416ac9a064cSDimitry Andric /// The function adds the following recipes:
1417ac9a064cSDimitry Andric ///
1418ac9a064cSDimitry Andric /// vector.ph:
1419ac9a064cSDimitry Andric /// ...
1420ac9a064cSDimitry Andric ///
1421ac9a064cSDimitry Andric /// vector.body:
1422ac9a064cSDimitry Andric /// ...
1423ac9a064cSDimitry Andric /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
1424ac9a064cSDimitry Andric /// [ %NextEVLIV, %vector.body ]
1425ac9a064cSDimitry Andric /// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC
1426ac9a064cSDimitry Andric /// ...
1427ac9a064cSDimitry Andric /// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi
1428ac9a064cSDimitry Andric /// ...
1429ac9a064cSDimitry Andric ///
tryAddExplicitVectorLength(VPlan & Plan)1430ac9a064cSDimitry Andric bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
1431ac9a064cSDimitry Andric VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1432ac9a064cSDimitry Andric // The transform updates all users of inductions to work based on EVL, instead
1433ac9a064cSDimitry Andric // of the VF directly. At the moment, widened inductions cannot be updated, so
1434ac9a064cSDimitry Andric // bail out if the plan contains any.
1435ac9a064cSDimitry Andric bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
1436ac9a064cSDimitry Andric return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
1437ac9a064cSDimitry Andric &Phi);
1438ac9a064cSDimitry Andric });
1439ac9a064cSDimitry Andric // FIXME: Remove this once we can transform (select header_mask, true_value,
1440ac9a064cSDimitry Andric // false_value) into vp.merge.
1441ac9a064cSDimitry Andric bool ContainsOutloopReductions =
1442ac9a064cSDimitry Andric any_of(Header->phis(), [&](VPRecipeBase &Phi) {
1443ac9a064cSDimitry Andric auto *R = dyn_cast<VPReductionPHIRecipe>(&Phi);
1444ac9a064cSDimitry Andric return R && !R->isInLoop();
1445ac9a064cSDimitry Andric });
1446ac9a064cSDimitry Andric if (ContainsWidenInductions || ContainsOutloopReductions)
1447ac9a064cSDimitry Andric return false;
1448ac9a064cSDimitry Andric
1449ac9a064cSDimitry Andric auto *CanonicalIVPHI = Plan.getCanonicalIV();
1450ac9a064cSDimitry Andric VPValue *StartV = CanonicalIVPHI->getStartValue();
1451ac9a064cSDimitry Andric
1452ac9a064cSDimitry Andric // Create the ExplicitVectorLengthPhi recipe in the main loop.
1453ac9a064cSDimitry Andric auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
1454ac9a064cSDimitry Andric EVLPhi->insertAfter(CanonicalIVPHI);
1455ac9a064cSDimitry Andric auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
1456ac9a064cSDimitry Andric {EVLPhi, Plan.getTripCount()});
1457ac9a064cSDimitry Andric VPEVL->insertBefore(*Header, Header->getFirstNonPhi());
1458ac9a064cSDimitry Andric
1459ac9a064cSDimitry Andric auto *CanonicalIVIncrement =
1460ac9a064cSDimitry Andric cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
1461ac9a064cSDimitry Andric VPSingleDefRecipe *OpVPEVL = VPEVL;
1462ac9a064cSDimitry Andric if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits();
1463ac9a064cSDimitry Andric IVSize != 32) {
1464ac9a064cSDimitry Andric OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ? Instruction::Trunc
1465ac9a064cSDimitry Andric : Instruction::ZExt,
1466ac9a064cSDimitry Andric OpVPEVL, CanonicalIVPHI->getScalarType());
1467ac9a064cSDimitry Andric OpVPEVL->insertBefore(CanonicalIVIncrement);
1468ac9a064cSDimitry Andric }
1469ac9a064cSDimitry Andric auto *NextEVLIV =
1470ac9a064cSDimitry Andric new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi},
1471ac9a064cSDimitry Andric {CanonicalIVIncrement->hasNoUnsignedWrap(),
1472ac9a064cSDimitry Andric CanonicalIVIncrement->hasNoSignedWrap()},
1473ac9a064cSDimitry Andric CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
1474ac9a064cSDimitry Andric NextEVLIV->insertBefore(CanonicalIVIncrement);
1475ac9a064cSDimitry Andric EVLPhi->addOperand(NextEVLIV);
1476ac9a064cSDimitry Andric
1477ac9a064cSDimitry Andric for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1478ac9a064cSDimitry Andric for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1479ac9a064cSDimitry Andric VPRecipeBase *NewRecipe = nullptr;
1480ac9a064cSDimitry Andric auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
1481ac9a064cSDimitry Andric if (!CurRecipe)
1482b1c73532SDimitry Andric continue;
1483b1c73532SDimitry Andric
1484ac9a064cSDimitry Andric auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
1485ac9a064cSDimitry Andric assert(OrigMask && "Unmasked recipe when folding tail");
1486ac9a064cSDimitry Andric return HeaderMask == OrigMask ? nullptr : OrigMask;
1487ac9a064cSDimitry Andric };
1488ac9a064cSDimitry Andric if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(CurRecipe)) {
1489ac9a064cSDimitry Andric VPValue *NewMask = GetNewMask(MemR->getMask());
1490ac9a064cSDimitry Andric if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR))
1491ac9a064cSDimitry Andric NewRecipe = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1492ac9a064cSDimitry Andric else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR))
1493ac9a064cSDimitry Andric NewRecipe = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1494ac9a064cSDimitry Andric else
1495ac9a064cSDimitry Andric llvm_unreachable("unsupported recipe");
1496ac9a064cSDimitry Andric } else if (auto *RedR = dyn_cast<VPReductionRecipe>(CurRecipe)) {
1497ac9a064cSDimitry Andric NewRecipe = new VPReductionEVLRecipe(RedR, VPEVL,
1498ac9a064cSDimitry Andric GetNewMask(RedR->getCondOp()));
1499ac9a064cSDimitry Andric }
1500ac9a064cSDimitry Andric
1501ac9a064cSDimitry Andric if (NewRecipe) {
1502ac9a064cSDimitry Andric [[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues();
1503ac9a064cSDimitry Andric assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
1504ac9a064cSDimitry Andric "New recipe must define the same number of values as the "
1505ac9a064cSDimitry Andric "original.");
1506ac9a064cSDimitry Andric assert(
1507ac9a064cSDimitry Andric NumDefVal <= 1 &&
1508ac9a064cSDimitry Andric "Only supports recipes with a single definition or without users.");
1509ac9a064cSDimitry Andric NewRecipe->insertBefore(CurRecipe);
1510ac9a064cSDimitry Andric if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) {
1511ac9a064cSDimitry Andric VPValue *CurVPV = CurRecipe->getVPSingleValue();
1512ac9a064cSDimitry Andric CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
1513ac9a064cSDimitry Andric }
1514ac9a064cSDimitry Andric CurRecipe->eraseFromParent();
1515ac9a064cSDimitry Andric }
1516ac9a064cSDimitry Andric }
1517ac9a064cSDimitry Andric recursivelyDeleteDeadRecipes(HeaderMask);
1518ac9a064cSDimitry Andric }
1519ac9a064cSDimitry Andric // Replace all uses of VPCanonicalIVPHIRecipe by
1520ac9a064cSDimitry Andric // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
1521ac9a064cSDimitry Andric CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
1522ac9a064cSDimitry Andric CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
1523ac9a064cSDimitry Andric // TODO: support unroll factor > 1.
1524ac9a064cSDimitry Andric Plan.setUF(1);
1525ac9a064cSDimitry Andric return true;
1526ac9a064cSDimitry Andric }
1527ac9a064cSDimitry Andric
dropPoisonGeneratingRecipes(VPlan & Plan,function_ref<bool (BasicBlock *)> BlockNeedsPredication)1528ac9a064cSDimitry Andric void VPlanTransforms::dropPoisonGeneratingRecipes(
1529ac9a064cSDimitry Andric VPlan &Plan, function_ref<bool(BasicBlock *)> BlockNeedsPredication) {
1530ac9a064cSDimitry Andric // Collect recipes in the backward slice of `Root` that may generate a poison
1531ac9a064cSDimitry Andric // value that is used after vectorization.
1532ac9a064cSDimitry Andric SmallPtrSet<VPRecipeBase *, 16> Visited;
1533ac9a064cSDimitry Andric auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
1534ac9a064cSDimitry Andric SmallVector<VPRecipeBase *, 16> Worklist;
1535ac9a064cSDimitry Andric Worklist.push_back(Root);
1536ac9a064cSDimitry Andric
1537ac9a064cSDimitry Andric // Traverse the backward slice of Root through its use-def chain.
1538ac9a064cSDimitry Andric while (!Worklist.empty()) {
1539ac9a064cSDimitry Andric VPRecipeBase *CurRec = Worklist.back();
1540ac9a064cSDimitry Andric Worklist.pop_back();
1541ac9a064cSDimitry Andric
1542ac9a064cSDimitry Andric if (!Visited.insert(CurRec).second)
1543ac9a064cSDimitry Andric continue;
1544ac9a064cSDimitry Andric
1545ac9a064cSDimitry Andric // Prune search if we find another recipe generating a widen memory
1546ac9a064cSDimitry Andric // instruction. Widen memory instructions involved in address computation
1547ac9a064cSDimitry Andric // will lead to gather/scatter instructions, which don't need to be
1548ac9a064cSDimitry Andric // handled.
1549ac9a064cSDimitry Andric if (isa<VPWidenMemoryRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) ||
1550ac9a064cSDimitry Andric isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec))
1551ac9a064cSDimitry Andric continue;
1552ac9a064cSDimitry Andric
1553ac9a064cSDimitry Andric // This recipe contributes to the address computation of a widen
1554ac9a064cSDimitry Andric // load/store. If the underlying instruction has poison-generating flags,
1555ac9a064cSDimitry Andric // drop them directly.
1556ac9a064cSDimitry Andric if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
1557ac9a064cSDimitry Andric VPValue *A, *B;
1558ac9a064cSDimitry Andric using namespace llvm::VPlanPatternMatch;
1559ac9a064cSDimitry Andric // Dropping disjoint from an OR may yield incorrect results, as some
1560ac9a064cSDimitry Andric // analysis may have converted it to an Add implicitly (e.g. SCEV used
1561ac9a064cSDimitry Andric // for dependence analysis). Instead, replace it with an equivalent Add.
1562ac9a064cSDimitry Andric // This is possible as all users of the disjoint OR only access lanes
1563ac9a064cSDimitry Andric // where the operands are disjoint or poison otherwise.
1564ac9a064cSDimitry Andric if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
1565ac9a064cSDimitry Andric RecWithFlags->isDisjoint()) {
1566ac9a064cSDimitry Andric VPBuilder Builder(RecWithFlags);
1567ac9a064cSDimitry Andric VPInstruction *New = Builder.createOverflowingOp(
1568ac9a064cSDimitry Andric Instruction::Add, {A, B}, {false, false},
1569ac9a064cSDimitry Andric RecWithFlags->getDebugLoc());
1570ac9a064cSDimitry Andric New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
1571ac9a064cSDimitry Andric RecWithFlags->replaceAllUsesWith(New);
1572ac9a064cSDimitry Andric RecWithFlags->eraseFromParent();
1573ac9a064cSDimitry Andric CurRec = New;
1574ac9a064cSDimitry Andric } else
1575ac9a064cSDimitry Andric RecWithFlags->dropPoisonGeneratingFlags();
1576ac9a064cSDimitry Andric } else {
1577ac9a064cSDimitry Andric Instruction *Instr = dyn_cast_or_null<Instruction>(
1578ac9a064cSDimitry Andric CurRec->getVPSingleValue()->getUnderlyingValue());
1579ac9a064cSDimitry Andric (void)Instr;
1580ac9a064cSDimitry Andric assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
1581ac9a064cSDimitry Andric "found instruction with poison generating flags not covered by "
1582ac9a064cSDimitry Andric "VPRecipeWithIRFlags");
1583ac9a064cSDimitry Andric }
1584ac9a064cSDimitry Andric
1585ac9a064cSDimitry Andric // Add new definitions to the worklist.
1586ac9a064cSDimitry Andric for (VPValue *operand : CurRec->operands())
1587ac9a064cSDimitry Andric if (VPRecipeBase *OpDef = operand->getDefiningRecipe())
1588ac9a064cSDimitry Andric Worklist.push_back(OpDef);
1589ac9a064cSDimitry Andric }
1590ac9a064cSDimitry Andric });
1591ac9a064cSDimitry Andric
1592ac9a064cSDimitry Andric // Traverse all the recipes in the VPlan and collect the poison-generating
1593ac9a064cSDimitry Andric // recipes in the backward slice starting at the address of a VPWidenRecipe or
1594ac9a064cSDimitry Andric // VPInterleaveRecipe.
1595ac9a064cSDimitry Andric auto Iter = vp_depth_first_deep(Plan.getEntry());
1596ac9a064cSDimitry Andric for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
1597ac9a064cSDimitry Andric for (VPRecipeBase &Recipe : *VPBB) {
1598ac9a064cSDimitry Andric if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
1599ac9a064cSDimitry Andric Instruction &UnderlyingInstr = WidenRec->getIngredient();
1600ac9a064cSDimitry Andric VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
1601ac9a064cSDimitry Andric if (AddrDef && WidenRec->isConsecutive() &&
1602ac9a064cSDimitry Andric BlockNeedsPredication(UnderlyingInstr.getParent()))
1603ac9a064cSDimitry Andric collectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
1604ac9a064cSDimitry Andric } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
1605ac9a064cSDimitry Andric VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
1606ac9a064cSDimitry Andric if (AddrDef) {
1607ac9a064cSDimitry Andric // Check if any member of the interleave group needs predication.
1608ac9a064cSDimitry Andric const InterleaveGroup<Instruction> *InterGroup =
1609ac9a064cSDimitry Andric InterleaveRec->getInterleaveGroup();
1610ac9a064cSDimitry Andric bool NeedPredication = false;
1611ac9a064cSDimitry Andric for (int I = 0, NumMembers = InterGroup->getNumMembers();
1612ac9a064cSDimitry Andric I < NumMembers; ++I) {
1613ac9a064cSDimitry Andric Instruction *Member = InterGroup->getMember(I);
1614ac9a064cSDimitry Andric if (Member)
1615ac9a064cSDimitry Andric NeedPredication |= BlockNeedsPredication(Member->getParent());
1616ac9a064cSDimitry Andric }
1617ac9a064cSDimitry Andric
1618ac9a064cSDimitry Andric if (NeedPredication)
1619ac9a064cSDimitry Andric collectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
1620ac9a064cSDimitry Andric }
1621ac9a064cSDimitry Andric }
1622ac9a064cSDimitry Andric }
1623b1c73532SDimitry Andric }
1624b1c73532SDimitry Andric }
1625