xref: /src/contrib/llvm-project/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1706b4fc4SDimitry Andric //===-- VPlanTransforms.cpp - Utility VPlan to VPlan transforms -----------===//
2eb11fae6SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6eb11fae6SDimitry Andric //
7eb11fae6SDimitry Andric //===----------------------------------------------------------------------===//
8eb11fae6SDimitry Andric ///
9eb11fae6SDimitry Andric /// \file
10eb11fae6SDimitry Andric /// This file implements a set of utility VPlan to VPlan transformations.
11eb11fae6SDimitry Andric ///
12eb11fae6SDimitry Andric //===----------------------------------------------------------------------===//
13eb11fae6SDimitry Andric 
14706b4fc4SDimitry Andric #include "VPlanTransforms.h"
157fa27ce4SDimitry Andric #include "VPRecipeBuilder.h"
16b1c73532SDimitry Andric #include "VPlanAnalysis.h"
17e3b55780SDimitry Andric #include "VPlanCFG.h"
18b1c73532SDimitry Andric #include "VPlanDominatorTree.h"
19ac9a064cSDimitry Andric #include "VPlanPatternMatch.h"
20eb11fae6SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
21b1c73532SDimitry Andric #include "llvm/ADT/STLExtras.h"
22145449b1SDimitry Andric #include "llvm/ADT/SetVector.h"
23145449b1SDimitry Andric #include "llvm/Analysis/IVDescriptors.h"
24e3b55780SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
25e3b55780SDimitry Andric #include "llvm/IR/Intrinsics.h"
26b1c73532SDimitry Andric #include "llvm/IR/PatternMatch.h"
27eb11fae6SDimitry Andric 
28eb11fae6SDimitry Andric using namespace llvm;
29eb11fae6SDimitry Andric 
// Lower VPWidenPHIRecipes and VPInstructions that mirror the input IR into
// dedicated widened recipes (induction phis, loads/stores, GEPs, calls,
// selects, casts, or generic widen recipes). Each lowered ingredient is
// replaced in place and the original recipe is erased.
void VPlanTransforms::VPInstructionsToVPRecipes(
    VPlanPtr &Plan,
    function_ref<const InductionDescriptor *(PHINode *)>
        GetIntOrFpInductionDescriptor,
    ScalarEvolution &SE, const TargetLibraryInfo &TLI) {

  ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
      Plan->getVectorLoopRegion());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
    // Skip blocks outside region
    if (!VPBB->getParent())
      break;
    // Leave the block terminator (if any) untouched; only recipes before it
    // are lowered.
    VPRecipeBase *Term = VPBB->getTerminator();
    auto EndIter = Term ? Term->getIterator() : VPBB->end();
    // Introduce each ingredient into VPlan. early-inc iteration keeps the
    // loop valid while Ingredient is erased below.
    for (VPRecipeBase &Ingredient :
         make_early_inc_range(make_range(VPBB->begin(), EndIter))) {

      VPValue *VPV = Ingredient.getVPSingleValue();
      Instruction *Inst = cast<Instruction>(VPV->getUnderlyingValue());

      VPRecipeBase *NewRecipe = nullptr;
      if (auto *VPPhi = dyn_cast<VPWidenPHIRecipe>(&Ingredient)) {
        auto *Phi = cast<PHINode>(VPPhi->getUnderlyingValue());
        const auto *II = GetIntOrFpInductionDescriptor(Phi);
        // Phis without an int/FP induction descriptor are left unchanged.
        if (!II)
          continue;

        VPValue *Start = Plan->getOrAddLiveIn(II->getStartValue());
        VPValue *Step =
            vputils::getOrCreateVPValueForSCEVExpr(*Plan, II->getStep(), SE);
        NewRecipe = new VPWidenIntOrFpInductionRecipe(Phi, Start, Step, *II);
      } else {
        assert(isa<VPInstruction>(&Ingredient) &&
               "only VPInstructions expected here");
        assert(!isa<PHINode>(Inst) && "phis should be handled above");
        // Create VPWidenMemoryRecipe for loads and stores.
        if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) {
          NewRecipe = new VPWidenLoadRecipe(
              *Load, Ingredient.getOperand(0), nullptr /*Mask*/,
              false /*Consecutive*/, false /*Reverse*/,
              Ingredient.getDebugLoc());
        } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) {
          NewRecipe = new VPWidenStoreRecipe(
              *Store, Ingredient.getOperand(1), Ingredient.getOperand(0),
              nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/,
              Ingredient.getDebugLoc());
        } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
          NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands());
        } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
          NewRecipe = new VPWidenCallRecipe(
              CI, Ingredient.operands(), getVectorIntrinsicIDForCall(CI, &TLI),
              CI->getDebugLoc());
        } else if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
          NewRecipe = new VPWidenSelectRecipe(*SI, Ingredient.operands());
        } else if (auto *CI = dyn_cast<CastInst>(Inst)) {
          NewRecipe = new VPWidenCastRecipe(
              CI->getOpcode(), Ingredient.getOperand(0), CI->getType(), *CI);
        } else {
          // Fallback: widen any remaining instruction generically.
          NewRecipe = new VPWidenRecipe(*Inst, Ingredient.operands());
        }
      }

      NewRecipe->insertBefore(&Ingredient);
      // Rewire users of the old ingredient to the new recipe, then delete the
      // old one. Recipes defining no values (e.g. stores) need no rewiring.
      if (NewRecipe->getNumDefinedValues() == 1)
        VPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
      else
        assert(NewRecipe->getNumDefinedValues() == 0 &&
               "Only recpies with zero or one defined values expected");
      Ingredient.eraseFromParent();
    }
  }
}
103344a3780SDimitry Andric 
// Sink scalar-producing recipes (replicate recipes and scalar IV steps) into
// the replicate-region 'then' blocks that use them, duplicating a recipe when
// it also has first-lane-only users elsewhere. Returns true if anything moved.
static bool sinkScalarOperands(VPlan &Plan) {
  auto Iter = vp_depth_first_deep(Plan.getEntry());
  bool Changed = false;
  // First, collect the operands of all recipes in replicate blocks as seeds for
  // sinking.
  SetVector<std::pair<VPBasicBlock *, VPSingleDefRecipe *>> WorkList;
  for (VPRegionBlock *VPR : VPBlockUtils::blocksOnly<VPRegionBlock>(Iter)) {
    VPBasicBlock *EntryVPBB = VPR->getEntryBasicBlock();
    // Only consider replicate regions shaped as a triangle: entry with two
    // successors where the first ('then') jumps straight to the exit.
    if (!VPR->isReplicator() || EntryVPBB->getSuccessors().size() != 2)
      continue;
    VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(EntryVPBB->getSuccessors()[0]);
    if (!VPBB || VPBB->getSingleSuccessor() != VPR->getExitingBasicBlock())
      continue;
    for (auto &Recipe : *VPBB) {
      for (VPValue *Op : Recipe.operands())
        if (auto *Def =
                dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
          WorkList.insert(std::make_pair(VPBB, Def));
    }
  }

  bool ScalarVFOnly = Plan.hasScalarVFOnly();
  // Try to sink each replicate or scalar IV steps recipe in the worklist.
  // WorkList may grow during iteration (operands of sunk recipes are appended
  // below), so re-read size() each time.
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    VPBasicBlock *SinkTo;
    VPSingleDefRecipe *SinkCandidate;
    std::tie(SinkTo, SinkCandidate) = WorkList[I];
    // Never move recipes with side effects or memory accesses.
    if (SinkCandidate->getParent() == SinkTo ||
        SinkCandidate->mayHaveSideEffects() ||
        SinkCandidate->mayReadOrWriteMemory())
      continue;
    if (auto *RepR = dyn_cast<VPReplicateRecipe>(SinkCandidate)) {
      // Uniform replicates are only worth sinking when all VFs are scalar.
      if (!ScalarVFOnly && RepR->isUniform())
        continue;
    } else if (!isa<VPScalarIVStepsRecipe>(SinkCandidate))
      continue;

    bool NeedsDuplicating = false;
    // All recipe users of the sink candidate must be in the same block SinkTo
    // or all users outside of SinkTo must be uniform-after-vectorization (
    // i.e., only first lane is used) . In the latter case, we need to duplicate
    // SinkCandidate.
    auto CanSinkWithUser = [SinkTo, &NeedsDuplicating,
                            SinkCandidate](VPUser *U) {
      auto *UI = dyn_cast<VPRecipeBase>(U);
      if (!UI)
        return false;
      if (UI->getParent() == SinkTo)
        return true;
      NeedsDuplicating = UI->onlyFirstLaneUsed(SinkCandidate);
      // We only know how to duplicate VPRecipeRecipes for now.
      return NeedsDuplicating && isa<VPReplicateRecipe>(SinkCandidate);
    };
    if (!all_of(SinkCandidate->users(), CanSinkWithUser))
      continue;

    if (NeedsDuplicating) {
      if (ScalarVFOnly)
        continue;
      Instruction *I = SinkCandidate->getUnderlyingInstr();
      // Clone stays behind (uniform) to serve the first-lane-only users
      // outside SinkTo; the original is sunk below.
      auto *Clone = new VPReplicateRecipe(I, SinkCandidate->operands(), true);
      // TODO: add ".cloned" suffix to name of Clone's VPValue.

      Clone->insertBefore(SinkCandidate);
      SinkCandidate->replaceUsesWithIf(Clone, [SinkTo](VPUser &U, unsigned) {
        return cast<VPRecipeBase>(&U)->getParent() != SinkTo;
      });
    }
    SinkCandidate->moveBefore(*SinkTo, SinkTo->getFirstNonPhi());
    // The sunk recipe's operands may now be sinkable too — enqueue them.
    for (VPValue *Op : SinkCandidate->operands())
      if (auto *Def =
              dyn_cast_or_null<VPSingleDefRecipe>(Op->getDefiningRecipe()))
        WorkList.insert(std::make_pair(SinkTo, Def));
    Changed = true;
  }
  return Changed;
}
181344a3780SDimitry Andric 
182344a3780SDimitry Andric /// If \p R is a region with a VPBranchOnMaskRecipe in the entry block, return
183344a3780SDimitry Andric /// the mask.
getPredicatedMask(VPRegionBlock * R)184344a3780SDimitry Andric VPValue *getPredicatedMask(VPRegionBlock *R) {
185344a3780SDimitry Andric   auto *EntryBB = dyn_cast<VPBasicBlock>(R->getEntry());
186344a3780SDimitry Andric   if (!EntryBB || EntryBB->size() != 1 ||
187344a3780SDimitry Andric       !isa<VPBranchOnMaskRecipe>(EntryBB->begin()))
188344a3780SDimitry Andric     return nullptr;
189344a3780SDimitry Andric 
190344a3780SDimitry Andric   return cast<VPBranchOnMaskRecipe>(&*EntryBB->begin())->getOperand(0);
191344a3780SDimitry Andric }
192344a3780SDimitry Andric 
193344a3780SDimitry Andric /// If \p R is a triangle region, return the 'then' block of the triangle.
getPredicatedThenBlock(VPRegionBlock * R)194344a3780SDimitry Andric static VPBasicBlock *getPredicatedThenBlock(VPRegionBlock *R) {
195344a3780SDimitry Andric   auto *EntryBB = cast<VPBasicBlock>(R->getEntry());
196344a3780SDimitry Andric   if (EntryBB->getNumSuccessors() != 2)
197344a3780SDimitry Andric     return nullptr;
198344a3780SDimitry Andric 
199344a3780SDimitry Andric   auto *Succ0 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[0]);
200344a3780SDimitry Andric   auto *Succ1 = dyn_cast<VPBasicBlock>(EntryBB->getSuccessors()[1]);
201344a3780SDimitry Andric   if (!Succ0 || !Succ1)
202344a3780SDimitry Andric     return nullptr;
203344a3780SDimitry Andric 
204344a3780SDimitry Andric   if (Succ0->getNumSuccessors() + Succ1->getNumSuccessors() != 1)
205344a3780SDimitry Andric     return nullptr;
206344a3780SDimitry Andric   if (Succ0->getSingleSuccessor() == Succ1)
207344a3780SDimitry Andric     return Succ0;
208344a3780SDimitry Andric   if (Succ1->getSingleSuccessor() == Succ0)
209344a3780SDimitry Andric     return Succ1;
210344a3780SDimitry Andric   return nullptr;
211344a3780SDimitry Andric }
212344a3780SDimitry Andric 
// Merge replicate regions in their successor region, if a replicate region
// is connected to a successor replicate region with the same predicate by a
// single, empty VPBasicBlock.
static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) {
  SetVector<VPRegionBlock *> DeletedRegions;

  // Collect replicate regions followed by an empty block, followed by another
  // replicate region with matching masks to process front. This is to avoid
  // iterator invalidation issues while merging regions.
  SmallVector<VPRegionBlock *, 8> WorkList;
  for (VPRegionBlock *Region1 : VPBlockUtils::blocksOnly<VPRegionBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    if (!Region1->isReplicator())
      continue;
    auto *MiddleBasicBlock =
        dyn_cast_or_null<VPBasicBlock>(Region1->getSingleSuccessor());
    if (!MiddleBasicBlock || !MiddleBasicBlock->empty())
      continue;

    auto *Region2 =
        dyn_cast_or_null<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());
    if (!Region2 || !Region2->isReplicator())
      continue;

    // Both regions must be guarded by the very same mask value.
    VPValue *Mask1 = getPredicatedMask(Region1);
    VPValue *Mask2 = getPredicatedMask(Region2);
    if (!Mask1 || Mask1 != Mask2)
      continue;

    assert(Mask1 && Mask2 && "both region must have conditions");
    WorkList.push_back(Region1);
  }

  // Move recipes from Region1 to its successor region, if both are triangles.
  for (VPRegionBlock *Region1 : WorkList) {
    // Region1 may already have been folded into a predecessor this iteration.
    if (DeletedRegions.contains(Region1))
      continue;
    auto *MiddleBasicBlock = cast<VPBasicBlock>(Region1->getSingleSuccessor());
    auto *Region2 = cast<VPRegionBlock>(MiddleBasicBlock->getSingleSuccessor());

    VPBasicBlock *Then1 = getPredicatedThenBlock(Region1);
    VPBasicBlock *Then2 = getPredicatedThenBlock(Region2);
    if (!Then1 || !Then2)
      continue;

    // Note: No fusion-preventing memory dependencies are expected in either
    // region. Such dependencies should be rejected during earlier dependence
    // checks, which guarantee accesses can be re-ordered for vectorization.
    //
    // Move recipes to the successor region.
    for (VPRecipeBase &ToMove : make_early_inc_range(reverse(*Then1)))
      ToMove.moveBefore(*Then2, Then2->getFirstNonPhi());

    auto *Merge1 = cast<VPBasicBlock>(Then1->getSingleSuccessor());
    auto *Merge2 = cast<VPBasicBlock>(Then2->getSingleSuccessor());

    // Move VPPredInstPHIRecipes from the merge block to the successor region's
    // merge block. Update all users inside the successor region to use the
    // original values.
    for (VPRecipeBase &Phi1ToMove : make_early_inc_range(reverse(*Merge1))) {
      VPValue *PredInst1 =
          cast<VPPredInstPHIRecipe>(&Phi1ToMove)->getOperand(0);
      VPValue *Phi1ToMoveV = Phi1ToMove.getVPSingleValue();
      // Users now inside Then2 can use the unpredicated value directly.
      Phi1ToMoveV->replaceUsesWithIf(PredInst1, [Then2](VPUser &U, unsigned) {
        auto *UI = dyn_cast<VPRecipeBase>(&U);
        return UI && UI->getParent() == Then2;
      });

      // Remove phi recipes that are unused after merging the regions.
      if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) {
        Phi1ToMove.eraseFromParent();
        continue;
      }
      Phi1ToMove.moveBefore(*Merge2, Merge2->begin());
    }

    // Finally, remove the first region. Its predecessors are rewired to the
    // (empty) middle block, which now feeds Region2 directly.
    for (VPBlockBase *Pred : make_early_inc_range(Region1->getPredecessors())) {
      VPBlockUtils::disconnectBlocks(Pred, Region1);
      VPBlockUtils::connectBlocks(Pred, MiddleBasicBlock);
    }
    VPBlockUtils::disconnectBlocks(Region1, MiddleBasicBlock);
    DeletedRegions.insert(Region1);
  }

  // Deletion is deferred until all merging is done to keep the WorkList
  // pointers valid while iterating.
  for (VPRegionBlock *ToDelete : DeletedRegions)
    delete ToDelete;
  return !DeletedRegions.empty();
}
302e3b55780SDimitry Andric 
// Build a triangular if-then replicate region ("pred.<opcode>") around
// \p PredRecipe: an entry block branching on the recipe's mask, an ".if"
// block holding an unmasked clone of the recipe, and a ".continue" block
// optionally holding a VPPredInstPHIRecipe merging the result. The original
// predicated recipe is erased.
static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe,
                                            VPlan &Plan) {
  Instruction *Instr = PredRecipe->getUnderlyingInstr();
  // Build the triangular if-then region.
  std::string RegionName = (Twine("pred.") + Instr->getOpcodeName()).str();
  assert(Instr->getParent() && "Predicated instruction not in any basic block");
  auto *BlockInMask = PredRecipe->getMask();
  auto *BOMRecipe = new VPBranchOnMaskRecipe(BlockInMask);
  auto *Entry = new VPBasicBlock(Twine(RegionName) + ".entry", BOMRecipe);

  // Replace predicated replicate recipe with a replicate recipe without a
  // mask but in the replicate region. The mask is the last operand and is
  // dropped via std::prev on the operand range.
  auto *RecipeWithoutMask = new VPReplicateRecipe(
      PredRecipe->getUnderlyingInstr(),
      make_range(PredRecipe->op_begin(), std::prev(PredRecipe->op_end())),
      PredRecipe->isUniform());
  auto *Pred = new VPBasicBlock(Twine(RegionName) + ".if", RecipeWithoutMask);

  // Only materialize the merge phi if the original recipe's value is used;
  // PHIRecipe stays null (and the continue block empty) otherwise.
  VPPredInstPHIRecipe *PHIRecipe = nullptr;
  if (PredRecipe->getNumUsers() != 0) {
    PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask);
    PredRecipe->replaceAllUsesWith(PHIRecipe);
    // replaceAllUsesWith above also rewired the phi's own operand to itself;
    // restore it to the unmasked recipe.
    PHIRecipe->setOperand(0, RecipeWithoutMask);
  }
  PredRecipe->eraseFromParent();
  auto *Exiting = new VPBasicBlock(Twine(RegionName) + ".continue", PHIRecipe);
  VPRegionBlock *Region = new VPRegionBlock(Entry, Exiting, RegionName, true);

  // Note: first set Entry as region entry and then connect successors starting
  // from it in order, to propagate the "parent" of each VPBasicBlock.
  VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry);
  VPBlockUtils::connectBlocks(Pred, Exiting);

  return Region;
}
3387fa27ce4SDimitry Andric 
addReplicateRegions(VPlan & Plan)3397fa27ce4SDimitry Andric static void addReplicateRegions(VPlan &Plan) {
3407fa27ce4SDimitry Andric   SmallVector<VPReplicateRecipe *> WorkList;
3417fa27ce4SDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
3427fa27ce4SDimitry Andric            vp_depth_first_deep(Plan.getEntry()))) {
3437fa27ce4SDimitry Andric     for (VPRecipeBase &R : *VPBB)
3447fa27ce4SDimitry Andric       if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
3457fa27ce4SDimitry Andric         if (RepR->isPredicated())
3467fa27ce4SDimitry Andric           WorkList.push_back(RepR);
3477fa27ce4SDimitry Andric       }
3487fa27ce4SDimitry Andric   }
3497fa27ce4SDimitry Andric 
3507fa27ce4SDimitry Andric   unsigned BBNum = 0;
3517fa27ce4SDimitry Andric   for (VPReplicateRecipe *RepR : WorkList) {
3527fa27ce4SDimitry Andric     VPBasicBlock *CurrentBlock = RepR->getParent();
3537fa27ce4SDimitry Andric     VPBasicBlock *SplitBlock = CurrentBlock->splitAt(RepR->getIterator());
3547fa27ce4SDimitry Andric 
3557fa27ce4SDimitry Andric     BasicBlock *OrigBB = RepR->getUnderlyingInstr()->getParent();
3567fa27ce4SDimitry Andric     SplitBlock->setName(
3577fa27ce4SDimitry Andric         OrigBB->hasName() ? OrigBB->getName() + "." + Twine(BBNum++) : "");
3587fa27ce4SDimitry Andric     // Record predicated instructions for above packing optimizations.
3597fa27ce4SDimitry Andric     VPBlockBase *Region = createReplicateRegion(RepR, Plan);
3607fa27ce4SDimitry Andric     Region->setParent(CurrentBlock->getParent());
3617fa27ce4SDimitry Andric     VPBlockUtils::disconnectBlocks(CurrentBlock, SplitBlock);
3627fa27ce4SDimitry Andric     VPBlockUtils::connectBlocks(CurrentBlock, Region);
3637fa27ce4SDimitry Andric     VPBlockUtils::connectBlocks(Region, SplitBlock);
3647fa27ce4SDimitry Andric   }
3657fa27ce4SDimitry Andric }
3667fa27ce4SDimitry Andric 
/// Remove redundant VPBasicBlocks by merging them into their predecessor if
/// the predecessor has a single successor.
static bool mergeBlocksIntoPredecessors(VPlan &Plan) {
  // Two phases: collect candidates first, then mutate, so the deep traversal
  // is not invalidated by block deletion.
  SmallVector<VPBasicBlock *> WorkList;
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getEntry()))) {
    // Don't fold the exit block of the Plan into its single predecessor for
    // now.
    // TODO: Remove restriction once more of the skeleton is modeled in VPlan.
    if (VPBB->getNumSuccessors() == 0 && !VPBB->getParent())
      continue;
    auto *PredVPBB =
        dyn_cast_or_null<VPBasicBlock>(VPBB->getSinglePredecessor());
    if (!PredVPBB || PredVPBB->getNumSuccessors() != 1)
      continue;
    WorkList.push_back(VPBB);
  }

  for (VPBasicBlock *VPBB : WorkList) {
    VPBasicBlock *PredVPBB = cast<VPBasicBlock>(VPBB->getSinglePredecessor());
    // Append all of VPBB's recipes to the predecessor.
    for (VPRecipeBase &R : make_early_inc_range(*VPBB))
      R.moveBefore(*PredVPBB, PredVPBB->end());
    VPBlockUtils::disconnectBlocks(PredVPBB, VPBB);
    // If VPBB was its region's exiting block, the predecessor takes over that
    // role.
    auto *ParentRegion = cast_or_null<VPRegionBlock>(VPBB->getParent());
    if (ParentRegion && ParentRegion->getExiting() == VPBB)
      ParentRegion->setExiting(PredVPBB);
    // Rewire VPBB's successors to the predecessor; successors() is copied to
    // a vector because disconnecting mutates the underlying list.
    for (auto *Succ : to_vector(VPBB->successors())) {
      VPBlockUtils::disconnectBlocks(VPBB, Succ);
      VPBlockUtils::connectBlocks(PredVPBB, Succ);
    }
    delete VPBB;
  }
  return !WorkList.empty();
}
40177fc4c14SDimitry Andric 
createAndOptimizeReplicateRegions(VPlan & Plan)402ac9a064cSDimitry Andric void VPlanTransforms::createAndOptimizeReplicateRegions(VPlan &Plan) {
403ac9a064cSDimitry Andric   // Convert masked VPReplicateRecipes to if-then region blocks.
404ac9a064cSDimitry Andric   addReplicateRegions(Plan);
405ac9a064cSDimitry Andric 
406ac9a064cSDimitry Andric   bool ShouldSimplify = true;
407ac9a064cSDimitry Andric   while (ShouldSimplify) {
408ac9a064cSDimitry Andric     ShouldSimplify = sinkScalarOperands(Plan);
409ac9a064cSDimitry Andric     ShouldSimplify |= mergeReplicateRegionsIntoSuccessors(Plan);
410ac9a064cSDimitry Andric     ShouldSimplify |= mergeBlocksIntoPredecessors(Plan);
411ac9a064cSDimitry Andric   }
412ac9a064cSDimitry Andric }
413ac9a064cSDimitry Andric 
/// Remove redundant casts of inductions.
///
/// Such redundant casts are casts of induction variables that can be ignored,
/// because we already proved that the casted phi is equal to the uncasted phi
/// in the vectorized loop. There is no need to vectorize the cast - the same
/// value can be used for both the phi and casts in the vector loop.
static void removeRedundantInductionCasts(VPlan &Plan) {
  for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
    auto *IV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
    if (!IV || IV->getTruncInst())
      continue;

    // A sequence of IR Casts has potentially been recorded for IV, which
    // *must be bypassed* when the IV is vectorized, because the vectorized IV
    // will produce the desired casted value. This sequence forms a def-use
    // chain and is provided in reverse order, ending with the cast that uses
    // the IV phi. Search for the recipe of the last cast in the chain and
    // replace it with the original IV. Note that only the final cast is
    // expected to have users outside the cast-chain and the dead casts left
    // over will be cleaned up later.
    auto &Casts = IV->getInductionDescriptor().getCastInsts();
    VPValue *FindMyCast = IV;
    for (Instruction *IRCast : reverse(Casts)) {
      VPSingleDefRecipe *FoundUserCast = nullptr;
      for (auto *U : FindMyCast->users()) {
        auto *UserCast = dyn_cast<VPSingleDefRecipe>(U);
        if (UserCast && UserCast->getUnderlyingValue() == IRCast) {
          FoundUserCast = UserCast;
          break;
        }
      }
      // NOTE(review): assumes every cast in the chain has a matching recipe
      // user; if none is found, FindMyCast becomes null and the
      // replaceAllUsesWith below would dereference it — confirm the
      // induction-descriptor invariant guarantees the chain is intact.
      FindMyCast = FoundUserCast;
    }
    FindMyCast->replaceAllUsesWith(IV);
  }
}
4506f8fc217SDimitry Andric 
/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
  // Find the first widened-canonical-IV user of the canonical IV, if any.
  VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
  for (VPUser *U : CanonicalIV->users()) {
    WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
    if (WidenNewIV)
      break;
  }

  if (!WidenNewIV)
    return;

  // Look for an existing widened induction phi in the loop header that is
  // itself canonical and can stand in for WidenNewIV.
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);

    if (!WidenOriginalIV || !WidenOriginalIV->isCanonical())
      continue;

    // Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
    // everything WidenNewIV's users need. That is, WidenOriginalIV will
    // generate a vector phi or all users of WidenNewIV demand the first lane
    // only.
    if (any_of(WidenOriginalIV->users(),
               [WidenOriginalIV](VPUser *U) {
                 return !U->usesScalars(WidenOriginalIV);
               }) ||
        vputils::onlyFirstLaneUsed(WidenNewIV)) {
      WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
      WidenNewIV->eraseFromParent();
      return;
    }
  }
}
487145449b1SDimitry Andric 
488ac9a064cSDimitry Andric /// Returns true if \p R is dead and can be removed.
isDeadRecipe(VPRecipeBase & R)489ac9a064cSDimitry Andric static bool isDeadRecipe(VPRecipeBase &R) {
490ac9a064cSDimitry Andric   using namespace llvm::PatternMatch;
491ac9a064cSDimitry Andric   // Do remove conditional assume instructions as their conditions may be
492ac9a064cSDimitry Andric   // flattened.
493ac9a064cSDimitry Andric   auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
494ac9a064cSDimitry Andric   bool IsConditionalAssume =
495ac9a064cSDimitry Andric       RepR && RepR->isPredicated() &&
496ac9a064cSDimitry Andric       match(RepR->getUnderlyingInstr(), m_Intrinsic<Intrinsic::assume>());
497ac9a064cSDimitry Andric   if (IsConditionalAssume)
498ac9a064cSDimitry Andric     return true;
499ac9a064cSDimitry Andric 
500ac9a064cSDimitry Andric   if (R.mayHaveSideEffects())
501ac9a064cSDimitry Andric     return false;
502ac9a064cSDimitry Andric 
503ac9a064cSDimitry Andric   // Recipe is dead if no user keeps the recipe alive.
504ac9a064cSDimitry Andric   return all_of(R.definedValues(),
505ac9a064cSDimitry Andric                 [](VPValue *V) { return V->getNumUsers() == 0; });
506ac9a064cSDimitry Andric }
507ac9a064cSDimitry Andric 
removeDeadRecipes(VPlan & Plan)508ac9a064cSDimitry Andric static void removeDeadRecipes(VPlan &Plan) {
509e3b55780SDimitry Andric   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
510e3b55780SDimitry Andric       Plan.getEntry());
511145449b1SDimitry Andric 
512145449b1SDimitry Andric   for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
513145449b1SDimitry Andric     // The recipes in the block are processed in reverse order, to catch chains
514145449b1SDimitry Andric     // of dead recipes.
515145449b1SDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
516ac9a064cSDimitry Andric       if (isDeadRecipe(R))
517145449b1SDimitry Andric         R.eraseFromParent();
518145449b1SDimitry Andric     }
519145449b1SDimitry Andric   }
520145449b1SDimitry Andric }
521145449b1SDimitry Andric 
/// Build (in the header block of \p Plan, at insert point \p IP) the chain of
/// recipes computing the scalar steps of an induction described by \p Kind,
/// \p InductionOpcode, \p FPBinOp, \p StartV and \p Step, on top of the
/// canonical IV. Inserts truncation recipes when \p TruncI is non-null or when
/// the step type is wider than the result type. Returns the final
/// VPScalarIVStepsRecipe.
static VPScalarIVStepsRecipe *
createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
                    Instruction::BinaryOps InductionOpcode,
                    FPMathOperator *FPBinOp, ScalarEvolution &SE,
                    Instruction *TruncI, VPValue *StartV, VPValue *Step,
                    VPBasicBlock::iterator IP) {
  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
  VPSingleDefRecipe *BaseIV = CanonicalIV;
  // If the canonical IV does not directly match the requested induction,
  // derive the induction from the canonical IV via a VPDerivedIVRecipe.
  if (!CanonicalIV->isCanonical(Kind, StartV, Step)) {
    BaseIV = new VPDerivedIVRecipe(Kind, FPBinOp, StartV, CanonicalIV, Step);
    HeaderVPBB->insert(BaseIV, IP);
  }

  // Truncate base induction if needed.
  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(),
                          SE.getContext());
  Type *ResultTy = TypeInfo.inferScalarType(BaseIV);
  if (TruncI) {
    Type *TruncTy = TruncI->getType();
    assert(ResultTy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(ResultTy->isIntegerTy() && "Truncation requires an integer type");
    BaseIV = new VPScalarCastRecipe(Instruction::Trunc, BaseIV, TruncTy);
    HeaderVPBB->insert(BaseIV, IP);
    ResultTy = TruncTy;
  }

  // Truncate step if needed.
  Type *StepTy = TypeInfo.inferScalarType(Step);
  if (ResultTy != StepTy) {
    assert(StepTy->getScalarSizeInBits() > ResultTy->getScalarSizeInBits() &&
           "Not truncating.");
    assert(StepTy->isIntegerTy() && "Truncation requires an integer type");
    Step = new VPScalarCastRecipe(Instruction::Trunc, Step, ResultTy);
    // The truncated step is placed in the preheader, i.e. before the loop;
    // presumably it is loop-invariant — the code below only requires it to
    // dominate the header.
    auto *VecPreheader =
        cast<VPBasicBlock>(HeaderVPBB->getSingleHierarchicalPredecessor());
    VecPreheader->appendRecipe(Step->getDefiningRecipe());
  }

  VPScalarIVStepsRecipe *Steps = new VPScalarIVStepsRecipe(
      BaseIV, Step, InductionOpcode,
      FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags());
  HeaderVPBB->insert(Steps, IP);
  return Steps;
}
568b1c73532SDimitry Andric 
569ac9a064cSDimitry Andric /// Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
570ac9a064cSDimitry Andric /// (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
571ac9a064cSDimitry Andric /// VPWidenPointerInductionRecipe will generate vectors only. If some users
572ac9a064cSDimitry Andric /// require vectors while other require scalars, the scalar uses need to extract
573ac9a064cSDimitry Andric /// the scalars from the generated vectors (Note that this is different to how
574ac9a064cSDimitry Andric /// int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
575ac9a064cSDimitry Andric /// if any of its users needs scalar values, by providing them scalar steps
576ac9a064cSDimitry Andric /// built on the canonical scalar IV and update the original IV's users. This is
577ac9a064cSDimitry Andric /// an optional optimization to reduce the needs of vector extracts.
legalizeAndOptimizeInductions(VPlan & Plan,ScalarEvolution & SE)578ac9a064cSDimitry Andric static void legalizeAndOptimizeInductions(VPlan &Plan, ScalarEvolution &SE) {
579145449b1SDimitry Andric   SmallVector<VPRecipeBase *> ToRemove;
580145449b1SDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
581145449b1SDimitry Andric   bool HasOnlyVectorVFs = !Plan.hasVF(ElementCount::getFixed(1));
582ac9a064cSDimitry Andric   VPBasicBlock::iterator InsertPt = HeaderVPBB->getFirstNonPhi();
583145449b1SDimitry Andric   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
584ac9a064cSDimitry Andric     // Replace wide pointer inductions which have only their scalars used by
585ac9a064cSDimitry Andric     // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
586ac9a064cSDimitry Andric     if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
587ac9a064cSDimitry Andric       if (!PtrIV->onlyScalarsGenerated(Plan.hasScalableVF()))
588ac9a064cSDimitry Andric         continue;
589ac9a064cSDimitry Andric 
590ac9a064cSDimitry Andric       const InductionDescriptor &ID = PtrIV->getInductionDescriptor();
591ac9a064cSDimitry Andric       VPValue *StartV =
592ac9a064cSDimitry Andric           Plan.getOrAddLiveIn(ConstantInt::get(ID.getStep()->getType(), 0));
593ac9a064cSDimitry Andric       VPValue *StepV = PtrIV->getOperand(1);
594ac9a064cSDimitry Andric       VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
595ac9a064cSDimitry Andric           Plan, InductionDescriptor::IK_IntInduction, Instruction::Add, nullptr,
596ac9a064cSDimitry Andric           SE, nullptr, StartV, StepV, InsertPt);
597ac9a064cSDimitry Andric 
598ac9a064cSDimitry Andric       auto *Recipe = new VPInstruction(VPInstruction::PtrAdd,
599ac9a064cSDimitry Andric                                        {PtrIV->getStartValue(), Steps},
600ac9a064cSDimitry Andric                                        PtrIV->getDebugLoc(), "next.gep");
601ac9a064cSDimitry Andric 
602ac9a064cSDimitry Andric       Recipe->insertAfter(Steps);
603ac9a064cSDimitry Andric       PtrIV->replaceAllUsesWith(Recipe);
604ac9a064cSDimitry Andric       continue;
605ac9a064cSDimitry Andric     }
606ac9a064cSDimitry Andric 
607ac9a064cSDimitry Andric     // Replace widened induction with scalar steps for users that only use
608ac9a064cSDimitry Andric     // scalars.
609e3b55780SDimitry Andric     auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
610e3b55780SDimitry Andric     if (!WideIV)
611145449b1SDimitry Andric       continue;
612e3b55780SDimitry Andric     if (HasOnlyVectorVFs && none_of(WideIV->users(), [WideIV](VPUser *U) {
613e3b55780SDimitry Andric           return U->usesScalars(WideIV);
614e3b55780SDimitry Andric         }))
615145449b1SDimitry Andric       continue;
616145449b1SDimitry Andric 
617e3b55780SDimitry Andric     const InductionDescriptor &ID = WideIV->getInductionDescriptor();
618ac9a064cSDimitry Andric     VPScalarIVStepsRecipe *Steps = createScalarIVSteps(
619ac9a064cSDimitry Andric         Plan, ID.getKind(), ID.getInductionOpcode(),
620ac9a064cSDimitry Andric         dyn_cast_or_null<FPMathOperator>(ID.getInductionBinOp()), SE,
621ac9a064cSDimitry Andric         WideIV->getTruncInst(), WideIV->getStartValue(), WideIV->getStepValue(),
622ac9a064cSDimitry Andric         InsertPt);
623145449b1SDimitry Andric 
624312c0ed1SDimitry Andric     // Update scalar users of IV to use Step instead.
625312c0ed1SDimitry Andric     if (!HasOnlyVectorVFs)
626312c0ed1SDimitry Andric       WideIV->replaceAllUsesWith(Steps);
627312c0ed1SDimitry Andric     else
628312c0ed1SDimitry Andric       WideIV->replaceUsesWithIf(Steps, [WideIV](VPUser &U, unsigned) {
629312c0ed1SDimitry Andric         return U.usesScalars(WideIV);
630b1c73532SDimitry Andric       });
631145449b1SDimitry Andric   }
632145449b1SDimitry Andric }
633145449b1SDimitry Andric 
634ac9a064cSDimitry Andric /// Remove redundant EpxandSCEVRecipes in \p Plan's entry block by replacing
635ac9a064cSDimitry Andric /// them with already existing recipes expanding the same SCEV expression.
removeRedundantExpandSCEVRecipes(VPlan & Plan)636ac9a064cSDimitry Andric static void removeRedundantExpandSCEVRecipes(VPlan &Plan) {
637145449b1SDimitry Andric   DenseMap<const SCEV *, VPValue *> SCEV2VPV;
638145449b1SDimitry Andric 
639145449b1SDimitry Andric   for (VPRecipeBase &R :
640145449b1SDimitry Andric        make_early_inc_range(*Plan.getEntry()->getEntryBasicBlock())) {
641145449b1SDimitry Andric     auto *ExpR = dyn_cast<VPExpandSCEVRecipe>(&R);
642145449b1SDimitry Andric     if (!ExpR)
643145449b1SDimitry Andric       continue;
644145449b1SDimitry Andric 
645145449b1SDimitry Andric     auto I = SCEV2VPV.insert({ExpR->getSCEV(), ExpR});
646145449b1SDimitry Andric     if (I.second)
647145449b1SDimitry Andric       continue;
648145449b1SDimitry Andric     ExpR->replaceAllUsesWith(I.first->second);
649145449b1SDimitry Andric     ExpR->eraseFromParent();
650145449b1SDimitry Andric   }
651145449b1SDimitry Andric }
652e3b55780SDimitry Andric 
recursivelyDeleteDeadRecipes(VPValue * V)653ac9a064cSDimitry Andric static void recursivelyDeleteDeadRecipes(VPValue *V) {
654ac9a064cSDimitry Andric   SmallVector<VPValue *> WorkList;
655ac9a064cSDimitry Andric   SmallPtrSet<VPValue *, 8> Seen;
656ac9a064cSDimitry Andric   WorkList.push_back(V);
657e3b55780SDimitry Andric 
658ac9a064cSDimitry Andric   while (!WorkList.empty()) {
659ac9a064cSDimitry Andric     VPValue *Cur = WorkList.pop_back_val();
660ac9a064cSDimitry Andric     if (!Seen.insert(Cur).second)
661ac9a064cSDimitry Andric       continue;
662ac9a064cSDimitry Andric     VPRecipeBase *R = Cur->getDefiningRecipe();
663ac9a064cSDimitry Andric     if (!R)
664ac9a064cSDimitry Andric       continue;
665ac9a064cSDimitry Andric     if (!isDeadRecipe(*R))
666ac9a064cSDimitry Andric       continue;
667ac9a064cSDimitry Andric     WorkList.append(R->op_begin(), R->op_end());
668ac9a064cSDimitry Andric     R->eraseFromParent();
669ac9a064cSDimitry Andric   }
670e3b55780SDimitry Andric }
671e3b55780SDimitry Andric 
/// Specialize \p Plan for the chosen \p BestVF and \p BestUF. If the trip
/// count is provably <= BestVF * BestUF, the loop-exit terminator is replaced
/// by a BranchOnCond on a constant-true condition.
void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF,
                                         unsigned BestUF,
                                         PredicatedScalarEvolution &PSE) {
  assert(Plan.hasVF(BestVF) && "BestVF is not available in Plan");
  assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan");
  VPBasicBlock *ExitingVPBB =
      Plan.getVectorLoopRegion()->getExitingBasicBlock();
  auto *Term = &ExitingVPBB->back();
  // Try to simplify the branch condition if TC <= VF * UF when preparing to
  // execute the plan for the main vector loop. We only do this if the
  // terminator is:
  //  1. BranchOnCount, or
  //  2. BranchOnCond where the input is Not(ActiveLaneMask).
  using namespace llvm::VPlanPatternMatch;
  if (!match(Term, m_BranchOnCount(m_VPValue(), m_VPValue())) &&
      !match(Term,
             m_BranchOnCond(m_Not(m_ActiveLaneMask(m_VPValue(), m_VPValue())))))
    return;

  // Use SCEV to prove TripCount <= BestVF * BestUF; bail out otherwise.
  Type *IdxTy =
      Plan.getCanonicalIV()->getStartValue()->getLiveInIRValue()->getType();
  const SCEV *TripCount = createTripCountSCEV(IdxTy, PSE);
  ScalarEvolution &SE = *PSE.getSE();
  ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
  const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
  if (TripCount->isZero() ||
      !SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
    return;

  // Replace the terminator with BranchOnCond(true).
  LLVMContext &Ctx = SE.getContext();
  auto *BOC =
      new VPInstruction(VPInstruction::BranchOnCond,
                        {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))});

  // Snapshot the old terminator's operands before erasing it, then clean up
  // any recipes that only fed the old condition.
  SmallVector<VPValue *> PossiblyDead(Term->operands());
  Term->eraseFromParent();
  for (VPValue *Op : PossiblyDead)
    recursivelyDeleteDeadRecipes(Op);
  ExitingVPBB->appendRecipe(BOC);
  Plan.setVF(BestVF);
  Plan.setUF(BestUF);
  // TODO: Further simplifications are possible
  //      1. Replace inductions with constants.
  //      2. Replace vector loop region with VPBasicBlock.
}
7177fa27ce4SDimitry Andric 
7187fa27ce4SDimitry Andric #ifndef NDEBUG
GetReplicateRegion(VPRecipeBase * R)7197fa27ce4SDimitry Andric static VPRegionBlock *GetReplicateRegion(VPRecipeBase *R) {
7207fa27ce4SDimitry Andric   auto *Region = dyn_cast_or_null<VPRegionBlock>(R->getParent()->getParent());
7217fa27ce4SDimitry Andric   if (Region && Region->isReplicator()) {
7227fa27ce4SDimitry Andric     assert(Region->getNumSuccessors() == 1 &&
7237fa27ce4SDimitry Andric            Region->getNumPredecessors() == 1 && "Expected SESE region!");
7247fa27ce4SDimitry Andric     assert(R->getParent()->size() == 1 &&
7257fa27ce4SDimitry Andric            "A recipe in an original replicator region must be the only "
7267fa27ce4SDimitry Andric            "recipe in its block");
7277fa27ce4SDimitry Andric     return Region;
7287fa27ce4SDimitry Andric   }
7297fa27ce4SDimitry Andric   return nullptr;
7307fa27ce4SDimitry Andric }
7317fa27ce4SDimitry Andric #endif
7327fa27ce4SDimitry Andric 
properlyDominates(const VPRecipeBase * A,const VPRecipeBase * B,VPDominatorTree & VPDT)7337fa27ce4SDimitry Andric static bool properlyDominates(const VPRecipeBase *A, const VPRecipeBase *B,
7347fa27ce4SDimitry Andric                               VPDominatorTree &VPDT) {
7357fa27ce4SDimitry Andric   if (A == B)
7367fa27ce4SDimitry Andric     return false;
7377fa27ce4SDimitry Andric 
7387fa27ce4SDimitry Andric   auto LocalComesBefore = [](const VPRecipeBase *A, const VPRecipeBase *B) {
7397fa27ce4SDimitry Andric     for (auto &R : *A->getParent()) {
7407fa27ce4SDimitry Andric       if (&R == A)
7417fa27ce4SDimitry Andric         return true;
7427fa27ce4SDimitry Andric       if (&R == B)
7437fa27ce4SDimitry Andric         return false;
7447fa27ce4SDimitry Andric     }
7457fa27ce4SDimitry Andric     llvm_unreachable("recipe not found");
7467fa27ce4SDimitry Andric   };
7477fa27ce4SDimitry Andric   const VPBlockBase *ParentA = A->getParent();
7487fa27ce4SDimitry Andric   const VPBlockBase *ParentB = B->getParent();
7497fa27ce4SDimitry Andric   if (ParentA == ParentB)
7507fa27ce4SDimitry Andric     return LocalComesBefore(A, B);
7517fa27ce4SDimitry Andric 
7527fa27ce4SDimitry Andric   assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(A)) &&
7537fa27ce4SDimitry Andric          "No replicate regions expected at this point");
7547fa27ce4SDimitry Andric   assert(!GetReplicateRegion(const_cast<VPRecipeBase *>(B)) &&
7557fa27ce4SDimitry Andric          "No replicate regions expected at this point");
7567fa27ce4SDimitry Andric   return VPDT.properlyDominates(ParentA, ParentB);
7577fa27ce4SDimitry Andric }
7587fa27ce4SDimitry Andric 
/// Sink users of \p FOR after the recipe defining the previous value \p
/// Previous of the recurrence. \returns true if all users of \p FOR could be
/// re-arranged as needed or false if it is not possible.
static bool
sinkRecurrenceUsersAfterPrevious(VPFirstOrderRecurrencePHIRecipe *FOR,
                                 VPRecipeBase *Previous,
                                 VPDominatorTree &VPDT) {
  // Collect recipes that need sinking.
  SmallVector<VPRecipeBase *> WorkList;
  SmallPtrSet<VPRecipeBase *, 8> Seen;
  Seen.insert(Previous);
  // Returns false when sinking is known to be impossible (SinkCandidate is
  // Previous itself, or has side effects). Otherwise returns true, queueing
  // SinkCandidate unless it was already visited, is a header phi, or is
  // already placed after Previous.
  auto TryToPushSinkCandidate = [&](VPRecipeBase *SinkCandidate) {
    // The previous value must not depend on the users of the recurrence phi. In
    // that case, FOR is not a fixed order recurrence.
    if (SinkCandidate == Previous)
      return false;

    if (isa<VPHeaderPHIRecipe>(SinkCandidate) ||
        !Seen.insert(SinkCandidate).second ||
        properlyDominates(Previous, SinkCandidate, VPDT))
      return true;

    if (SinkCandidate->mayHaveSideEffects())
      return false;

    WorkList.push_back(SinkCandidate);
    return true;
  };

  // Recursively sink users of FOR after Previous. Note that WorkList grows
  // while iterating, so indexing (not iterators) keeps the traversal valid.
  WorkList.push_back(FOR);
  for (unsigned I = 0; I != WorkList.size(); ++I) {
    VPRecipeBase *Current = WorkList[I];
    assert(Current->getNumDefinedValues() == 1 &&
           "only recipes with a single defined value expected");

    for (VPUser *User : Current->getVPSingleValue()->users()) {
      if (auto *R = dyn_cast<VPRecipeBase>(User))
        if (!TryToPushSinkCandidate(R))
          return false;
    }
  }

  // Keep recipes to sink ordered by dominance so earlier instructions are
  // processed first.
  sort(WorkList, [&VPDT](const VPRecipeBase *A, const VPRecipeBase *B) {
    return properlyDominates(A, B, VPDT);
  });

  for (VPRecipeBase *SinkCandidate : WorkList) {
    if (SinkCandidate == FOR)
      continue;

    // Moving each candidate right after the most recently moved recipe
    // preserves the dominance-sorted relative order of the sunk recipes.
    SinkCandidate->moveAfter(Previous);
    Previous = SinkCandidate;
  }
  return true;
}
8177fa27ce4SDimitry Andric 
/// Adjust each fixed-order recurrence phi in \p Plan: sink all of its users
/// after the recipe computing the previous value, combine previous and
/// current values with a FirstOrderRecurrenceSplice, and extract the
/// penultimate value in the middle block for VPLiveOut users. Returns false
/// if the users of some recurrence cannot be re-arranged.
bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
                                                  VPBuilder &LoopBuilder) {
  VPDominatorTree VPDT;
  VPDT.recalculate(Plan);

  // Collect the fixed-order recurrence phis from the header block.
  SmallVector<VPFirstOrderRecurrencePHIRecipe *> RecurrencePhis;
  for (VPRecipeBase &R :
       Plan.getVectorLoopRegion()->getEntry()->getEntryBasicBlock()->phis())
    if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
      RecurrencePhis.push_back(FOR);

  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  VPBuilder MiddleBuilder;
  // Set insert point so new recipes are inserted before terminator and
  // condition, if there is either the former or both.
  if (auto *Term =
          dyn_cast_or_null<VPInstruction>(MiddleVPBB->getTerminator())) {
    if (auto *Cmp = dyn_cast<VPInstruction>(Term->getOperand(0)))
      MiddleBuilder.setInsertPoint(Cmp);
    else
      MiddleBuilder.setInsertPoint(Term);
  } else
    MiddleBuilder.setInsertPoint(MiddleVPBB);

  for (VPFirstOrderRecurrencePHIRecipe *FOR : RecurrencePhis) {
    // SeenPhis only feeds the assert below; in NDEBUG builds it is unused.
    SmallPtrSet<VPFirstOrderRecurrencePHIRecipe *, 4> SeenPhis;
    VPRecipeBase *Previous = FOR->getBackedgeValue()->getDefiningRecipe();
    // Fixed-order recurrences do not contain cycles, so this loop is guaranteed
    // to terminate.
    while (auto *PrevPhi =
               dyn_cast_or_null<VPFirstOrderRecurrencePHIRecipe>(Previous)) {
      assert(PrevPhi->getParent() == FOR->getParent());
      assert(SeenPhis.insert(PrevPhi).second);
      Previous = PrevPhi->getBackedgeValue()->getDefiningRecipe();
    }

    if (!sinkRecurrenceUsersAfterPrevious(FOR, Previous, VPDT))
      return false;

    // Introduce a recipe to combine the incoming and previous values of a
    // fixed-order recurrence.
    VPBasicBlock *InsertBlock = Previous->getParent();
    if (isa<VPHeaderPHIRecipe>(Previous))
      LoopBuilder.setInsertPoint(InsertBlock, InsertBlock->getFirstNonPhi());
    else
      LoopBuilder.setInsertPoint(InsertBlock,
                                 std::next(Previous->getIterator()));

    auto *RecurSplice = cast<VPInstruction>(
        LoopBuilder.createNaryOp(VPInstruction::FirstOrderRecurrenceSplice,
                                 {FOR, FOR->getBackedgeValue()}));

    FOR->replaceAllUsesWith(RecurSplice);
    // Set the first operand of RecurSplice to FOR again, after replacing
    // all users.
    RecurSplice->setOperand(0, FOR);

    // This is the second phase of vectorizing first-order recurrences. An
    // overview of the transformation is described below. Suppose we have the
    // following loop with some use after the loop of the last a[i-1],
    //
    //   for (int i = 0; i < n; ++i) {
    //     t = a[i - 1];
    //     b[i] = a[i] - t;
    //   }
    //   use t;
    //
    // There is a first-order recurrence on "a". For this loop, the shorthand
    // scalar IR looks like:
    //
    //   scalar.ph:
    //     s_init = a[-1]
    //     br scalar.body
    //
    //   scalar.body:
    //     i = phi [0, scalar.ph], [i+1, scalar.body]
    //     s1 = phi [s_init, scalar.ph], [s2, scalar.body]
    //     s2 = a[i]
    //     b[i] = s2 - s1
    //     br cond, scalar.body, exit.block
    //
    //   exit.block:
    //     use = lcssa.phi [s1, scalar.body]
    //
    // In this example, s1 is a recurrence because its value depends on the
    // previous iteration. In the first phase of vectorization, we created a
    // vector phi v1 for s1. We now complete the vectorization and produce the
    // shorthand vector IR shown below (for VF = 4, UF = 1).
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))
    //     b[i, i+1, i+2, i+3] = v2 - v3
    //     br cond, vector.body, middle.block
    //
    //   middle.block:
    //     s_penultimate = v2(2) = v3(3)
    //     s_resume = v2(3)
    //     br cond, scalar.ph, exit.block
    //
    //   scalar.ph:
    //     s_init' = phi [s_resume, middle.block], [s_init, otherwise]
    //     br scalar.body
    //
    //   scalar.body:
    //     i = phi [0, scalar.ph], [i+1, scalar.body]
    //     s1 = phi [s_init', scalar.ph], [s2, scalar.body]
    //     s2 = a[i]
    //     b[i] = s2 - s1
    //     br cond, scalar.body, exit.block
    //
    //   exit.block:
    //     lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block]
    //
    // After execution completes the vector loop, we extract the next value of
    // the recurrence (x) to use as the initial value in the scalar loop. This
    // is modeled by ExtractFromEnd.
    Type *IntTy = Plan.getCanonicalIV()->getScalarType();

    // Extract the penultimate value of the recurrence and update VPLiveOut
    // users of the recurrence splice. Note that the extract of the final value
    // used to resume in the scalar loop is created earlier during VPlan
    // construction.
    auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp(
        VPInstruction::ExtractFromEnd,
        {FOR->getBackedgeValue(),
         Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))},
        {}, "vector.recur.extract.for.phi"));
    RecurSplice->replaceUsesWithIf(
        Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
  }
  return true;
}
9587fa27ce4SDimitry Andric 
collectUsersRecursively(VPValue * V)959ac9a064cSDimitry Andric static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {
960ac9a064cSDimitry Andric   SetVector<VPUser *> Users(V->user_begin(), V->user_end());
961ac9a064cSDimitry Andric   for (unsigned I = 0; I != Users.size(); ++I) {
962ac9a064cSDimitry Andric     VPRecipeBase *Cur = dyn_cast<VPRecipeBase>(Users[I]);
963ac9a064cSDimitry Andric     if (!Cur || isa<VPHeaderPHIRecipe>(Cur))
964ac9a064cSDimitry Andric       continue;
965ac9a064cSDimitry Andric     for (VPValue *V : Cur->definedValues())
966ac9a064cSDimitry Andric       Users.insert(V->user_begin(), V->user_end());
967ac9a064cSDimitry Andric   }
968ac9a064cSDimitry Andric   return Users.takeVector();
969ac9a064cSDimitry Andric }
970ac9a064cSDimitry Andric 
clearReductionWrapFlags(VPlan & Plan)9717fa27ce4SDimitry Andric void VPlanTransforms::clearReductionWrapFlags(VPlan &Plan) {
9727fa27ce4SDimitry Andric   for (VPRecipeBase &R :
9737fa27ce4SDimitry Andric        Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9747fa27ce4SDimitry Andric     auto *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9757fa27ce4SDimitry Andric     if (!PhiR)
9767fa27ce4SDimitry Andric       continue;
9777fa27ce4SDimitry Andric     const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9787fa27ce4SDimitry Andric     RecurKind RK = RdxDesc.getRecurrenceKind();
9797fa27ce4SDimitry Andric     if (RK != RecurKind::Add && RK != RecurKind::Mul)
9807fa27ce4SDimitry Andric       continue;
9817fa27ce4SDimitry Andric 
982ac9a064cSDimitry Andric     for (VPUser *U : collectUsersRecursively(PhiR))
983ac9a064cSDimitry Andric       if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(U)) {
9847fa27ce4SDimitry Andric         RecWithFlags->dropPoisonGeneratingFlags();
9857fa27ce4SDimitry Andric       }
9867fa27ce4SDimitry Andric   }
9877fa27ce4SDimitry Andric }
988b1c73532SDimitry Andric 
/// Try to simplify recipe \p R. May erase \p R or replace all its uses; uses
/// \p TypeInfo to query the (cached) scalar types of VPValues.
static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
  using namespace llvm::VPlanPatternMatch;
  // Try to remove redundant blend recipes: if every incoming value either
  // equals the first one or has a known-false mask, the blend always produces
  // the first incoming value and can be removed.
  if (auto *Blend = dyn_cast<VPBlendRecipe>(&R)) {
    VPValue *Inc0 = Blend->getIncomingValue(0);
    for (unsigned I = 1; I != Blend->getNumIncomingValues(); ++I)
      if (Inc0 != Blend->getIncomingValue(I) &&
          !match(Blend->getMask(I), m_False()))
        return;
    Blend->replaceAllUsesWith(Inc0);
    Blend->eraseFromParent();
    return;
  }

  // Simplify trunc(ext(A)): either drop the cast pair entirely (types match)
  // or fold the pair into a single cast of A.
  VPValue *A;
  if (match(&R, m_Trunc(m_ZExtOrSExt(m_VPValue(A))))) {
    VPValue *Trunc = R.getVPSingleValue();
    Type *TruncTy = TypeInfo.inferScalarType(Trunc);
    Type *ATy = TypeInfo.inferScalarType(A);
    if (TruncTy == ATy) {
      // trunc(ext(A)) -> A, when source and destination types match.
      Trunc->replaceAllUsesWith(A);
    } else {
      // Don't replace a scalarizing recipe with a widened cast.
      if (isa<VPReplicateRecipe>(&R))
        return;
      if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) {

        // A is narrower than the truncated result: a single extend of A
        // suffices; preserve the signedness of the original extend.
        unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
                                 ? Instruction::SExt
                                 : Instruction::ZExt;
        auto *VPC =
            new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
        if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
          // UnderlyingExt has distinct return type, used to retain legacy cost.
          VPC->setUnderlyingValue(UnderlyingExt);
        }
        VPC->insertBefore(&R);
        Trunc->replaceAllUsesWith(VPC);
      } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) {
        // A is wider than the truncated result: a single trunc of A suffices.
        auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy);
        VPC->insertBefore(&R);
        Trunc->replaceAllUsesWith(VPC);
      }
    }
#ifndef NDEBUG
    // Verify that the cached type info for both A and its users is still
    // accurate by comparing it to freshly computed types.
    VPTypeAnalysis TypeInfo2(
        R.getParent()->getPlan()->getCanonicalIV()->getScalarType(),
        TypeInfo.getContext());
    assert(TypeInfo.inferScalarType(A) == TypeInfo2.inferScalarType(A));
    for (VPUser *U : A->users()) {
      auto *R = dyn_cast<VPRecipeBase>(U);
      if (!R)
        continue;
      for (VPValue *VPV : R->definedValues())
        assert(TypeInfo.inferScalarType(VPV) == TypeInfo2.inferScalarType(VPV));
    }
#endif
  }

  // Simplify (X && Y) || (X && !Y) -> X.
  // TODO: Split up into simpler, modular combines: (X && Y) || (X && Z) into X
  // && (Y || Z) and (X || !X) into true. This requires queuing newly created
  // recipes to be visited during simplification.
  VPValue *X, *Y, *X1, *Y1;
  if (match(&R,
            m_c_BinaryOr(m_LogicalAnd(m_VPValue(X), m_VPValue(Y)),
                         m_LogicalAnd(m_VPValue(X1), m_Not(m_VPValue(Y1))))) &&
      X == X1 && Y == Y1) {
    R.getVPSingleValue()->replaceAllUsesWith(X);
    return;
  }

  // Simplify A * 1 -> A (the commutative matcher also handles 1 * A).
  if (match(&R, m_c_Mul(m_VPValue(A), m_SpecificInt(1))))
    return R.getVPSingleValue()->replaceAllUsesWith(A);
}
1067b1c73532SDimitry Andric 
1068b1c73532SDimitry Andric /// Try to simplify the recipes in \p Plan.
simplifyRecipes(VPlan & Plan,LLVMContext & Ctx)1069b1c73532SDimitry Andric static void simplifyRecipes(VPlan &Plan, LLVMContext &Ctx) {
1070b1c73532SDimitry Andric   ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1071b1c73532SDimitry Andric       Plan.getEntry());
1072ac9a064cSDimitry Andric   VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
1073b1c73532SDimitry Andric   for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
1074b1c73532SDimitry Andric     for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
1075b1c73532SDimitry Andric       simplifyRecipe(R, TypeInfo);
1076b1c73532SDimitry Andric     }
1077b1c73532SDimitry Andric   }
1078b1c73532SDimitry Andric }
1079b1c73532SDimitry Andric 
/// Narrow the result types of selected widened recipes to the minimal bit
/// widths recorded in \p MinBWs, truncating operands as needed and
/// zero-extending the narrowed result back to its original width (ICmps keep
/// their result untouched; loads and replicated/cast recipes are skipped).
void VPlanTransforms::truncateToMinimalBitwidths(
    VPlan &Plan, const MapVector<Instruction *, uint64_t> &MinBWs,
    LLVMContext &Ctx) {
#ifndef NDEBUG
  // Count the processed recipes and cross check the count later with MinBWs
  // size, to make sure all entries in MinBWs have been handled.
  unsigned NumProcessedRecipes = 0;
#endif
  // Keep track of created truncates, so they can be re-used. Note that we
  // cannot use RAUW after creating a new truncate, as this could make
  // other uses have different types for their operands, making them invalidly
  // typed.
  DenseMap<VPValue *, VPWidenCastRecipe *> ProcessedTruncs;
  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType(), Ctx);
  VPBasicBlock *PH = Plan.getEntry();
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
           vp_depth_first_deep(Plan.getVectorLoopRegion()))) {
    for (VPRecipeBase &R : make_early_inc_range(*VPBB)) {
      // Only these recipe kinds are candidates for narrowing.
      if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPReplicateRecipe,
               VPWidenSelectRecipe, VPWidenLoadRecipe>(&R))
        continue;

      // A recipe is processed only if its underlying IR instruction has an
      // entry in MinBWs (lookup returns 0 otherwise).
      VPValue *ResultVPV = R.getVPSingleValue();
      auto *UI = cast_or_null<Instruction>(ResultVPV->getUnderlyingValue());
      unsigned NewResSizeInBits = MinBWs.lookup(UI);
      if (!NewResSizeInBits)
        continue;

#ifndef NDEBUG
      NumProcessedRecipes++;
#endif
      // If the value wasn't vectorized, we must maintain the original scalar
      // type. Skip those here, after incrementing NumProcessedRecipes. Also
      // skip casts which do not need to be handled explicitly here, as
      // redundant casts will be removed during recipe simplification.
      if (isa<VPReplicateRecipe, VPWidenCastRecipe>(&R)) {
#ifndef NDEBUG
        // If any of the operands is a live-in and not used by VPWidenRecipe or
        // VPWidenSelectRecipe, but in MinBWs, make sure it is counted as
        // processed as well. When MinBWs is currently constructed, there is no
        // information about whether recipes are widened or replicated and in
        // case they are replicated the operands are not truncated. Counting
        // them here ensures we do not miss any recipes in MinBWs.
        // TODO: Remove once the analysis is done on VPlan.
        for (VPValue *Op : R.operands()) {
          if (!Op->isLiveIn())
            continue;
          auto *UV = dyn_cast_or_null<Instruction>(Op->getUnderlyingValue());
          if (UV && MinBWs.contains(UV) && !ProcessedTruncs.contains(Op) &&
              all_of(Op->users(), [](VPUser *U) {
                return !isa<VPWidenRecipe, VPWidenSelectRecipe>(U);
              })) {
            // Add an entry to ProcessedTruncs to avoid counting the same
            // operand multiple times.
            ProcessedTruncs[Op] = nullptr;
            NumProcessedRecipes += 1;
          }
        }
#endif
        continue;
      }

      Type *OldResTy = TypeInfo.inferScalarType(ResultVPV);
      unsigned OldResSizeInBits = OldResTy->getScalarSizeInBits();
      assert(OldResTy->isIntegerTy() && "only integer types supported");
      (void)OldResSizeInBits;

      auto *NewResTy = IntegerType::get(Ctx, NewResSizeInBits);

      // Any wrapping introduced by shrinking this operation shouldn't be
      // considered undefined behavior. So, we can't unconditionally copy
      // arithmetic wrapping flags to VPW.
      if (auto *VPW = dyn_cast<VPRecipeWithIRFlags>(&R))
        VPW->dropPoisonGeneratingFlags();

      using namespace llvm::VPlanPatternMatch;
      if (OldResSizeInBits != NewResSizeInBits &&
          !match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue()))) {
        // Extend result to original width.
        auto *Ext =
            new VPWidenCastRecipe(Instruction::ZExt, ResultVPV, OldResTy);
        Ext->insertAfter(&R);
        ResultVPV->replaceAllUsesWith(Ext);
        // The RAUW above also replaced the operand of Ext itself; point it
        // back at the narrowed result.
        Ext->setOperand(0, ResultVPV);
        assert(OldResSizeInBits > NewResSizeInBits && "Nothing to shrink?");
      } else
        assert(
            match(&R, m_Binary<Instruction::ICmp>(m_VPValue(), m_VPValue())) &&
            "Only ICmps should not need extending the result.");

      assert(!isa<VPWidenStoreRecipe>(&R) && "stores cannot be narrowed");
      // Loads keep their operands (the address) untouched.
      if (isa<VPWidenLoadRecipe>(&R))
        continue;

      // Shrink operands by introducing truncates as needed.
      // For selects, operand 0 is the (boolean) condition and is skipped.
      unsigned StartIdx = isa<VPWidenSelectRecipe>(&R) ? 1 : 0;
      for (unsigned Idx = StartIdx; Idx != R.getNumOperands(); ++Idx) {
        auto *Op = R.getOperand(Idx);
        unsigned OpSizeInBits =
            TypeInfo.inferScalarType(Op)->getScalarSizeInBits();
        if (OpSizeInBits == NewResSizeInBits)
          continue;
        assert(OpSizeInBits > NewResSizeInBits && "nothing to truncate");
        // Re-use an existing truncate of Op if one was created earlier.
        auto [ProcessedIter, IterIsEmpty] =
            ProcessedTruncs.insert({Op, nullptr});
        VPWidenCastRecipe *NewOp =
            IterIsEmpty
                ? new VPWidenCastRecipe(Instruction::Trunc, Op, NewResTy)
                : ProcessedIter->second;
        R.setOperand(Idx, NewOp);
        if (!IterIsEmpty)
          continue;
        ProcessedIter->second = NewOp;
        // Truncates of live-ins go into the preheader; others right before
        // the recipe using them.
        if (!Op->isLiveIn()) {
          NewOp->insertBefore(&R);
        } else {
          PH->appendRecipe(NewOp);
#ifndef NDEBUG
          auto *OpInst = dyn_cast<Instruction>(Op->getLiveInIRValue());
          bool IsContained = MinBWs.contains(OpInst);
          NumProcessedRecipes += IsContained;
#endif
        }
      }

    }
  }

  assert(MinBWs.size() == NumProcessedRecipes &&
         "some entries in MinBWs haven't been processed");
}
1211b1c73532SDimitry Andric 
/// Run the sequence of VPlan-to-VPlan optimizations on \p Plan. The order is
/// significant: IV cleanups run first, then recipe-level simplification and
/// dead-code removal, then region-level and CFG-level transforms.
void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
  // Induction-variable cleanups.
  removeRedundantCanonicalIVs(Plan);
  removeRedundantInductionCasts(Plan);

  // Recipe-level simplification, followed by removal of recipes left dead.
  simplifyRecipes(Plan, SE.getContext());
  legalizeAndOptimizeInductions(Plan, SE);
  removeDeadRecipes(Plan);

  // Region-level transform for replicated recipes.
  createAndOptimizeReplicateRegions(Plan);

  // Final cleanups: duplicate SCEV expansions and trivially mergeable blocks.
  removeRedundantExpandSCEVRecipes(Plan);
  mergeBlocksIntoPredecessors(Plan);
}
1225b1c73532SDimitry Andric 
1226b1c73532SDimitry Andric // Add a VPActiveLaneMaskPHIRecipe and related recipes to \p Plan and replace
1227b1c73532SDimitry Andric // the loop terminator with a branch-on-cond recipe with the negated
1228b1c73532SDimitry Andric // active-lane-mask as operand. Note that this turns the loop into an
1229b1c73532SDimitry Andric // uncountable one. Only the existing terminator is replaced, all other existing
1230b1c73532SDimitry Andric // recipes/users remain unchanged, except for poison-generating flags being
1231b1c73532SDimitry Andric // dropped from the canonical IV increment. Return the created
1232b1c73532SDimitry Andric // VPActiveLaneMaskPHIRecipe.
1233b1c73532SDimitry Andric //
1234b1c73532SDimitry Andric // The function uses the following definitions:
1235b1c73532SDimitry Andric //
1236b1c73532SDimitry Andric //  %TripCount = DataWithControlFlowWithoutRuntimeCheck ?
1237b1c73532SDimitry Andric //    calculate-trip-count-minus-VF (original TC) : original TC
1238b1c73532SDimitry Andric //  %IncrementValue = DataWithControlFlowWithoutRuntimeCheck ?
1239b1c73532SDimitry Andric //     CanonicalIVPhi : CanonicalIVIncrement
1240b1c73532SDimitry Andric //  %StartV is the canonical induction start value.
1241b1c73532SDimitry Andric //
1242b1c73532SDimitry Andric // The function adds the following recipes:
1243b1c73532SDimitry Andric //
1244b1c73532SDimitry Andric // vector.ph:
1245b1c73532SDimitry Andric //   %TripCount = calculate-trip-count-minus-VF (original TC)
1246b1c73532SDimitry Andric //       [if DataWithControlFlowWithoutRuntimeCheck]
1247b1c73532SDimitry Andric //   %EntryInc = canonical-iv-increment-for-part %StartV
1248b1c73532SDimitry Andric //   %EntryALM = active-lane-mask %EntryInc, %TripCount
1249b1c73532SDimitry Andric //
1250b1c73532SDimitry Andric // vector.body:
1251b1c73532SDimitry Andric //   ...
1252b1c73532SDimitry Andric //   %P = active-lane-mask-phi [ %EntryALM, %vector.ph ], [ %ALM, %vector.body ]
1253b1c73532SDimitry Andric //   ...
1254b1c73532SDimitry Andric //   %InLoopInc = canonical-iv-increment-for-part %IncrementValue
1255b1c73532SDimitry Andric //   %ALM = active-lane-mask %InLoopInc, TripCount
1256b1c73532SDimitry Andric //   %Negated = Not %ALM
1257b1c73532SDimitry Andric //   branch-on-cond %Negated
1258b1c73532SDimitry Andric //
static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
    VPlan &Plan, bool DataAndControlFlowWithoutRuntimeCheck) {
  VPRegionBlock *TopRegion = Plan.getVectorLoopRegion();
  // The exiting block holds the terminator that is replaced at the end.
  VPBasicBlock *EB = TopRegion->getExitingBasicBlock();
  auto *CanonicalIVPHI = Plan.getCanonicalIV();
  VPValue *StartV = CanonicalIVPHI->getStartValue();

  auto *CanonicalIVIncrement =
      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
  // TODO: Check if dropping the flags is needed if
  // !DataAndControlFlowWithoutRuntimeCheck.
  CanonicalIVIncrement->dropPoisonGeneratingFlags();
  DebugLoc DL = CanonicalIVIncrement->getDebugLoc();
  // We can't use StartV directly in the ActiveLaneMask VPInstruction, since
  // we have to take unrolling into account. Each part needs to start at
  //   Part * VF
  auto *VecPreheader = cast<VPBasicBlock>(TopRegion->getSinglePredecessor());
  VPBuilder Builder(VecPreheader);

  // Create the ActiveLaneMask instruction using the correct start values.
  VPValue *TC = Plan.getTripCount();

  VPValue *TripCount, *IncrementValue;
  if (!DataAndControlFlowWithoutRuntimeCheck) {
    // When the loop is guarded by a runtime overflow check for the loop
    // induction variable increment by VF, we can increment the value before
    // the get.active.lane mask and use the unmodified tripcount.
    IncrementValue = CanonicalIVIncrement;
    TripCount = TC;
  } else {
    // When avoiding a runtime check, the active.lane.mask inside the loop
    // uses a modified trip count and the induction variable increment is
    // done after the active.lane.mask intrinsic is called.
    IncrementValue = CanonicalIVPHI;
    TripCount = Builder.createNaryOp(VPInstruction::CalculateTripCountMinusVF,
                                     {TC}, DL);
  }
  // Per-part start index (Part * VF), inserted in the preheader.
  auto *EntryIncrement = Builder.createOverflowingOp(
      VPInstruction::CanonicalIVIncrementForPart, {StartV}, {false, false}, DL,
      "index.part.next");

  // Create the active lane mask instruction in the VPlan preheader.
  auto *EntryALM =
      Builder.createNaryOp(VPInstruction::ActiveLaneMask, {EntryIncrement, TC},
                           DL, "active.lane.mask.entry");

  // Now create the ActiveLaneMaskPhi recipe in the main loop using the
  // preheader ActiveLaneMask instruction.
  auto LaneMaskPhi = new VPActiveLaneMaskPHIRecipe(EntryALM, DebugLoc());
  LaneMaskPhi->insertAfter(CanonicalIVPHI);

  // Create the active lane mask for the next iteration of the loop before the
  // original terminator.
  VPRecipeBase *OriginalTerminator = EB->getTerminator();
  Builder.setInsertPoint(OriginalTerminator);
  auto *InLoopIncrement =
      Builder.createOverflowingOp(VPInstruction::CanonicalIVIncrementForPart,
                                  {IncrementValue}, {false, false}, DL);
  auto *ALM = Builder.createNaryOp(VPInstruction::ActiveLaneMask,
                                   {InLoopIncrement, TripCount}, DL,
                                   "active.lane.mask.next");
  // Complete the phi: [ %EntryALM, vector.ph ], [ %ALM, vector.body ].
  LaneMaskPhi->addOperand(ALM);

  // Replace the original terminator with BranchOnCond. We have to invert the
  // mask here because a true condition means jumping to the exit block.
  auto *NotMask = Builder.createNot(ALM, DL);
  Builder.createNaryOp(VPInstruction::BranchOnCond, {NotMask}, DL);
  OriginalTerminator->eraseFromParent();
  return LaneMaskPhi;
}
1329b1c73532SDimitry Andric 
1330ac9a064cSDimitry Andric /// Collect all VPValues representing a header mask through the (ICMP_ULE,
1331ac9a064cSDimitry Andric /// WideCanonicalIV, backedge-taken-count) pattern.
1332ac9a064cSDimitry Andric /// TODO: Introduce explicit recipe for header-mask instead of searching
1333ac9a064cSDimitry Andric /// for the header-mask pattern manually.
collectAllHeaderMasks(VPlan & Plan)1334ac9a064cSDimitry Andric static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1335ac9a064cSDimitry Andric   SmallVector<VPValue *> WideCanonicalIVs;
1336ac9a064cSDimitry Andric   auto *FoundWidenCanonicalIVUser =
1337ac9a064cSDimitry Andric       find_if(Plan.getCanonicalIV()->users(),
1338ac9a064cSDimitry Andric               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1339ac9a064cSDimitry Andric   assert(count_if(Plan.getCanonicalIV()->users(),
1340ac9a064cSDimitry Andric                   [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1341ac9a064cSDimitry Andric              1 &&
1342ac9a064cSDimitry Andric          "Must have at most one VPWideCanonicalIVRecipe");
1343ac9a064cSDimitry Andric   if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
1344ac9a064cSDimitry Andric     auto *WideCanonicalIV =
1345ac9a064cSDimitry Andric         cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
1346ac9a064cSDimitry Andric     WideCanonicalIVs.push_back(WideCanonicalIV);
1347ac9a064cSDimitry Andric   }
1348ac9a064cSDimitry Andric 
1349ac9a064cSDimitry Andric   // Also include VPWidenIntOrFpInductionRecipes that represent a widened
1350ac9a064cSDimitry Andric   // version of the canonical induction.
1351ac9a064cSDimitry Andric   VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
1352ac9a064cSDimitry Andric   for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
1353ac9a064cSDimitry Andric     auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
1354ac9a064cSDimitry Andric     if (WidenOriginalIV && WidenOriginalIV->isCanonical())
1355ac9a064cSDimitry Andric       WideCanonicalIVs.push_back(WidenOriginalIV);
1356ac9a064cSDimitry Andric   }
1357ac9a064cSDimitry Andric 
1358ac9a064cSDimitry Andric   // Walk users of wide canonical IVs and collect to all compares of the form
1359ac9a064cSDimitry Andric   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1360ac9a064cSDimitry Andric   SmallVector<VPValue *> HeaderMasks;
1361ac9a064cSDimitry Andric   for (auto *Wide : WideCanonicalIVs) {
1362ac9a064cSDimitry Andric     for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
1363ac9a064cSDimitry Andric       auto *HeaderMask = dyn_cast<VPInstruction>(U);
1364ac9a064cSDimitry Andric       if (!HeaderMask || !vputils::isHeaderMask(HeaderMask, Plan))
1365ac9a064cSDimitry Andric         continue;
1366ac9a064cSDimitry Andric 
1367ac9a064cSDimitry Andric       assert(HeaderMask->getOperand(0) == Wide &&
1368ac9a064cSDimitry Andric              "WidenCanonicalIV must be the first operand of the compare");
1369ac9a064cSDimitry Andric       HeaderMasks.push_back(HeaderMask);
1370ac9a064cSDimitry Andric     }
1371ac9a064cSDimitry Andric   }
1372ac9a064cSDimitry Andric   return HeaderMasks;
1373ac9a064cSDimitry Andric }
1374ac9a064cSDimitry Andric 
addActiveLaneMask(VPlan & Plan,bool UseActiveLaneMaskForControlFlow,bool DataAndControlFlowWithoutRuntimeCheck)1375b1c73532SDimitry Andric void VPlanTransforms::addActiveLaneMask(
1376b1c73532SDimitry Andric     VPlan &Plan, bool UseActiveLaneMaskForControlFlow,
1377b1c73532SDimitry Andric     bool DataAndControlFlowWithoutRuntimeCheck) {
1378b1c73532SDimitry Andric   assert((!DataAndControlFlowWithoutRuntimeCheck ||
1379b1c73532SDimitry Andric           UseActiveLaneMaskForControlFlow) &&
1380b1c73532SDimitry Andric          "DataAndControlFlowWithoutRuntimeCheck implies "
1381b1c73532SDimitry Andric          "UseActiveLaneMaskForControlFlow");
1382b1c73532SDimitry Andric 
1383b1c73532SDimitry Andric   auto FoundWidenCanonicalIVUser =
1384b1c73532SDimitry Andric       find_if(Plan.getCanonicalIV()->users(),
1385b1c73532SDimitry Andric               [](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
1386b1c73532SDimitry Andric   assert(FoundWidenCanonicalIVUser &&
1387b1c73532SDimitry Andric          "Must have widened canonical IV when tail folding!");
1388b1c73532SDimitry Andric   auto *WideCanonicalIV =
1389b1c73532SDimitry Andric       cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
13904df029ccSDimitry Andric   VPSingleDefRecipe *LaneMask;
1391b1c73532SDimitry Andric   if (UseActiveLaneMaskForControlFlow) {
1392b1c73532SDimitry Andric     LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
1393b1c73532SDimitry Andric         Plan, DataAndControlFlowWithoutRuntimeCheck);
1394b1c73532SDimitry Andric   } else {
1395ac9a064cSDimitry Andric     VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
1396ac9a064cSDimitry Andric     LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
1397ac9a064cSDimitry Andric                               {WideCanonicalIV, Plan.getTripCount()}, nullptr,
1398ac9a064cSDimitry Andric                               "active.lane.mask");
1399b1c73532SDimitry Andric   }
1400b1c73532SDimitry Andric 
1401b1c73532SDimitry Andric   // Walk users of WideCanonicalIV and replace all compares of the form
1402b1c73532SDimitry Andric   // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
1403b1c73532SDimitry Andric   // active-lane-mask.
1404ac9a064cSDimitry Andric   for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
1405ac9a064cSDimitry Andric     HeaderMask->replaceAllUsesWith(LaneMask);
1406ac9a064cSDimitry Andric }
1407ac9a064cSDimitry Andric 
1408ac9a064cSDimitry Andric /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1409ac9a064cSDimitry Andric /// replaces all uses except the canonical IV increment of
1410ac9a064cSDimitry Andric /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
1411ac9a064cSDimitry Andric /// is used only for loop iterations counting after this transformation.
1412ac9a064cSDimitry Andric ///
1413ac9a064cSDimitry Andric /// The function uses the following definitions:
1414ac9a064cSDimitry Andric ///  %StartV is the canonical induction start value.
1415ac9a064cSDimitry Andric ///
1416ac9a064cSDimitry Andric /// The function adds the following recipes:
1417ac9a064cSDimitry Andric ///
1418ac9a064cSDimitry Andric /// vector.ph:
1419ac9a064cSDimitry Andric /// ...
1420ac9a064cSDimitry Andric ///
1421ac9a064cSDimitry Andric /// vector.body:
1422ac9a064cSDimitry Andric /// ...
1423ac9a064cSDimitry Andric /// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ],
1424ac9a064cSDimitry Andric ///                                               [ %NextEVLIV, %vector.body ]
1425ac9a064cSDimitry Andric /// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC
1426ac9a064cSDimitry Andric /// ...
/// %NextEVLIV = add IVSize (cast i32 %VPEVL to IVSize), %EVLPhi
1428ac9a064cSDimitry Andric /// ...
1429ac9a064cSDimitry Andric ///
bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
  VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
  // The transform updates all users of inductions to work based on EVL, instead
  // of the VF directly. At the moment, widened inductions cannot be updated, so
  // bail out if the plan contains any.
  bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
    return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
        &Phi);
  });
  // FIXME: Remove this once we can transform (select header_mask, true_value,
  // false_value) into vp.merge.
  bool ContainsOutloopReductions =
      any_of(Header->phis(), [&](VPRecipeBase &Phi) {
        auto *R = dyn_cast<VPReductionPHIRecipe>(&Phi);
        return R && !R->isInLoop();
      });
  if (ContainsWidenInductions || ContainsOutloopReductions)
    return false;

  auto *CanonicalIVPHI = Plan.getCanonicalIV();
  VPValue *StartV = CanonicalIVPHI->getStartValue();

  // Create the ExplicitVectorLengthPhi recipe in the main loop.
  auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc());
  EVLPhi->insertAfter(CanonicalIVPHI);
  // Compute this iteration's EVL as the first non-phi recipe of the header so
  // it is available to all recipes that will be rewritten to consume it below.
  auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength,
                                  {EVLPhi, Plan.getTripCount()});
  VPEVL->insertBefore(*Header, Header->getFirstNonPhi());

  auto *CanonicalIVIncrement =
      cast<VPInstruction>(CanonicalIVPHI->getBackedgeValue());
  VPSingleDefRecipe *OpVPEVL = VPEVL;
  // The EVL value is 32 bits wide (see the i32 cast in the function comment);
  // truncate or zero-extend it to the canonical IV's scalar type when they
  // differ so it can feed the EVL-based IV increment.
  if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits();
      IVSize != 32) {
    OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ? Instruction::Trunc
                                                 : Instruction::ZExt,
                                     OpVPEVL, CanonicalIVPHI->getScalarType());
    OpVPEVL->insertBefore(CanonicalIVIncrement);
  }
  // %NextEVLIV = %EVLPhi + (cast) %VPEVL; reuse the canonical IV increment's
  // wrap flags and debug location for the new increment.
  auto *NextEVLIV =
      new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi},
                        {CanonicalIVIncrement->hasNoUnsignedWrap(),
                         CanonicalIVIncrement->hasNoSignedWrap()},
                        CanonicalIVIncrement->getDebugLoc(), "index.evl.next");
  NextEVLIV->insertBefore(CanonicalIVIncrement);
  EVLPhi->addOperand(NextEVLIV);

  // Rewrite recipes predicated by a header mask into their EVL-based variants.
  // A mask operand equal to the header mask itself is dropped (nullptr), since
  // the EVL already restricts the active lanes; any other mask is kept.
  for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
    for (VPUser *U : collectUsersRecursively(HeaderMask)) {
      VPRecipeBase *NewRecipe = nullptr;
      auto *CurRecipe = dyn_cast<VPRecipeBase>(U);
      if (!CurRecipe)
        continue;

      auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
        assert(OrigMask && "Unmasked recipe when folding tail");
        return HeaderMask == OrigMask ? nullptr : OrigMask;
      };
      if (auto *MemR = dyn_cast<VPWidenMemoryRecipe>(CurRecipe)) {
        VPValue *NewMask = GetNewMask(MemR->getMask());
        if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR))
          NewRecipe = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
        else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR))
          NewRecipe = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
        else
          llvm_unreachable("unsupported recipe");
      } else if (auto *RedR = dyn_cast<VPReductionRecipe>(CurRecipe)) {
        NewRecipe = new VPReductionEVLRecipe(RedR, VPEVL,
                                             GetNewMask(RedR->getCondOp()));
      }

      if (NewRecipe) {
        [[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues();
        assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
               "New recipe must define the same number of values as the "
               "original.");
        assert(
            NumDefVal <= 1 &&
            "Only supports recipes with a single definition or without users.");
        NewRecipe->insertBefore(CurRecipe);
        if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) {
          VPValue *CurVPV = CurRecipe->getVPSingleValue();
          CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
        }
        CurRecipe->eraseFromParent();
      }
    }
    // Recipes that only fed the now-unused header mask are dead; clean up.
    recursivelyDeleteDeadRecipes(HeaderMask);
  }
  // Replace all uses of VPCanonicalIVPHIRecipe by
  // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
  CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
  CanonicalIVIncrement->setOperand(0, CanonicalIVPHI);
  // TODO: support unroll factor > 1.
  Plan.setUF(1);
  return true;
}
1527ac9a064cSDimitry Andric 
// Drop (or neutralize) poison-generating flags on recipes that feed the
// address computation of predicated consecutive/interleaved memory accesses,
// where tail folding could otherwise use a poison value on masked-off lanes.
void VPlanTransforms::dropPoisonGeneratingRecipes(
    VPlan &Plan, function_ref<bool(BasicBlock *)> BlockNeedsPredication) {
  // Collect recipes in the backward slice of `Root` that may generate a poison
  // value that is used after vectorization.
  // Note: Visited is shared across all roots, so work common to multiple
  // address slices is processed only once.
  SmallPtrSet<VPRecipeBase *, 16> Visited;
  auto collectPoisonGeneratingInstrsInBackwardSlice([&](VPRecipeBase *Root) {
    SmallVector<VPRecipeBase *, 16> Worklist;
    Worklist.push_back(Root);

    // Traverse the backward slice of Root through its use-def chain.
    while (!Worklist.empty()) {
      VPRecipeBase *CurRec = Worklist.back();
      Worklist.pop_back();

      if (!Visited.insert(CurRec).second)
        continue;

      // Prune search if we find another recipe generating a widen memory
      // instruction. Widen memory instructions involved in address computation
      // will lead to gather/scatter instructions, which don't need to be
      // handled.
      if (isa<VPWidenMemoryRecipe>(CurRec) || isa<VPInterleaveRecipe>(CurRec) ||
          isa<VPScalarIVStepsRecipe>(CurRec) || isa<VPHeaderPHIRecipe>(CurRec))
        continue;

      // This recipe contributes to the address computation of a widen
      // load/store. If the underlying instruction has poison-generating flags,
      // drop them directly.
      if (auto *RecWithFlags = dyn_cast<VPRecipeWithIRFlags>(CurRec)) {
        VPValue *A, *B;
        using namespace llvm::VPlanPatternMatch;
        // Dropping disjoint from an OR may yield incorrect results, as some
        // analysis may have converted it to an Add implicitly (e.g. SCEV used
        // for dependence analysis). Instead, replace it with an equivalent Add.
        // This is possible as all users of the disjoint OR only access lanes
        // where the operands are disjoint or poison otherwise.
        if (match(RecWithFlags, m_BinaryOr(m_VPValue(A), m_VPValue(B))) &&
            RecWithFlags->isDisjoint()) {
          VPBuilder Builder(RecWithFlags);
          VPInstruction *New = Builder.createOverflowingOp(
              Instruction::Add, {A, B}, {false, false},
              RecWithFlags->getDebugLoc());
          New->setUnderlyingValue(RecWithFlags->getUnderlyingValue());
          RecWithFlags->replaceAllUsesWith(New);
          RecWithFlags->eraseFromParent();
          // Continue the slice traversal from the replacement recipe, so its
          // operands (A, B) are still pushed onto the worklist below.
          CurRec = New;
        } else
          RecWithFlags->dropPoisonGeneratingFlags();
      } else {
        // Recipes without IR flags must not carry poison-generating flags on
        // their underlying instruction; `(void)Instr` silences the unused
        // warning in release builds where the assert compiles away.
        Instruction *Instr = dyn_cast_or_null<Instruction>(
            CurRec->getVPSingleValue()->getUnderlyingValue());
        (void)Instr;
        assert((!Instr || !Instr->hasPoisonGeneratingFlags()) &&
               "found instruction with poison generating flags not covered by "
               "VPRecipeWithIRFlags");
      }

      // Add new definitions to the worklist.
      for (VPValue *operand : CurRec->operands())
        if (VPRecipeBase *OpDef = operand->getDefiningRecipe())
          Worklist.push_back(OpDef);
    }
  });

  // Traverse all the recipes in the VPlan and collect the poison-generating
  // recipes in the backward slice starting at the address of a VPWidenRecipe or
  // VPInterleaveRecipe.
  auto Iter = vp_depth_first_deep(Plan.getEntry());
  for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
    for (VPRecipeBase &Recipe : *VPBB) {
      if (auto *WidenRec = dyn_cast<VPWidenMemoryRecipe>(&Recipe)) {
        Instruction &UnderlyingInstr = WidenRec->getIngredient();
        VPRecipeBase *AddrDef = WidenRec->getAddr()->getDefiningRecipe();
        if (AddrDef && WidenRec->isConsecutive() &&
            BlockNeedsPredication(UnderlyingInstr.getParent()))
          collectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
      } else if (auto *InterleaveRec = dyn_cast<VPInterleaveRecipe>(&Recipe)) {
        VPRecipeBase *AddrDef = InterleaveRec->getAddr()->getDefiningRecipe();
        if (AddrDef) {
          // Check if any member of the interleave group needs predication.
          const InterleaveGroup<Instruction> *InterGroup =
              InterleaveRec->getInterleaveGroup();
          bool NeedPredication = false;
          for (int I = 0, NumMembers = InterGroup->getNumMembers();
               I < NumMembers; ++I) {
            Instruction *Member = InterGroup->getMember(I);
            if (Member)
              NeedPredication |= BlockNeedsPredication(Member->getParent());
          }

          if (NeedPredication)
            collectPoisonGeneratingInstrsInBackwardSlice(AddrDef);
        }
      }
    }
  }
}
1625