xref: /src/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1eb11fae6SDimitry Andric //===- LoopVectorizationLegality.cpp --------------------------------------===//
2eb11fae6SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6eb11fae6SDimitry Andric //
7eb11fae6SDimitry Andric //===----------------------------------------------------------------------===//
8eb11fae6SDimitry Andric //
9eb11fae6SDimitry Andric // This file provides loop vectorization legality analysis. Original code
10eb11fae6SDimitry Andric // resided in LoopVectorize.cpp for a long time.
11eb11fae6SDimitry Andric //
12eb11fae6SDimitry Andric // At this point, it is implemented as a utility class, not as an analysis
13eb11fae6SDimitry Andric // pass. It should be easy to create an analysis pass around it if there
14eb11fae6SDimitry Andric // is a need (but D45420 needs to happen first).
15eb11fae6SDimitry Andric //
16b60736ecSDimitry Andric 
17eb11fae6SDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
181d5ae102SDimitry Andric #include "llvm/Analysis/Loads.h"
19cfca06d7SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
20145449b1SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21b60736ecSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
22145449b1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
231d5ae102SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
24eb11fae6SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
25eb11fae6SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
26cfca06d7SDimitry Andric #include "llvm/IR/PatternMatch.h"
27b60736ecSDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h"
28cfca06d7SDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorize.h"
29eb11fae6SDimitry Andric 
30eb11fae6SDimitry Andric using namespace llvm;
31cfca06d7SDimitry Andric using namespace PatternMatch;
32eb11fae6SDimitry Andric 
33eb11fae6SDimitry Andric #define LV_NAME "loop-vectorize"
34eb11fae6SDimitry Andric #define DEBUG_TYPE LV_NAME
35eb11fae6SDimitry Andric 
36eb11fae6SDimitry Andric static cl::opt<bool>
37eb11fae6SDimitry Andric     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38eb11fae6SDimitry Andric                        cl::desc("Enable if-conversion during vectorization."));
39eb11fae6SDimitry Andric 
407fa27ce4SDimitry Andric static cl::opt<bool>
417fa27ce4SDimitry Andric AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
427fa27ce4SDimitry Andric                        cl::desc("Enable recognition of non-constant strided "
437fa27ce4SDimitry Andric                                 "pointer induction variables."));
447fa27ce4SDimitry Andric 
45344a3780SDimitry Andric namespace llvm {
46344a3780SDimitry Andric cl::opt<bool>
47344a3780SDimitry Andric     HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
48344a3780SDimitry Andric                          cl::desc("Allow enabling loop hints to reorder "
49344a3780SDimitry Andric                                   "FP operations during vectorization."));
50344a3780SDimitry Andric }
51eb11fae6SDimitry Andric 
52344a3780SDimitry Andric // TODO: Move size-based thresholds out of legality checking, make cost based
53344a3780SDimitry Andric // decisions instead of hard thresholds.
54eb11fae6SDimitry Andric static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
55eb11fae6SDimitry Andric     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
56eb11fae6SDimitry Andric     cl::desc("The maximum number of SCEV checks allowed."));
57eb11fae6SDimitry Andric 
58eb11fae6SDimitry Andric static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
59eb11fae6SDimitry Andric     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
60eb11fae6SDimitry Andric     cl::desc("The maximum number of SCEV checks allowed with a "
61eb11fae6SDimitry Andric              "vectorize(enable) pragma"));
62eb11fae6SDimitry Andric 
6377fc4c14SDimitry Andric static cl::opt<LoopVectorizeHints::ScalableForceKind>
6477fc4c14SDimitry Andric     ForceScalableVectorization(
6577fc4c14SDimitry Andric         "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
66344a3780SDimitry Andric         cl::Hidden,
67344a3780SDimitry Andric         cl::desc("Control whether the compiler can use scalable vectors to "
68344a3780SDimitry Andric                  "vectorize a loop"),
69344a3780SDimitry Andric         cl::values(
70344a3780SDimitry Andric             clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
71344a3780SDimitry Andric                        "Scalable vectorization is disabled."),
7277fc4c14SDimitry Andric             clEnumValN(
7377fc4c14SDimitry Andric                 LoopVectorizeHints::SK_PreferScalable, "preferred",
7477fc4c14SDimitry Andric                 "Scalable vectorization is available and favored when the "
7577fc4c14SDimitry Andric                 "cost is inconclusive."),
7677fc4c14SDimitry Andric             clEnumValN(
7777fc4c14SDimitry Andric                 LoopVectorizeHints::SK_PreferScalable, "on",
78344a3780SDimitry Andric                 "Scalable vectorization is available and favored when the "
79344a3780SDimitry Andric                 "cost is inconclusive.")));
80344a3780SDimitry Andric 
81eb11fae6SDimitry Andric /// Maximum vectorization interleave count.
82eb11fae6SDimitry Andric static const unsigned MaxInterleaveFactor = 16;
83eb11fae6SDimitry Andric 
84eb11fae6SDimitry Andric namespace llvm {
85eb11fae6SDimitry Andric 
validate(unsigned Val)86eb11fae6SDimitry Andric bool LoopVectorizeHints::Hint::validate(unsigned Val) {
87eb11fae6SDimitry Andric   switch (Kind) {
88eb11fae6SDimitry Andric   case HK_WIDTH:
89eb11fae6SDimitry Andric     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
90344a3780SDimitry Andric   case HK_INTERLEAVE:
91eb11fae6SDimitry Andric     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
92eb11fae6SDimitry Andric   case HK_FORCE:
93eb11fae6SDimitry Andric     return (Val <= 1);
94eb11fae6SDimitry Andric   case HK_ISVECTORIZED:
951d5ae102SDimitry Andric   case HK_PREDICATE:
96b60736ecSDimitry Andric   case HK_SCALABLE:
97eb11fae6SDimitry Andric     return (Val == 0 || Val == 1);
98eb11fae6SDimitry Andric   }
99eb11fae6SDimitry Andric   return false;
100eb11fae6SDimitry Andric }
101eb11fae6SDimitry Andric 
LoopVectorizeHints(const Loop * L,bool InterleaveOnlyWhenForced,OptimizationRemarkEmitter & ORE,const TargetTransformInfo * TTI)102d8e91e46SDimitry Andric LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
103d8e91e46SDimitry Andric                                        bool InterleaveOnlyWhenForced,
10477fc4c14SDimitry Andric                                        OptimizationRemarkEmitter &ORE,
10577fc4c14SDimitry Andric                                        const TargetTransformInfo *TTI)
106eb11fae6SDimitry Andric     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
107344a3780SDimitry Andric       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
108eb11fae6SDimitry Andric       Force("vectorize.enable", FK_Undefined, HK_FORCE),
1091d5ae102SDimitry Andric       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
110b60736ecSDimitry Andric       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
111344a3780SDimitry Andric       Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
112344a3780SDimitry Andric       TheLoop(L), ORE(ORE) {
113eb11fae6SDimitry Andric   // Populate values with existing loop metadata.
114eb11fae6SDimitry Andric   getHintsFromMetadata();
115eb11fae6SDimitry Andric 
116eb11fae6SDimitry Andric   // force-vector-interleave overrides DisableInterleaving.
117eb11fae6SDimitry Andric   if (VectorizerParams::isInterleaveForced())
118eb11fae6SDimitry Andric     Interleave.Value = VectorizerParams::VectorizationInterleave;
119eb11fae6SDimitry Andric 
12077fc4c14SDimitry Andric   // If the metadata doesn't explicitly specify whether to enable scalable
12177fc4c14SDimitry Andric   // vectorization, then decide based on the following criteria (increasing
12277fc4c14SDimitry Andric   // level of priority):
12377fc4c14SDimitry Andric   //  - Target default
12477fc4c14SDimitry Andric   //  - Metadata width
12577fc4c14SDimitry Andric   //  - Force option (always overrides)
12677fc4c14SDimitry Andric   if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
12777fc4c14SDimitry Andric     if (TTI)
12877fc4c14SDimitry Andric       Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
12977fc4c14SDimitry Andric                                                           : SK_FixedWidthOnly;
13077fc4c14SDimitry Andric 
13177fc4c14SDimitry Andric     if (Width.Value)
132344a3780SDimitry Andric       // If the width is set, but the metadata says nothing about the scalable
133344a3780SDimitry Andric       // property, then assume it concerns only a fixed-width UserVF.
134344a3780SDimitry Andric       // If width is not set, the flag takes precedence.
13577fc4c14SDimitry Andric       Scalable.Value = SK_FixedWidthOnly;
13677fc4c14SDimitry Andric   }
13777fc4c14SDimitry Andric 
13877fc4c14SDimitry Andric   // If the flag is set to force any use of scalable vectors, override the loop
13977fc4c14SDimitry Andric   // hints.
14077fc4c14SDimitry Andric   if (ForceScalableVectorization.getValue() !=
14177fc4c14SDimitry Andric       LoopVectorizeHints::SK_Unspecified)
14277fc4c14SDimitry Andric     Scalable.Value = ForceScalableVectorization.getValue();
14377fc4c14SDimitry Andric 
14477fc4c14SDimitry Andric   // Scalable vectorization is disabled if no preference is specified.
14577fc4c14SDimitry Andric   if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
146344a3780SDimitry Andric     Scalable.Value = SK_FixedWidthOnly;
147344a3780SDimitry Andric 
148eb11fae6SDimitry Andric   if (IsVectorized.Value != 1)
149eb11fae6SDimitry Andric     // If the vectorization width and interleaving count are both 1 then
150eb11fae6SDimitry Andric     // consider the loop to have been already vectorized because there's
151eb11fae6SDimitry Andric     // nothing more that we can do.
152b60736ecSDimitry Andric     IsVectorized.Value =
153344a3780SDimitry Andric         getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
154344a3780SDimitry Andric   LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
155eb11fae6SDimitry Andric              << "LV: Interleaving disabled by the pass manager\n");
156eb11fae6SDimitry Andric }
157eb11fae6SDimitry Andric 
setAlreadyVectorized()158e6d15924SDimitry Andric void LoopVectorizeHints::setAlreadyVectorized() {
159e6d15924SDimitry Andric   LLVMContext &Context = TheLoop->getHeader()->getContext();
160e6d15924SDimitry Andric 
161e6d15924SDimitry Andric   MDNode *IsVectorizedMD = MDNode::get(
162e6d15924SDimitry Andric       Context,
163e6d15924SDimitry Andric       {MDString::get(Context, "llvm.loop.isvectorized"),
164e6d15924SDimitry Andric        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
165e6d15924SDimitry Andric   MDNode *LoopID = TheLoop->getLoopID();
166e6d15924SDimitry Andric   MDNode *NewLoopID =
167e6d15924SDimitry Andric       makePostTransformationMetadata(Context, LoopID,
168e6d15924SDimitry Andric                                      {Twine(Prefix(), "vectorize.").str(),
169e6d15924SDimitry Andric                                       Twine(Prefix(), "interleave.").str()},
170e6d15924SDimitry Andric                                      {IsVectorizedMD});
171e6d15924SDimitry Andric   TheLoop->setLoopID(NewLoopID);
172e6d15924SDimitry Andric 
173e6d15924SDimitry Andric   // Update internal cache.
174e6d15924SDimitry Andric   IsVectorized.Value = 1;
175e6d15924SDimitry Andric }
176e6d15924SDimitry Andric 
allowVectorization(Function * F,Loop * L,bool VectorizeOnlyWhenForced) const177d8e91e46SDimitry Andric bool LoopVectorizeHints::allowVectorization(
178d8e91e46SDimitry Andric     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
179eb11fae6SDimitry Andric   if (getForce() == LoopVectorizeHints::FK_Disabled) {
180eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
181eb11fae6SDimitry Andric     emitRemarkWithHints();
182eb11fae6SDimitry Andric     return false;
183eb11fae6SDimitry Andric   }
184eb11fae6SDimitry Andric 
185d8e91e46SDimitry Andric   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
186eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
187eb11fae6SDimitry Andric     emitRemarkWithHints();
188eb11fae6SDimitry Andric     return false;
189eb11fae6SDimitry Andric   }
190eb11fae6SDimitry Andric 
191eb11fae6SDimitry Andric   if (getIsVectorized() == 1) {
192eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
193eb11fae6SDimitry Andric     // FIXME: Add interleave.disable metadata. This will allow
194eb11fae6SDimitry Andric     // vectorize.disable to be used without disabling the pass and errors
195eb11fae6SDimitry Andric     // to differentiate between disabled vectorization and a width of 1.
196eb11fae6SDimitry Andric     ORE.emit([&]() {
197eb11fae6SDimitry Andric       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
198eb11fae6SDimitry Andric                                         "AllDisabled", L->getStartLoc(),
199eb11fae6SDimitry Andric                                         L->getHeader())
200eb11fae6SDimitry Andric              << "loop not vectorized: vectorization and interleaving are "
201eb11fae6SDimitry Andric                 "explicitly disabled, or the loop has already been "
202eb11fae6SDimitry Andric                 "vectorized";
203eb11fae6SDimitry Andric     });
204eb11fae6SDimitry Andric     return false;
205eb11fae6SDimitry Andric   }
206eb11fae6SDimitry Andric 
207eb11fae6SDimitry Andric   return true;
208eb11fae6SDimitry Andric }
209eb11fae6SDimitry Andric 
emitRemarkWithHints() const210eb11fae6SDimitry Andric void LoopVectorizeHints::emitRemarkWithHints() const {
211eb11fae6SDimitry Andric   using namespace ore;
212eb11fae6SDimitry Andric 
213eb11fae6SDimitry Andric   ORE.emit([&]() {
214eb11fae6SDimitry Andric     if (Force.Value == LoopVectorizeHints::FK_Disabled)
215eb11fae6SDimitry Andric       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
216eb11fae6SDimitry Andric                                       TheLoop->getStartLoc(),
217eb11fae6SDimitry Andric                                       TheLoop->getHeader())
218eb11fae6SDimitry Andric              << "loop not vectorized: vectorization is explicitly disabled";
219eb11fae6SDimitry Andric     else {
220eb11fae6SDimitry Andric       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
221eb11fae6SDimitry Andric                                  TheLoop->getStartLoc(), TheLoop->getHeader());
222eb11fae6SDimitry Andric       R << "loop not vectorized";
223eb11fae6SDimitry Andric       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
224eb11fae6SDimitry Andric         R << " (Force=" << NV("Force", true);
225eb11fae6SDimitry Andric         if (Width.Value != 0)
226b60736ecSDimitry Andric           R << ", Vector Width=" << NV("VectorWidth", getWidth());
227344a3780SDimitry Andric         if (getInterleave() != 0)
228344a3780SDimitry Andric           R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
229eb11fae6SDimitry Andric         R << ")";
230eb11fae6SDimitry Andric       }
231eb11fae6SDimitry Andric       return R;
232eb11fae6SDimitry Andric     }
233eb11fae6SDimitry Andric   });
234eb11fae6SDimitry Andric }
235eb11fae6SDimitry Andric 
vectorizeAnalysisPassName() const236eb11fae6SDimitry Andric const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
237b60736ecSDimitry Andric   if (getWidth() == ElementCount::getFixed(1))
238eb11fae6SDimitry Andric     return LV_NAME;
239eb11fae6SDimitry Andric   if (getForce() == LoopVectorizeHints::FK_Disabled)
240eb11fae6SDimitry Andric     return LV_NAME;
241b60736ecSDimitry Andric   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
242eb11fae6SDimitry Andric     return LV_NAME;
243eb11fae6SDimitry Andric   return OptimizationRemarkAnalysis::AlwaysPrint;
244eb11fae6SDimitry Andric }
245eb11fae6SDimitry Andric 
allowReordering() const246344a3780SDimitry Andric bool LoopVectorizeHints::allowReordering() const {
247344a3780SDimitry Andric   // Allow the vectorizer to change the order of operations if enabling
248344a3780SDimitry Andric   // loop hints are provided
249344a3780SDimitry Andric   ElementCount EC = getWidth();
250344a3780SDimitry Andric   return HintsAllowReordering &&
251344a3780SDimitry Andric          (getForce() == LoopVectorizeHints::FK_Enabled ||
252344a3780SDimitry Andric           EC.getKnownMinValue() > 1);
253344a3780SDimitry Andric }
254344a3780SDimitry Andric 
getHintsFromMetadata()255eb11fae6SDimitry Andric void LoopVectorizeHints::getHintsFromMetadata() {
256eb11fae6SDimitry Andric   MDNode *LoopID = TheLoop->getLoopID();
257eb11fae6SDimitry Andric   if (!LoopID)
258eb11fae6SDimitry Andric     return;
259eb11fae6SDimitry Andric 
260eb11fae6SDimitry Andric   // First operand should refer to the loop id itself.
261eb11fae6SDimitry Andric   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
262eb11fae6SDimitry Andric   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
263eb11fae6SDimitry Andric 
264ac9a064cSDimitry Andric   for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
265eb11fae6SDimitry Andric     const MDString *S = nullptr;
266eb11fae6SDimitry Andric     SmallVector<Metadata *, 4> Args;
267eb11fae6SDimitry Andric 
268eb11fae6SDimitry Andric     // The expected hint is either a MDString or a MDNode with the first
269eb11fae6SDimitry Andric     // operand a MDString.
270ac9a064cSDimitry Andric     if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
271eb11fae6SDimitry Andric       if (!MD || MD->getNumOperands() == 0)
272eb11fae6SDimitry Andric         continue;
273eb11fae6SDimitry Andric       S = dyn_cast<MDString>(MD->getOperand(0));
274eb11fae6SDimitry Andric       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
275eb11fae6SDimitry Andric         Args.push_back(MD->getOperand(i));
276eb11fae6SDimitry Andric     } else {
277ac9a064cSDimitry Andric       S = dyn_cast<MDString>(MDO);
278eb11fae6SDimitry Andric       assert(Args.size() == 0 && "too many arguments for MDString");
279eb11fae6SDimitry Andric     }
280eb11fae6SDimitry Andric 
281eb11fae6SDimitry Andric     if (!S)
282eb11fae6SDimitry Andric       continue;
283eb11fae6SDimitry Andric 
284eb11fae6SDimitry Andric     // Check if the hint starts with the loop metadata prefix.
285eb11fae6SDimitry Andric     StringRef Name = S->getString();
286eb11fae6SDimitry Andric     if (Args.size() == 1)
287eb11fae6SDimitry Andric       setHint(Name, Args[0]);
288eb11fae6SDimitry Andric   }
289eb11fae6SDimitry Andric }
290eb11fae6SDimitry Andric 
setHint(StringRef Name,Metadata * Arg)291eb11fae6SDimitry Andric void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
292b1c73532SDimitry Andric   if (!Name.starts_with(Prefix()))
293eb11fae6SDimitry Andric     return;
294eb11fae6SDimitry Andric   Name = Name.substr(Prefix().size(), StringRef::npos);
295eb11fae6SDimitry Andric 
296eb11fae6SDimitry Andric   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
297eb11fae6SDimitry Andric   if (!C)
298eb11fae6SDimitry Andric     return;
299eb11fae6SDimitry Andric   unsigned Val = C->getZExtValue();
300eb11fae6SDimitry Andric 
301b60736ecSDimitry Andric   Hint *Hints[] = {&Width,        &Interleave, &Force,
302b60736ecSDimitry Andric                    &IsVectorized, &Predicate,  &Scalable};
303e3b55780SDimitry Andric   for (auto *H : Hints) {
304eb11fae6SDimitry Andric     if (Name == H->Name) {
305eb11fae6SDimitry Andric       if (H->validate(Val))
306eb11fae6SDimitry Andric         H->Value = Val;
307eb11fae6SDimitry Andric       else
308eb11fae6SDimitry Andric         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
309eb11fae6SDimitry Andric       break;
310eb11fae6SDimitry Andric     }
311eb11fae6SDimitry Andric   }
312eb11fae6SDimitry Andric }
313eb11fae6SDimitry Andric 
314eb11fae6SDimitry Andric // Return true if the inner loop \p Lp is uniform with regard to the outer loop
315eb11fae6SDimitry Andric // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
316eb11fae6SDimitry Andric // executing the inner loop will execute the same iterations). This check is
317eb11fae6SDimitry Andric // very constrained for now but it will be relaxed in the future. \p Lp is
318eb11fae6SDimitry Andric // considered uniform if it meets all the following conditions:
319eb11fae6SDimitry Andric //   1) it has a canonical IV (starting from 0 and with stride 1),
320eb11fae6SDimitry Andric //   2) its latch terminator is a conditional branch and,
321eb11fae6SDimitry Andric //   3) its latch condition is a compare instruction whose operands are the
322eb11fae6SDimitry Andric //      canonical IV and an OuterLp invariant.
323eb11fae6SDimitry Andric // This check doesn't take into account the uniformity of other conditions not
324eb11fae6SDimitry Andric // related to the loop latch because they don't affect the loop uniformity.
325eb11fae6SDimitry Andric //
326eb11fae6SDimitry Andric // NOTE: We decided to keep all these checks and its associated documentation
327eb11fae6SDimitry Andric // together so that we can easily have a picture of the current supported loop
328eb11fae6SDimitry Andric // nests. However, some of the current checks don't depend on \p OuterLp and
329eb11fae6SDimitry Andric // would be redundantly executed for each \p Lp if we invoked this function for
330eb11fae6SDimitry Andric // different candidate outer loops. This is not the case for now because we
331eb11fae6SDimitry Andric // don't currently have the infrastructure to evaluate multiple candidate outer
332eb11fae6SDimitry Andric // loops and \p OuterLp will be a fixed parameter while we only support explicit
333eb11fae6SDimitry Andric // outer loop vectorization. It's also very likely that these checks go away
334eb11fae6SDimitry Andric // before introducing the aforementioned infrastructure. However, if this is not
335eb11fae6SDimitry Andric // the case, we should move the \p OuterLp independent checks to a separate
336eb11fae6SDimitry Andric // function that is only executed once for each \p Lp.
isUniformLoop(Loop * Lp,Loop * OuterLp)337eb11fae6SDimitry Andric static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
338eb11fae6SDimitry Andric   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
339eb11fae6SDimitry Andric 
340eb11fae6SDimitry Andric   // If Lp is the outer loop, it's uniform by definition.
341eb11fae6SDimitry Andric   if (Lp == OuterLp)
342eb11fae6SDimitry Andric     return true;
343eb11fae6SDimitry Andric   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
344eb11fae6SDimitry Andric 
345eb11fae6SDimitry Andric   // 1.
346eb11fae6SDimitry Andric   PHINode *IV = Lp->getCanonicalInductionVariable();
347eb11fae6SDimitry Andric   if (!IV) {
348eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
349eb11fae6SDimitry Andric     return false;
350eb11fae6SDimitry Andric   }
351eb11fae6SDimitry Andric 
352eb11fae6SDimitry Andric   // 2.
353eb11fae6SDimitry Andric   BasicBlock *Latch = Lp->getLoopLatch();
354eb11fae6SDimitry Andric   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
355eb11fae6SDimitry Andric   if (!LatchBr || LatchBr->isUnconditional()) {
356eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
357eb11fae6SDimitry Andric     return false;
358eb11fae6SDimitry Andric   }
359eb11fae6SDimitry Andric 
360eb11fae6SDimitry Andric   // 3.
361eb11fae6SDimitry Andric   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
362eb11fae6SDimitry Andric   if (!LatchCmp) {
363eb11fae6SDimitry Andric     LLVM_DEBUG(
364eb11fae6SDimitry Andric         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
365eb11fae6SDimitry Andric     return false;
366eb11fae6SDimitry Andric   }
367eb11fae6SDimitry Andric 
368eb11fae6SDimitry Andric   Value *CondOp0 = LatchCmp->getOperand(0);
369eb11fae6SDimitry Andric   Value *CondOp1 = LatchCmp->getOperand(1);
370eb11fae6SDimitry Andric   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
371eb11fae6SDimitry Andric   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
372eb11fae6SDimitry Andric       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
373eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
374eb11fae6SDimitry Andric     return false;
375eb11fae6SDimitry Andric   }
376eb11fae6SDimitry Andric 
377eb11fae6SDimitry Andric   return true;
378eb11fae6SDimitry Andric }
379eb11fae6SDimitry Andric 
380eb11fae6SDimitry Andric // Return true if \p Lp and all its nested loops are uniform with regard to \p
381eb11fae6SDimitry Andric // OuterLp.
isUniformLoopNest(Loop * Lp,Loop * OuterLp)382eb11fae6SDimitry Andric static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
383eb11fae6SDimitry Andric   if (!isUniformLoop(Lp, OuterLp))
384eb11fae6SDimitry Andric     return false;
385eb11fae6SDimitry Andric 
386eb11fae6SDimitry Andric   // Check if nested loops are uniform.
387eb11fae6SDimitry Andric   for (Loop *SubLp : *Lp)
388eb11fae6SDimitry Andric     if (!isUniformLoopNest(SubLp, OuterLp))
389eb11fae6SDimitry Andric       return false;
390eb11fae6SDimitry Andric 
391eb11fae6SDimitry Andric   return true;
392eb11fae6SDimitry Andric }
393eb11fae6SDimitry Andric 
convertPointerToIntegerType(const DataLayout & DL,Type * Ty)394eb11fae6SDimitry Andric static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
395eb11fae6SDimitry Andric   if (Ty->isPointerTy())
396eb11fae6SDimitry Andric     return DL.getIntPtrType(Ty);
397eb11fae6SDimitry Andric 
398eb11fae6SDimitry Andric   // It is possible that char's or short's overflow when we ask for the loop's
399eb11fae6SDimitry Andric   // trip count, work around this by changing the type size.
400eb11fae6SDimitry Andric   if (Ty->getScalarSizeInBits() < 32)
401eb11fae6SDimitry Andric     return Type::getInt32Ty(Ty->getContext());
402eb11fae6SDimitry Andric 
403eb11fae6SDimitry Andric   return Ty;
404eb11fae6SDimitry Andric }
405eb11fae6SDimitry Andric 
getWiderType(const DataLayout & DL,Type * Ty0,Type * Ty1)406eb11fae6SDimitry Andric static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
407eb11fae6SDimitry Andric   Ty0 = convertPointerToIntegerType(DL, Ty0);
408eb11fae6SDimitry Andric   Ty1 = convertPointerToIntegerType(DL, Ty1);
409eb11fae6SDimitry Andric   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
410eb11fae6SDimitry Andric     return Ty0;
411eb11fae6SDimitry Andric   return Ty1;
412eb11fae6SDimitry Andric }
413eb11fae6SDimitry Andric 
414eb11fae6SDimitry Andric /// Check that the instruction has outside loop users and is not an
415eb11fae6SDimitry Andric /// identified reduction variable.
hasOutsideLoopUser(const Loop * TheLoop,Instruction * Inst,SmallPtrSetImpl<Value * > & AllowedExit)416eb11fae6SDimitry Andric static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
417eb11fae6SDimitry Andric                                SmallPtrSetImpl<Value *> &AllowedExit) {
418d8e91e46SDimitry Andric   // Reductions, Inductions and non-header phis are allowed to have exit users. All
419eb11fae6SDimitry Andric   // other instructions must not have external users.
420eb11fae6SDimitry Andric   if (!AllowedExit.count(Inst))
421eb11fae6SDimitry Andric     // Check that all of the users of the loop are inside the BB.
422eb11fae6SDimitry Andric     for (User *U : Inst->users()) {
423eb11fae6SDimitry Andric       Instruction *UI = cast<Instruction>(U);
424eb11fae6SDimitry Andric       // This user may be a reduction exit value.
425eb11fae6SDimitry Andric       if (!TheLoop->contains(UI)) {
426eb11fae6SDimitry Andric         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
427eb11fae6SDimitry Andric         return true;
428eb11fae6SDimitry Andric       }
429eb11fae6SDimitry Andric     }
430eb11fae6SDimitry Andric   return false;
431eb11fae6SDimitry Andric }
432eb11fae6SDimitry Andric 
433145449b1SDimitry Andric /// Returns true if A and B have same pointer operands or same SCEVs addresses
storeToSameAddress(ScalarEvolution * SE,StoreInst * A,StoreInst * B)434145449b1SDimitry Andric static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
435145449b1SDimitry Andric                                StoreInst *B) {
436145449b1SDimitry Andric   // Compare store
437145449b1SDimitry Andric   if (A == B)
438145449b1SDimitry Andric     return true;
439145449b1SDimitry Andric 
440145449b1SDimitry Andric   // Otherwise Compare pointers
441145449b1SDimitry Andric   Value *APtr = A->getPointerOperand();
442145449b1SDimitry Andric   Value *BPtr = B->getPointerOperand();
443145449b1SDimitry Andric   if (APtr == BPtr)
444145449b1SDimitry Andric     return true;
445145449b1SDimitry Andric 
446145449b1SDimitry Andric   // Otherwise compare address SCEVs
447145449b1SDimitry Andric   if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
448145449b1SDimitry Andric     return true;
449145449b1SDimitry Andric 
450145449b1SDimitry Andric   return false;
451145449b1SDimitry Andric }
452145449b1SDimitry Andric 
isConsecutivePtr(Type * AccessTy,Value * Ptr) const453c0981da4SDimitry Andric int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
454c0981da4SDimitry Andric                                                 Value *Ptr) const {
4557fa27ce4SDimitry Andric   // FIXME: Currently, the set of symbolic strides is sometimes queried before
4567fa27ce4SDimitry Andric   // it's collected.  This happens from canVectorizeWithIfConvert, when the
4577fa27ce4SDimitry Andric   // pointer is checked to reference consecutive elements suitable for a
4587fa27ce4SDimitry Andric   // masked access.
4597fa27ce4SDimitry Andric   const auto &Strides =
4607fa27ce4SDimitry Andric     LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();
461eb11fae6SDimitry Andric 
462b60736ecSDimitry Andric   Function *F = TheLoop->getHeader()->getParent();
463b60736ecSDimitry Andric   bool OptForSize = F->hasOptSize() ||
464b60736ecSDimitry Andric                     llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
465b60736ecSDimitry Andric                                                 PGSOQueryType::IRPass);
466b60736ecSDimitry Andric   bool CanAddPredicate = !OptForSize;
467c0981da4SDimitry Andric   int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
468e3b55780SDimitry Andric                             CanAddPredicate, false).value_or(0);
469eb11fae6SDimitry Andric   if (Stride == 1 || Stride == -1)
470eb11fae6SDimitry Andric     return Stride;
471eb11fae6SDimitry Andric   return 0;
472eb11fae6SDimitry Andric }
473eb11fae6SDimitry Andric 
isInvariant(Value * V) const4747fa27ce4SDimitry Andric bool LoopVectorizationLegality::isInvariant(Value *V) const {
4757fa27ce4SDimitry Andric   return LAI->isInvariant(V);
476eb11fae6SDimitry Andric }
477eb11fae6SDimitry Andric 
namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity. This
/// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
/// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
/// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
/// uniformity.
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Set to true once any sub-expression turns out not to be analyzable
  /// w.r.t. uniformity; sticky for the lifetime of the rewriter.
  bool CannotAnalyze = false;

  /// True while no unanalyzable sub-expression has been encountered.
  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    auto *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      // A step that varies within TheLoop cannot be rescaled per lane.
      CannotAnalyze = true;
      return Expr;
    }
    auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    // Loop-invariant sub-expressions are identical for every lane; leave
    // them untouched (and stop rewriting entirely once analysis failed).
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  /// Entry point: rewrite \p S for the lane at \p Offset with the AddRec step
  /// scaled by \p StepMultiplier. Returns SCEVCouldNotCompute if \p S contains
  /// no UDiv (see below) or if any sub-expression could not be analyzed.
  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    /// Bail out if the expression does not contain an UDiv expression.
    /// Uniform values which are not loop invariant require operations to strip
    /// out the lowest bits. For now just look for UDivs and use it to avoid
    /// re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace
5677fa27ce4SDimitry Andric 
isUniform(Value * V,ElementCount VF) const5687fa27ce4SDimitry Andric bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
5697fa27ce4SDimitry Andric   if (isInvariant(V))
5707fa27ce4SDimitry Andric     return true;
5717fa27ce4SDimitry Andric   if (VF.isScalable())
5727fa27ce4SDimitry Andric     return false;
5737fa27ce4SDimitry Andric   if (VF.isScalar())
5747fa27ce4SDimitry Andric     return true;
5757fa27ce4SDimitry Andric 
5767fa27ce4SDimitry Andric   // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
5777fa27ce4SDimitry Andric   // never considered uniform.
5787fa27ce4SDimitry Andric   auto *SE = PSE.getSE();
5797fa27ce4SDimitry Andric   if (!SE->isSCEVable(V->getType()))
5807fa27ce4SDimitry Andric     return false;
5817fa27ce4SDimitry Andric   const SCEV *S = SE->getSCEV(V);
5827fa27ce4SDimitry Andric 
5837fa27ce4SDimitry Andric   // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
5847fa27ce4SDimitry Andric   // lane 0 matches the expressions for all other lanes.
5857fa27ce4SDimitry Andric   unsigned FixedVF = VF.getKnownMinValue();
5867fa27ce4SDimitry Andric   const SCEV *FirstLaneExpr =
5877fa27ce4SDimitry Andric       SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
5887fa27ce4SDimitry Andric   if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
5897fa27ce4SDimitry Andric     return false;
5907fa27ce4SDimitry Andric 
5917fa27ce4SDimitry Andric   // Make sure the expressions for lanes FixedVF-1..1 match the expression for
5927fa27ce4SDimitry Andric   // lane 0. We check lanes in reverse order for compile-time, as frequently
5937fa27ce4SDimitry Andric   // checking the last lane is sufficient to rule out uniformity.
5947fa27ce4SDimitry Andric   return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
5957fa27ce4SDimitry Andric     const SCEV *IthLaneExpr =
5967fa27ce4SDimitry Andric         SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
5977fa27ce4SDimitry Andric     return FirstLaneExpr == IthLaneExpr;
5987fa27ce4SDimitry Andric   });
5997fa27ce4SDimitry Andric }
6007fa27ce4SDimitry Andric 
isUniformMemOp(Instruction & I,ElementCount VF) const6017fa27ce4SDimitry Andric bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
6027fa27ce4SDimitry Andric                                                ElementCount VF) const {
603e3b55780SDimitry Andric   Value *Ptr = getLoadStorePointerOperand(&I);
604e3b55780SDimitry Andric   if (!Ptr)
605e3b55780SDimitry Andric     return false;
606e3b55780SDimitry Andric   // Note: There's nothing inherent which prevents predicated loads and
607e3b55780SDimitry Andric   // stores from being uniform.  The current lowering simply doesn't handle
608e3b55780SDimitry Andric   // it; in particular, the cost model distinguishes scatter/gather from
609e3b55780SDimitry Andric   // scalar w/predication, and we currently rely on the scalar path.
6107fa27ce4SDimitry Andric   return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
611e3b55780SDimitry Andric }
612e3b55780SDimitry Andric 
canVectorizeOuterLoop()613eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeOuterLoop() {
614b60736ecSDimitry Andric   assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
615eb11fae6SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
616eb11fae6SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
617eb11fae6SDimitry Andric   bool Result = true;
618eb11fae6SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
619eb11fae6SDimitry Andric 
620eb11fae6SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
621eb11fae6SDimitry Andric     // Check whether the BB terminator is a BranchInst. Any other terminator is
622eb11fae6SDimitry Andric     // not supported yet.
623eb11fae6SDimitry Andric     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
624eb11fae6SDimitry Andric     if (!Br) {
625e6d15924SDimitry Andric       reportVectorizationFailure("Unsupported basic block terminator",
626e6d15924SDimitry Andric           "loop control flow is not understood by vectorizer",
6271d5ae102SDimitry Andric           "CFGNotUnderstood", ORE, TheLoop);
628eb11fae6SDimitry Andric       if (DoExtraAnalysis)
629eb11fae6SDimitry Andric         Result = false;
630eb11fae6SDimitry Andric       else
631eb11fae6SDimitry Andric         return false;
632eb11fae6SDimitry Andric     }
633eb11fae6SDimitry Andric 
634eb11fae6SDimitry Andric     // Check whether the BranchInst is a supported one. Only unconditional
635eb11fae6SDimitry Andric     // branches, conditional branches with an outer loop invariant condition or
636eb11fae6SDimitry Andric     // backedges are supported.
637e6d15924SDimitry Andric     // FIXME: We skip these checks when VPlan predication is enabled as we
638e6d15924SDimitry Andric     // want to allow divergent branches. This whole check will be removed
639e6d15924SDimitry Andric     // once VPlan predication is on by default.
640145449b1SDimitry Andric     if (Br && Br->isConditional() &&
641eb11fae6SDimitry Andric         !TheLoop->isLoopInvariant(Br->getCondition()) &&
642eb11fae6SDimitry Andric         !LI->isLoopHeader(Br->getSuccessor(0)) &&
643eb11fae6SDimitry Andric         !LI->isLoopHeader(Br->getSuccessor(1))) {
644e6d15924SDimitry Andric       reportVectorizationFailure("Unsupported conditional branch",
645e6d15924SDimitry Andric           "loop control flow is not understood by vectorizer",
6461d5ae102SDimitry Andric           "CFGNotUnderstood", ORE, TheLoop);
647eb11fae6SDimitry Andric       if (DoExtraAnalysis)
648eb11fae6SDimitry Andric         Result = false;
649eb11fae6SDimitry Andric       else
650eb11fae6SDimitry Andric         return false;
651eb11fae6SDimitry Andric     }
652eb11fae6SDimitry Andric   }
653eb11fae6SDimitry Andric 
654eb11fae6SDimitry Andric   // Check whether inner loops are uniform. At this point, we only support
655eb11fae6SDimitry Andric   // simple outer loops scenarios with uniform nested loops.
656eb11fae6SDimitry Andric   if (!isUniformLoopNest(TheLoop /*loop nest*/,
657eb11fae6SDimitry Andric                          TheLoop /*context outer loop*/)) {
658e6d15924SDimitry Andric     reportVectorizationFailure("Outer loop contains divergent loops",
659e6d15924SDimitry Andric         "loop control flow is not understood by vectorizer",
6601d5ae102SDimitry Andric         "CFGNotUnderstood", ORE, TheLoop);
661eb11fae6SDimitry Andric     if (DoExtraAnalysis)
662eb11fae6SDimitry Andric       Result = false;
663eb11fae6SDimitry Andric     else
664eb11fae6SDimitry Andric       return false;
665eb11fae6SDimitry Andric   }
666eb11fae6SDimitry Andric 
667d8e91e46SDimitry Andric   // Check whether we are able to set up outer loop induction.
668d8e91e46SDimitry Andric   if (!setupOuterLoopInductions()) {
669e6d15924SDimitry Andric     reportVectorizationFailure("Unsupported outer loop Phi(s)",
670e6d15924SDimitry Andric                                "Unsupported outer loop Phi(s)",
6711d5ae102SDimitry Andric                                "UnsupportedPhi", ORE, TheLoop);
672d8e91e46SDimitry Andric     if (DoExtraAnalysis)
673d8e91e46SDimitry Andric       Result = false;
674d8e91e46SDimitry Andric     else
675d8e91e46SDimitry Andric       return false;
676d8e91e46SDimitry Andric   }
677d8e91e46SDimitry Andric 
678eb11fae6SDimitry Andric   return Result;
679eb11fae6SDimitry Andric }
680eb11fae6SDimitry Andric 
// Record \p Phi as an induction variable described by \p ID, updating the
// legality state that depends on it: the induction map, casts to ignore, the
// widest induction type, the primary induction candidate, and the set of
// values allowed to have users outside the loop (\p AllowedExit).
void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  // Get the widest type. Pointer-typed inductions are mapped to an integer
  // type of equivalent width before comparing.
  if (!PhiTy->isFloatingPointTy()) {
    if (!WidestIndTy)
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    else
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}
732eb11fae6SDimitry Andric 
setupOuterLoopInductions()733d8e91e46SDimitry Andric bool LoopVectorizationLegality::setupOuterLoopInductions() {
734d8e91e46SDimitry Andric   BasicBlock *Header = TheLoop->getHeader();
735d8e91e46SDimitry Andric 
736d8e91e46SDimitry Andric   // Returns true if a given Phi is a supported induction.
737d8e91e46SDimitry Andric   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
738d8e91e46SDimitry Andric     InductionDescriptor ID;
739d8e91e46SDimitry Andric     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
740d8e91e46SDimitry Andric         ID.getKind() == InductionDescriptor::IK_IntInduction) {
741d8e91e46SDimitry Andric       addInductionPhi(&Phi, ID, AllowedExit);
742d8e91e46SDimitry Andric       return true;
743d8e91e46SDimitry Andric     } else {
744d8e91e46SDimitry Andric       // Bail out for any Phi in the outer loop header that is not a supported
745d8e91e46SDimitry Andric       // induction.
746d8e91e46SDimitry Andric       LLVM_DEBUG(
747d8e91e46SDimitry Andric           dbgs()
748d8e91e46SDimitry Andric           << "LV: Found unsupported PHI for outer loop vectorization.\n");
749d8e91e46SDimitry Andric       return false;
750d8e91e46SDimitry Andric     }
751d8e91e46SDimitry Andric   };
752d8e91e46SDimitry Andric 
753d8e91e46SDimitry Andric   if (llvm::all_of(Header->phis(), isSupportedPhi))
754d8e91e46SDimitry Andric     return true;
755d8e91e46SDimitry Andric   else
756d8e91e46SDimitry Andric     return false;
757d8e91e46SDimitry Andric }
758d8e91e46SDimitry Andric 
/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    // Walk every power-of-two VF up to the widest one the TLI knows about;
    // a single VF with a named vector variant disables scalarization.
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}
787cfca06d7SDimitry Andric 
canVectorizeInstrs()788eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeInstrs() {
789eb11fae6SDimitry Andric   BasicBlock *Header = TheLoop->getHeader();
790eb11fae6SDimitry Andric 
791eb11fae6SDimitry Andric   // For each block in the loop.
792eb11fae6SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
793eb11fae6SDimitry Andric     // Scan the instructions in the block and look for hazards.
794eb11fae6SDimitry Andric     for (Instruction &I : *BB) {
795eb11fae6SDimitry Andric       if (auto *Phi = dyn_cast<PHINode>(&I)) {
796eb11fae6SDimitry Andric         Type *PhiTy = Phi->getType();
797eb11fae6SDimitry Andric         // Check that this PHI type is allowed.
798eb11fae6SDimitry Andric         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
799eb11fae6SDimitry Andric             !PhiTy->isPointerTy()) {
800e6d15924SDimitry Andric           reportVectorizationFailure("Found a non-int non-pointer PHI",
801e6d15924SDimitry Andric                                      "loop control flow is not understood by vectorizer",
8021d5ae102SDimitry Andric                                      "CFGNotUnderstood", ORE, TheLoop);
803eb11fae6SDimitry Andric           return false;
804eb11fae6SDimitry Andric         }
805eb11fae6SDimitry Andric 
806eb11fae6SDimitry Andric         // If this PHINode is not in the header block, then we know that we
807eb11fae6SDimitry Andric         // can convert it to select during if-conversion. No need to check if
808eb11fae6SDimitry Andric         // the PHIs in this block are induction or reduction variables.
809eb11fae6SDimitry Andric         if (BB != Header) {
810d8e91e46SDimitry Andric           // Non-header phi nodes that have outside uses can be vectorized. Add
811d8e91e46SDimitry Andric           // them to the list of allowed exits.
812d8e91e46SDimitry Andric           // Unsafe cyclic dependencies with header phis are identified during
813e3b55780SDimitry Andric           // legalization for reduction, induction and fixed order
814d8e91e46SDimitry Andric           // recurrences.
8151d5ae102SDimitry Andric           AllowedExit.insert(&I);
816eb11fae6SDimitry Andric           continue;
817eb11fae6SDimitry Andric         }
818eb11fae6SDimitry Andric 
819eb11fae6SDimitry Andric         // We only allow if-converted PHIs with exactly two incoming values.
820eb11fae6SDimitry Andric         if (Phi->getNumIncomingValues() != 2) {
821e6d15924SDimitry Andric           reportVectorizationFailure("Found an invalid PHI",
822e6d15924SDimitry Andric               "loop control flow is not understood by vectorizer",
8231d5ae102SDimitry Andric               "CFGNotUnderstood", ORE, TheLoop, Phi);
824eb11fae6SDimitry Andric           return false;
825eb11fae6SDimitry Andric         }
826eb11fae6SDimitry Andric 
827eb11fae6SDimitry Andric         RecurrenceDescriptor RedDes;
828eb11fae6SDimitry Andric         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
829145449b1SDimitry Andric                                                  DT, PSE.getSE())) {
830344a3780SDimitry Andric           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
831eb11fae6SDimitry Andric           AllowedExit.insert(RedDes.getLoopExitInstr());
832eb11fae6SDimitry Andric           Reductions[Phi] = RedDes;
833eb11fae6SDimitry Andric           continue;
834eb11fae6SDimitry Andric         }
835eb11fae6SDimitry Andric 
8367fa27ce4SDimitry Andric         // We prevent matching non-constant strided pointer IVS to preserve
8377fa27ce4SDimitry Andric         // historical vectorizer behavior after a generalization of the
8387fa27ce4SDimitry Andric         // IVDescriptor code.  The intent is to remove this check, but we
8397fa27ce4SDimitry Andric         // have to fix issues around code quality for such loops first.
8407fa27ce4SDimitry Andric         auto isDisallowedStridedPointerInduction =
8417fa27ce4SDimitry Andric           [](const InductionDescriptor &ID) {
8427fa27ce4SDimitry Andric           if (AllowStridedPointerIVs)
8437fa27ce4SDimitry Andric             return false;
8447fa27ce4SDimitry Andric           return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
8457fa27ce4SDimitry Andric             ID.getConstIntStepValue() == nullptr;
8467fa27ce4SDimitry Andric         };
8477fa27ce4SDimitry Andric 
848e3b55780SDimitry Andric         // TODO: Instead of recording the AllowedExit, it would be good to
849e3b55780SDimitry Andric         // record the complementary set: NotAllowedExit. These include (but may
850e3b55780SDimitry Andric         // not be limited to):
851d8e91e46SDimitry Andric         // 1. Reduction phis as they represent the one-before-last value, which
852d8e91e46SDimitry Andric         // is not available when vectorized
853d8e91e46SDimitry Andric         // 2. Induction phis and increment when SCEV predicates cannot be used
854d8e91e46SDimitry Andric         // outside the loop - see addInductionPhi
855d8e91e46SDimitry Andric         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
856d8e91e46SDimitry Andric         // outside the loop - see call to hasOutsideLoopUser in the non-phi
857d8e91e46SDimitry Andric         // handling below
858e3b55780SDimitry Andric         // 4. FixedOrderRecurrence phis that can possibly be handled by
859d8e91e46SDimitry Andric         // extraction.
860d8e91e46SDimitry Andric         // By recording these, we can then reason about ways to vectorize each
861d8e91e46SDimitry Andric         // of these NotAllowedExit.
862eb11fae6SDimitry Andric         InductionDescriptor ID;
8637fa27ce4SDimitry Andric         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
8647fa27ce4SDimitry Andric             !isDisallowedStridedPointerInduction(ID)) {
865eb11fae6SDimitry Andric           addInductionPhi(Phi, ID, AllowedExit);
866344a3780SDimitry Andric           Requirements->addExactFPMathInst(ID.getExactFPMathInst());
867eb11fae6SDimitry Andric           continue;
868eb11fae6SDimitry Andric         }
869eb11fae6SDimitry Andric 
8707fa27ce4SDimitry Andric         if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
871cfca06d7SDimitry Andric           AllowedExit.insert(Phi);
872e3b55780SDimitry Andric           FixedOrderRecurrences.insert(Phi);
873eb11fae6SDimitry Andric           continue;
874eb11fae6SDimitry Andric         }
875eb11fae6SDimitry Andric 
876eb11fae6SDimitry Andric         // As a last resort, coerce the PHI to a AddRec expression
877eb11fae6SDimitry Andric         // and re-try classifying it a an induction PHI.
8787fa27ce4SDimitry Andric         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
8797fa27ce4SDimitry Andric             !isDisallowedStridedPointerInduction(ID)) {
880eb11fae6SDimitry Andric           addInductionPhi(Phi, ID, AllowedExit);
881eb11fae6SDimitry Andric           continue;
882eb11fae6SDimitry Andric         }
883eb11fae6SDimitry Andric 
884e6d15924SDimitry Andric         reportVectorizationFailure("Found an unidentified PHI",
885e6d15924SDimitry Andric             "value that could not be identified as "
886e6d15924SDimitry Andric             "reduction is used outside the loop",
8871d5ae102SDimitry Andric             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
888eb11fae6SDimitry Andric         return false;
889eb11fae6SDimitry Andric       } // end of PHI handling
890eb11fae6SDimitry Andric 
891eb11fae6SDimitry Andric       // We handle calls that:
892eb11fae6SDimitry Andric       //   * Are debug info intrinsics.
893eb11fae6SDimitry Andric       //   * Have a mapping to an IR intrinsic.
894eb11fae6SDimitry Andric       //   * Have a vector version available.
895eb11fae6SDimitry Andric       auto *CI = dyn_cast<CallInst>(&I);
896cfca06d7SDimitry Andric 
897eb11fae6SDimitry Andric       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
898eb11fae6SDimitry Andric           !isa<DbgInfoIntrinsic>(CI) &&
899eb11fae6SDimitry Andric           !(CI->getCalledFunction() && TLI &&
900cfca06d7SDimitry Andric             (!VFDatabase::getMappings(*CI).empty() ||
901cfca06d7SDimitry Andric              isTLIScalarize(*TLI, *CI)))) {
902d8e91e46SDimitry Andric         // If the call is a recognized math libary call, it is likely that
903d8e91e46SDimitry Andric         // we can vectorize it given loosened floating-point constraints.
904d8e91e46SDimitry Andric         LibFunc Func;
905d8e91e46SDimitry Andric         bool IsMathLibCall =
906d8e91e46SDimitry Andric             TLI && CI->getCalledFunction() &&
907d8e91e46SDimitry Andric             CI->getType()->isFloatingPointTy() &&
908d8e91e46SDimitry Andric             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
909d8e91e46SDimitry Andric             TLI->hasOptimizedCodeGen(Func);
910d8e91e46SDimitry Andric 
911d8e91e46SDimitry Andric         if (IsMathLibCall) {
912d8e91e46SDimitry Andric           // TODO: Ideally, we should not use clang-specific language here,
913d8e91e46SDimitry Andric           // but it's hard to provide meaningful yet generic advice.
914d8e91e46SDimitry Andric           // Also, should this be guarded by allowExtraAnalysis() and/or be part
915d8e91e46SDimitry Andric           // of the returned info from isFunctionVectorizable()?
916cfca06d7SDimitry Andric           reportVectorizationFailure(
917cfca06d7SDimitry Andric               "Found a non-intrinsic callsite",
918e6d15924SDimitry Andric               "library call cannot be vectorized. "
919d8e91e46SDimitry Andric               "Try compiling with -fno-math-errno, -ffast-math, "
920e6d15924SDimitry Andric               "or similar flags",
9211d5ae102SDimitry Andric               "CantVectorizeLibcall", ORE, TheLoop, CI);
922d8e91e46SDimitry Andric         } else {
923e6d15924SDimitry Andric           reportVectorizationFailure("Found a non-intrinsic callsite",
924e6d15924SDimitry Andric                                      "call instruction cannot be vectorized",
9251d5ae102SDimitry Andric                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
926d8e91e46SDimitry Andric         }
927eb11fae6SDimitry Andric         return false;
928eb11fae6SDimitry Andric       }
929eb11fae6SDimitry Andric 
930e6d15924SDimitry Andric       // Some intrinsics have scalar arguments and should be same in order for
931e6d15924SDimitry Andric       // them to be vectorized (i.e. loop invariant).
932e6d15924SDimitry Andric       if (CI) {
933eb11fae6SDimitry Andric         auto *SE = PSE.getSE();
934e6d15924SDimitry Andric         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
935c0981da4SDimitry Andric         for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
936145449b1SDimitry Andric           if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
937e6d15924SDimitry Andric             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
938e6d15924SDimitry Andric               reportVectorizationFailure("Found unvectorizable intrinsic",
939e6d15924SDimitry Andric                   "intrinsic instruction cannot be vectorized",
9401d5ae102SDimitry Andric                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
941eb11fae6SDimitry Andric               return false;
942eb11fae6SDimitry Andric             }
943eb11fae6SDimitry Andric           }
944e6d15924SDimitry Andric       }
945eb11fae6SDimitry Andric 
946b1c73532SDimitry Andric       // If we found a vectorized variant of a function, note that so LV can
947b1c73532SDimitry Andric       // make better decisions about maximum VF.
948b1c73532SDimitry Andric       if (CI && !VFDatabase::getMappings(*CI).empty())
949b1c73532SDimitry Andric         VecCallVariantsFound = true;
950b1c73532SDimitry Andric 
951eb11fae6SDimitry Andric       // Check that the instruction return type is vectorizable.
952eb11fae6SDimitry Andric       // Also, we can't vectorize extractelement instructions.
953eb11fae6SDimitry Andric       if ((!VectorType::isValidElementType(I.getType()) &&
954eb11fae6SDimitry Andric            !I.getType()->isVoidTy()) ||
955eb11fae6SDimitry Andric           isa<ExtractElementInst>(I)) {
956e6d15924SDimitry Andric         reportVectorizationFailure("Found unvectorizable type",
957e6d15924SDimitry Andric             "instruction return type cannot be vectorized",
9581d5ae102SDimitry Andric             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
959eb11fae6SDimitry Andric         return false;
960eb11fae6SDimitry Andric       }
961eb11fae6SDimitry Andric 
962eb11fae6SDimitry Andric       // Check that the stored type is vectorizable.
963eb11fae6SDimitry Andric       if (auto *ST = dyn_cast<StoreInst>(&I)) {
964eb11fae6SDimitry Andric         Type *T = ST->getValueOperand()->getType();
965eb11fae6SDimitry Andric         if (!VectorType::isValidElementType(T)) {
966e6d15924SDimitry Andric           reportVectorizationFailure("Store instruction cannot be vectorized",
967e6d15924SDimitry Andric                                      "store instruction cannot be vectorized",
9681d5ae102SDimitry Andric                                      "CantVectorizeStore", ORE, TheLoop, ST);
969eb11fae6SDimitry Andric           return false;
970eb11fae6SDimitry Andric         }
971eb11fae6SDimitry Andric 
972e6d15924SDimitry Andric         // For nontemporal stores, check that a nontemporal vector version is
973e6d15924SDimitry Andric         // supported on the target.
974e6d15924SDimitry Andric         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
975e6d15924SDimitry Andric           // Arbitrarily try a vector of 2 elements.
976b60736ecSDimitry Andric           auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
977e6d15924SDimitry Andric           assert(VecTy && "did not find vectorized version of stored type");
978cfca06d7SDimitry Andric           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
979e6d15924SDimitry Andric             reportVectorizationFailure(
980e6d15924SDimitry Andric                 "nontemporal store instruction cannot be vectorized",
981e6d15924SDimitry Andric                 "nontemporal store instruction cannot be vectorized",
9821d5ae102SDimitry Andric                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
983e6d15924SDimitry Andric             return false;
984e6d15924SDimitry Andric           }
985e6d15924SDimitry Andric         }
986e6d15924SDimitry Andric 
987e6d15924SDimitry Andric       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
988e6d15924SDimitry Andric         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
989e6d15924SDimitry Andric           // For nontemporal loads, check that a nontemporal vector version is
990e6d15924SDimitry Andric           // supported on the target (arbitrarily try a vector of 2 elements).
991b60736ecSDimitry Andric           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
992e6d15924SDimitry Andric           assert(VecTy && "did not find vectorized version of load type");
993cfca06d7SDimitry Andric           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
994e6d15924SDimitry Andric             reportVectorizationFailure(
995e6d15924SDimitry Andric                 "nontemporal load instruction cannot be vectorized",
996e6d15924SDimitry Andric                 "nontemporal load instruction cannot be vectorized",
9971d5ae102SDimitry Andric                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
998e6d15924SDimitry Andric             return false;
999e6d15924SDimitry Andric           }
1000e6d15924SDimitry Andric         }
1001e6d15924SDimitry Andric 
1002eb11fae6SDimitry Andric         // FP instructions can allow unsafe algebra, thus vectorizable by
1003eb11fae6SDimitry Andric         // non-IEEE-754 compliant SIMD units.
1004eb11fae6SDimitry Andric         // This applies to floating-point math operations and calls, not memory
1005eb11fae6SDimitry Andric         // operations, shuffles, or casts, as they don't change precision or
1006eb11fae6SDimitry Andric         // semantics.
1007eb11fae6SDimitry Andric       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1008eb11fae6SDimitry Andric                  !I.isFast()) {
1009eb11fae6SDimitry Andric         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1010eb11fae6SDimitry Andric         Hints->setPotentiallyUnsafe();
1011eb11fae6SDimitry Andric       }
1012eb11fae6SDimitry Andric 
1013eb11fae6SDimitry Andric       // Reduction instructions are allowed to have exit users.
1014eb11fae6SDimitry Andric       // All other instructions must not have external users.
1015eb11fae6SDimitry Andric       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1016d8e91e46SDimitry Andric         // We can safely vectorize loops where instructions within the loop are
1017d8e91e46SDimitry Andric         // used outside the loop only if the SCEV predicates within the loop is
1018d8e91e46SDimitry Andric         // same as outside the loop. Allowing the exit means reusing the SCEV
1019d8e91e46SDimitry Andric         // outside the loop.
1020145449b1SDimitry Andric         if (PSE.getPredicate().isAlwaysTrue()) {
1021d8e91e46SDimitry Andric           AllowedExit.insert(&I);
1022d8e91e46SDimitry Andric           continue;
1023d8e91e46SDimitry Andric         }
1024e6d15924SDimitry Andric         reportVectorizationFailure("Value cannot be used outside the loop",
1025e6d15924SDimitry Andric                                    "value cannot be used outside the loop",
10261d5ae102SDimitry Andric                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1027eb11fae6SDimitry Andric         return false;
1028eb11fae6SDimitry Andric       }
1029eb11fae6SDimitry Andric     } // next instr.
1030eb11fae6SDimitry Andric   }
1031eb11fae6SDimitry Andric 
1032eb11fae6SDimitry Andric   if (!PrimaryInduction) {
1033eb11fae6SDimitry Andric     if (Inductions.empty()) {
1034e6d15924SDimitry Andric       reportVectorizationFailure("Did not find one integer induction var",
1035e6d15924SDimitry Andric           "loop induction variable could not be identified",
10361d5ae102SDimitry Andric           "NoInductionVariable", ORE, TheLoop);
1037eb11fae6SDimitry Andric       return false;
1038d8e91e46SDimitry Andric     } else if (!WidestIndTy) {
1039e6d15924SDimitry Andric       reportVectorizationFailure("Did not find one integer induction var",
1040e6d15924SDimitry Andric           "integer loop induction variable could not be identified",
10411d5ae102SDimitry Andric           "NoIntegerInductionVariable", ORE, TheLoop);
1042d8e91e46SDimitry Andric       return false;
1043e6d15924SDimitry Andric     } else {
1044e6d15924SDimitry Andric       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
1045eb11fae6SDimitry Andric     }
1046eb11fae6SDimitry Andric   }
1047eb11fae6SDimitry Andric 
1048eb11fae6SDimitry Andric   // Now we know the widest induction type, check if our found induction
1049eb11fae6SDimitry Andric   // is the same size. If it's not, unset it here and InnerLoopVectorizer
1050eb11fae6SDimitry Andric   // will create another.
1051eb11fae6SDimitry Andric   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
1052eb11fae6SDimitry Andric     PrimaryInduction = nullptr;
1053eb11fae6SDimitry Andric 
1054eb11fae6SDimitry Andric   return true;
1055eb11fae6SDimitry Andric }
1056eb11fae6SDimitry Andric 
bool LoopVectorizationLegality::canVectorizeMemory() {
  // Run (or fetch the cached) loop access analysis for this loop; it performs
  // the memory dependence and runtime-pointer-check legality work.
  LAI = &LAIs.getInfo(*TheLoop);
  // If the analysis recorded a failure report, surface it as an optimization
  // remark before acting on the result.
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory())
    return false;

  // A load/store dependence involving a loop-invariant address cannot be
  // vectorized at all; bail out early.
  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to invariant address when final reduction value is
  // guaranteed to be stored at the end of the loop. Also, if decision to
  // vectorize loop is made, runtime checks are added so as to make sure that
  // invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if last stored value is unconditional
    // and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      if (!isInvariantStoreOfReduction(SI))
        continue;

      // A predicated (conditionally executed) store of the reduction result
      // cannot be sunk out of the loop, so reject it.
      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not, we do not want
      // to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is already
      // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively deadcode.
          // With opaque pointers it is possible for one pointer to be used with
          // different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that types of stored
          // values are same.
          // TODO: Check that bitwidth of unhandled store is smaller than the
          // one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  // Fold the access analysis' SCEV predicate into ours, so any runtime checks
  // it assumed are also emitted if the loop is vectorized.
  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}
1160eb11fae6SDimitry Andric 
canVectorizeFPMath(bool EnableStrictReductions)1161344a3780SDimitry Andric bool LoopVectorizationLegality::canVectorizeFPMath(
1162344a3780SDimitry Andric     bool EnableStrictReductions) {
1163344a3780SDimitry Andric 
1164344a3780SDimitry Andric   // First check if there is any ExactFP math or if we allow reassociations
1165344a3780SDimitry Andric   if (!Requirements->getExactFPInst() || Hints->allowReordering())
1166344a3780SDimitry Andric     return true;
1167344a3780SDimitry Andric 
1168344a3780SDimitry Andric   // If the above is false, we have ExactFPMath & do not allow reordering.
1169344a3780SDimitry Andric   // If the EnableStrictReductions flag is set, first check if we have any
1170344a3780SDimitry Andric   // Exact FP induction vars, which we cannot vectorize.
1171344a3780SDimitry Andric   if (!EnableStrictReductions ||
1172344a3780SDimitry Andric       any_of(getInductionVars(), [&](auto &Induction) -> bool {
1173344a3780SDimitry Andric         InductionDescriptor IndDesc = Induction.second;
1174344a3780SDimitry Andric         return IndDesc.getExactFPMathInst();
1175344a3780SDimitry Andric       }))
1176344a3780SDimitry Andric     return false;
1177344a3780SDimitry Andric 
1178344a3780SDimitry Andric   // We can now only vectorize if all reductions with Exact FP math also
1179344a3780SDimitry Andric   // have the isOrdered flag set, which indicates that we can move the
1180344a3780SDimitry Andric   // reduction operations in-loop.
1181344a3780SDimitry Andric   return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1182344a3780SDimitry Andric     const RecurrenceDescriptor &RdxDesc = Reduction.second;
1183344a3780SDimitry Andric     return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1184344a3780SDimitry Andric   }));
1185344a3780SDimitry Andric }
1186344a3780SDimitry Andric 
isInvariantStoreOfReduction(StoreInst * SI)1187145449b1SDimitry Andric bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
1188145449b1SDimitry Andric   return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1189145449b1SDimitry Andric     const RecurrenceDescriptor &RdxDesc = Reduction.second;
1190145449b1SDimitry Andric     return RdxDesc.IntermediateStore == SI;
1191145449b1SDimitry Andric   });
1192145449b1SDimitry Andric }
1193145449b1SDimitry Andric 
isInvariantAddressOfReduction(Value * V)1194145449b1SDimitry Andric bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
1195145449b1SDimitry Andric   return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1196145449b1SDimitry Andric     const RecurrenceDescriptor &RdxDesc = Reduction.second;
1197145449b1SDimitry Andric     if (!RdxDesc.IntermediateStore)
1198145449b1SDimitry Andric       return false;
1199145449b1SDimitry Andric 
1200145449b1SDimitry Andric     ScalarEvolution *SE = PSE.getSE();
1201145449b1SDimitry Andric     Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1202145449b1SDimitry Andric     return V == InvariantAddress ||
1203145449b1SDimitry Andric            SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1204145449b1SDimitry Andric   });
1205145449b1SDimitry Andric }
1206145449b1SDimitry Andric 
isInductionPhi(const Value * V) const120777fc4c14SDimitry Andric bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
1208eb11fae6SDimitry Andric   Value *In0 = const_cast<Value *>(V);
1209eb11fae6SDimitry Andric   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1210eb11fae6SDimitry Andric   if (!PN)
1211eb11fae6SDimitry Andric     return false;
1212eb11fae6SDimitry Andric 
1213eb11fae6SDimitry Andric   return Inductions.count(PN);
1214eb11fae6SDimitry Andric }
1215eb11fae6SDimitry Andric 
121677fc4c14SDimitry Andric const InductionDescriptor *
getIntOrFpInductionDescriptor(PHINode * Phi) const121777fc4c14SDimitry Andric LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
121877fc4c14SDimitry Andric   if (!isInductionPhi(Phi))
121977fc4c14SDimitry Andric     return nullptr;
122077fc4c14SDimitry Andric   auto &ID = getInductionVars().find(Phi)->second;
122177fc4c14SDimitry Andric   if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
122277fc4c14SDimitry Andric       ID.getKind() == InductionDescriptor::IK_FpInduction)
122377fc4c14SDimitry Andric     return &ID;
122477fc4c14SDimitry Andric   return nullptr;
122577fc4c14SDimitry Andric }
122677fc4c14SDimitry Andric 
1227145449b1SDimitry Andric const InductionDescriptor *
getPointerInductionDescriptor(PHINode * Phi) const1228145449b1SDimitry Andric LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
1229145449b1SDimitry Andric   if (!isInductionPhi(Phi))
1230145449b1SDimitry Andric     return nullptr;
1231145449b1SDimitry Andric   auto &ID = getInductionVars().find(Phi)->second;
1232145449b1SDimitry Andric   if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
1233145449b1SDimitry Andric     return &ID;
1234145449b1SDimitry Andric   return nullptr;
1235145449b1SDimitry Andric }
1236145449b1SDimitry Andric 
isCastedInductionVariable(const Value * V) const123777fc4c14SDimitry Andric bool LoopVectorizationLegality::isCastedInductionVariable(
123877fc4c14SDimitry Andric     const Value *V) const {
1239eb11fae6SDimitry Andric   auto *Inst = dyn_cast<Instruction>(V);
1240eb11fae6SDimitry Andric   return (Inst && InductionCastsToIgnore.count(Inst));
1241eb11fae6SDimitry Andric }
1242eb11fae6SDimitry Andric 
isInductionVariable(const Value * V) const124377fc4c14SDimitry Andric bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
1244eb11fae6SDimitry Andric   return isInductionPhi(V) || isCastedInductionVariable(V);
1245eb11fae6SDimitry Andric }
1246eb11fae6SDimitry Andric 
isFixedOrderRecurrence(const PHINode * Phi) const1247e3b55780SDimitry Andric bool LoopVectorizationLegality::isFixedOrderRecurrence(
124877fc4c14SDimitry Andric     const PHINode *Phi) const {
1249e3b55780SDimitry Andric   return FixedOrderRecurrences.count(Phi);
1250eb11fae6SDimitry Andric }
1251eb11fae6SDimitry Andric 
// Return true iff \p BB must be predicated when the loop body is flattened.
// Forwards to the shared LoopAccessInfo helper, which decides this for
// TheLoop using the dominator tree.
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
1255eb11fae6SDimitry Andric 
blockCanBePredicated(BasicBlock * BB,SmallPtrSetImpl<Value * > & SafePtrs,SmallPtrSetImpl<const Instruction * > & MaskedOp) const1256eb11fae6SDimitry Andric bool LoopVectorizationLegality::blockCanBePredicated(
1257b60736ecSDimitry Andric     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1258b1c73532SDimitry Andric     SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1259eb11fae6SDimitry Andric   for (Instruction &I : *BB) {
1260cfca06d7SDimitry Andric     // We can predicate blocks with calls to assume, as long as we drop them in
1261cfca06d7SDimitry Andric     // case we flatten the CFG via predication.
1262cfca06d7SDimitry Andric     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1263b1c73532SDimitry Andric       MaskedOp.insert(&I);
1264cfca06d7SDimitry Andric       continue;
1265cfca06d7SDimitry Andric     }
1266cfca06d7SDimitry Andric 
1267b60736ecSDimitry Andric     // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1268b60736ecSDimitry Andric     // TODO: there might be cases that it should block the vectorization. Let's
1269b60736ecSDimitry Andric     // ignore those for now.
1270b60736ecSDimitry Andric     if (isa<NoAliasScopeDeclInst>(&I))
1271b60736ecSDimitry Andric       continue;
1272b60736ecSDimitry Andric 
12737fa27ce4SDimitry Andric     // We can allow masked calls if there's at least one vector variant, even
12747fa27ce4SDimitry Andric     // if we end up scalarizing due to the cost model calculations.
12757fa27ce4SDimitry Andric     // TODO: Allow other calls if they have appropriate attributes... readonly
12767fa27ce4SDimitry Andric     // and argmemonly?
12777fa27ce4SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&I))
12787fa27ce4SDimitry Andric       if (VFDatabase::hasMaskedVariant(*CI)) {
12797fa27ce4SDimitry Andric         MaskedOp.insert(CI);
12807fa27ce4SDimitry Andric         continue;
12817fa27ce4SDimitry Andric       }
12827fa27ce4SDimitry Andric 
1283e3b55780SDimitry Andric     // Loads are handled via masking (or speculated if safe to do so.)
1284e3b55780SDimitry Andric     if (auto *LI = dyn_cast<LoadInst>(&I)) {
1285e3b55780SDimitry Andric       if (!SafePtrs.count(LI->getPointerOperand()))
1286eb11fae6SDimitry Andric         MaskedOp.insert(LI);
1287eb11fae6SDimitry Andric       continue;
1288eb11fae6SDimitry Andric     }
1289eb11fae6SDimitry Andric 
1290eb11fae6SDimitry Andric     // Predicated store requires some form of masking:
1291eb11fae6SDimitry Andric     // 1) masked store HW instruction,
1292eb11fae6SDimitry Andric     // 2) emulation via load-blend-store (only if safe and legal to do so,
1293eb11fae6SDimitry Andric     //    be aware on the race conditions), or
1294eb11fae6SDimitry Andric     // 3) element-by-element predicate check and scalar store.
1295e3b55780SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(&I)) {
1296eb11fae6SDimitry Andric       MaskedOp.insert(SI);
1297eb11fae6SDimitry Andric       continue;
1298eb11fae6SDimitry Andric     }
1299e3b55780SDimitry Andric 
1300e3b55780SDimitry Andric     if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1301eb11fae6SDimitry Andric       return false;
1302eb11fae6SDimitry Andric   }
1303eb11fae6SDimitry Andric 
1304eb11fae6SDimitry Andric   return true;
1305eb11fae6SDimitry Andric }
1306eb11fae6SDimitry Andric 
// Check that every conditionally-executed block of the loop can be
// if-converted (turned into selects / masked operations), recording masked
// operations in MaskedOp as a side effect.
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "if-conversion is disabled",
                               "IfConversionDisabled",
                               ORE, TheLoop);
    return false;
  }

  // Single-block loops never reach this query.
  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes.  That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Unconditionally executed blocks: every accessed pointer is safe.
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
        SafePointers.insert(LI->getPointerOperand());
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We don't support switch statements inside loops.
    if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains a switch statement",
                                 "loop contains a switch statement",
                                 "LoopContainsSwitch", ORE, TheLoop,
                                 BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select",
          "control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}
1373eb11fae6SDimitry Andric 
1374eb11fae6SDimitry Andric // Helper function to canVectorizeLoopNestCFG.
canVectorizeLoopCFG(Loop * Lp,bool UseVPlanNativePath)1375eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1376eb11fae6SDimitry Andric                                                     bool UseVPlanNativePath) {
1377b60736ecSDimitry Andric   assert((UseVPlanNativePath || Lp->isInnermost()) &&
1378eb11fae6SDimitry Andric          "VPlan-native path is not enabled.");
1379eb11fae6SDimitry Andric 
1380eb11fae6SDimitry Andric   // TODO: ORE should be improved to show more accurate information when an
1381eb11fae6SDimitry Andric   // outer loop can't be vectorized because a nested loop is not understood or
1382eb11fae6SDimitry Andric   // legal. Something like: "outer_loop_location: loop not vectorized:
1383eb11fae6SDimitry Andric   // (inner_loop_location) loop control flow is not understood by vectorizer".
1384eb11fae6SDimitry Andric 
1385eb11fae6SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
1386eb11fae6SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1387eb11fae6SDimitry Andric   bool Result = true;
1388eb11fae6SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1389eb11fae6SDimitry Andric 
1390eb11fae6SDimitry Andric   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1391eb11fae6SDimitry Andric   // be canonicalized.
1392eb11fae6SDimitry Andric   if (!Lp->getLoopPreheader()) {
1393e6d15924SDimitry Andric     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1394e6d15924SDimitry Andric         "loop control flow is not understood by vectorizer",
13951d5ae102SDimitry Andric         "CFGNotUnderstood", ORE, TheLoop);
1396eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1397eb11fae6SDimitry Andric       Result = false;
1398eb11fae6SDimitry Andric     else
1399eb11fae6SDimitry Andric       return false;
1400eb11fae6SDimitry Andric   }
1401eb11fae6SDimitry Andric 
1402eb11fae6SDimitry Andric   // We must have a single backedge.
1403eb11fae6SDimitry Andric   if (Lp->getNumBackEdges() != 1) {
1404e6d15924SDimitry Andric     reportVectorizationFailure("The loop must have a single backedge",
1405e6d15924SDimitry Andric         "loop control flow is not understood by vectorizer",
14061d5ae102SDimitry Andric         "CFGNotUnderstood", ORE, TheLoop);
1407eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1408eb11fae6SDimitry Andric       Result = false;
1409eb11fae6SDimitry Andric     else
1410eb11fae6SDimitry Andric       return false;
1411eb11fae6SDimitry Andric   }
1412eb11fae6SDimitry Andric 
1413eb11fae6SDimitry Andric   return Result;
1414eb11fae6SDimitry Andric }
1415eb11fae6SDimitry Andric 
canVectorizeLoopNestCFG(Loop * Lp,bool UseVPlanNativePath)1416eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1417eb11fae6SDimitry Andric     Loop *Lp, bool UseVPlanNativePath) {
1418eb11fae6SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
1419eb11fae6SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1420eb11fae6SDimitry Andric   bool Result = true;
1421eb11fae6SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1422eb11fae6SDimitry Andric   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1423eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1424eb11fae6SDimitry Andric       Result = false;
1425eb11fae6SDimitry Andric     else
1426eb11fae6SDimitry Andric       return false;
1427eb11fae6SDimitry Andric   }
1428eb11fae6SDimitry Andric 
1429eb11fae6SDimitry Andric   // Recursively check whether the loop control flow of nested loops is
1430eb11fae6SDimitry Andric   // understood.
1431eb11fae6SDimitry Andric   for (Loop *SubLp : *Lp)
1432eb11fae6SDimitry Andric     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1433eb11fae6SDimitry Andric       if (DoExtraAnalysis)
1434eb11fae6SDimitry Andric         Result = false;
1435eb11fae6SDimitry Andric       else
1436eb11fae6SDimitry Andric         return false;
1437eb11fae6SDimitry Andric     }
1438eb11fae6SDimitry Andric 
1439eb11fae6SDimitry Andric   return Result;
1440eb11fae6SDimitry Andric }
1441eb11fae6SDimitry Andric 
canVectorize(bool UseVPlanNativePath)1442eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1443eb11fae6SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
1444eb11fae6SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1445eb11fae6SDimitry Andric   bool Result = true;
1446eb11fae6SDimitry Andric 
1447eb11fae6SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1448eb11fae6SDimitry Andric   // Check whether the loop-related control flow in the loop nest is expected by
1449eb11fae6SDimitry Andric   // vectorizer.
1450eb11fae6SDimitry Andric   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1451eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1452eb11fae6SDimitry Andric       Result = false;
1453eb11fae6SDimitry Andric     else
1454eb11fae6SDimitry Andric       return false;
1455eb11fae6SDimitry Andric   }
1456eb11fae6SDimitry Andric 
1457eb11fae6SDimitry Andric   // We need to have a loop header.
1458eb11fae6SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1459eb11fae6SDimitry Andric                     << '\n');
1460eb11fae6SDimitry Andric 
1461eb11fae6SDimitry Andric   // Specific checks for outer loops. We skip the remaining legal checks at this
1462eb11fae6SDimitry Andric   // point because they don't support outer loops.
1463b60736ecSDimitry Andric   if (!TheLoop->isInnermost()) {
1464eb11fae6SDimitry Andric     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1465eb11fae6SDimitry Andric 
1466eb11fae6SDimitry Andric     if (!canVectorizeOuterLoop()) {
1467e6d15924SDimitry Andric       reportVectorizationFailure("Unsupported outer loop",
1468e6d15924SDimitry Andric                                  "unsupported outer loop",
14691d5ae102SDimitry Andric                                  "UnsupportedOuterLoop",
14701d5ae102SDimitry Andric                                  ORE, TheLoop);
1471eb11fae6SDimitry Andric       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1472eb11fae6SDimitry Andric       // outer loops.
1473eb11fae6SDimitry Andric       return false;
1474eb11fae6SDimitry Andric     }
1475eb11fae6SDimitry Andric 
1476eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1477eb11fae6SDimitry Andric     return Result;
1478eb11fae6SDimitry Andric   }
1479eb11fae6SDimitry Andric 
1480b60736ecSDimitry Andric   assert(TheLoop->isInnermost() && "Inner loop expected.");
1481eb11fae6SDimitry Andric   // Check if we can if-convert non-single-bb loops.
1482eb11fae6SDimitry Andric   unsigned NumBlocks = TheLoop->getNumBlocks();
1483eb11fae6SDimitry Andric   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1484eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1485eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1486eb11fae6SDimitry Andric       Result = false;
1487eb11fae6SDimitry Andric     else
1488eb11fae6SDimitry Andric       return false;
1489eb11fae6SDimitry Andric   }
1490eb11fae6SDimitry Andric 
1491eb11fae6SDimitry Andric   // Check if we can vectorize the instructions and CFG in this loop.
1492eb11fae6SDimitry Andric   if (!canVectorizeInstrs()) {
1493eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1494eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1495eb11fae6SDimitry Andric       Result = false;
1496eb11fae6SDimitry Andric     else
1497eb11fae6SDimitry Andric       return false;
1498eb11fae6SDimitry Andric   }
1499eb11fae6SDimitry Andric 
1500eb11fae6SDimitry Andric   // Go over each instruction and look at memory deps.
1501eb11fae6SDimitry Andric   if (!canVectorizeMemory()) {
1502eb11fae6SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1503eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1504eb11fae6SDimitry Andric       Result = false;
1505eb11fae6SDimitry Andric     else
1506eb11fae6SDimitry Andric       return false;
1507eb11fae6SDimitry Andric   }
1508eb11fae6SDimitry Andric 
1509ac9a064cSDimitry Andric   if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1510ac9a064cSDimitry Andric     reportVectorizationFailure("could not determine number of loop iterations",
1511ac9a064cSDimitry Andric                                "could not determine number of loop iterations",
1512ac9a064cSDimitry Andric                                "CantComputeNumberOfIterations", ORE, TheLoop);
1513ac9a064cSDimitry Andric     if (DoExtraAnalysis)
1514ac9a064cSDimitry Andric       Result = false;
1515ac9a064cSDimitry Andric     else
1516ac9a064cSDimitry Andric       return false;
1517ac9a064cSDimitry Andric   }
1518ac9a064cSDimitry Andric 
1519eb11fae6SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1520eb11fae6SDimitry Andric                     << (LAI->getRuntimePointerChecking()->Need
1521eb11fae6SDimitry Andric                             ? " (with a runtime bound check)"
1522eb11fae6SDimitry Andric                             : "")
1523eb11fae6SDimitry Andric                     << "!\n");
1524eb11fae6SDimitry Andric 
1525eb11fae6SDimitry Andric   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1526eb11fae6SDimitry Andric   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1527eb11fae6SDimitry Andric     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1528eb11fae6SDimitry Andric 
1529145449b1SDimitry Andric   if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
1530e6d15924SDimitry Andric     reportVectorizationFailure("Too many SCEV checks needed",
1531e6d15924SDimitry Andric         "Too many SCEV assumptions need to be made and checked at runtime",
15321d5ae102SDimitry Andric         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1533eb11fae6SDimitry Andric     if (DoExtraAnalysis)
1534eb11fae6SDimitry Andric       Result = false;
1535eb11fae6SDimitry Andric     else
1536eb11fae6SDimitry Andric       return false;
1537eb11fae6SDimitry Andric   }
1538eb11fae6SDimitry Andric 
1539eb11fae6SDimitry Andric   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1540eb11fae6SDimitry Andric   // we can vectorize, and at this point we don't have any other mem analysis
1541eb11fae6SDimitry Andric   // which may limit our maximum vectorization factor, so just return true with
1542eb11fae6SDimitry Andric   // no restrictions.
1543eb11fae6SDimitry Andric   return Result;
1544eb11fae6SDimitry Andric }
1545eb11fae6SDimitry Andric 
canFoldTailByMasking() const1546ac9a064cSDimitry Andric bool LoopVectorizationLegality::canFoldTailByMasking() const {
1547d8e91e46SDimitry Andric 
1548d8e91e46SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1549d8e91e46SDimitry Andric 
15501d5ae102SDimitry Andric   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1551d8e91e46SDimitry Andric 
1552e3b55780SDimitry Andric   for (const auto &Reduction : getReductionVars())
15531d5ae102SDimitry Andric     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
15541d5ae102SDimitry Andric 
15551d5ae102SDimitry Andric   // TODO: handle non-reduction outside users when tail is folded by masking.
1556d8e91e46SDimitry Andric   for (auto *AE : AllowedExit) {
15571d5ae102SDimitry Andric     // Check that all users of allowed exit values are inside the loop or
15581d5ae102SDimitry Andric     // are the live-out of a reduction.
15591d5ae102SDimitry Andric     if (ReductionLiveOuts.count(AE))
15601d5ae102SDimitry Andric       continue;
1561d8e91e46SDimitry Andric     for (User *U : AE->users()) {
1562d8e91e46SDimitry Andric       Instruction *UI = cast<Instruction>(U);
1563d8e91e46SDimitry Andric       if (TheLoop->contains(UI))
1564d8e91e46SDimitry Andric         continue;
1565b60736ecSDimitry Andric       LLVM_DEBUG(
1566b60736ecSDimitry Andric           dbgs()
1567b60736ecSDimitry Andric           << "LV: Cannot fold tail by masking, loop has an outside user for "
1568b60736ecSDimitry Andric           << *UI << "\n");
1569d8e91e46SDimitry Andric       return false;
1570d8e91e46SDimitry Andric     }
1571d8e91e46SDimitry Andric   }
1572d8e91e46SDimitry Andric 
1573ac9a064cSDimitry Andric   for (const auto &Entry : getInductionVars()) {
1574ac9a064cSDimitry Andric     PHINode *OrigPhi = Entry.first;
1575ac9a064cSDimitry Andric     for (User *U : OrigPhi->users()) {
1576ac9a064cSDimitry Andric       auto *UI = cast<Instruction>(U);
1577ac9a064cSDimitry Andric       if (!TheLoop->contains(UI)) {
1578ac9a064cSDimitry Andric         LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1579ac9a064cSDimitry Andric                              "outside user for "
1580ac9a064cSDimitry Andric                           << *UI << "\n");
1581ac9a064cSDimitry Andric         return false;
1582ac9a064cSDimitry Andric       }
1583ac9a064cSDimitry Andric     }
1584ac9a064cSDimitry Andric   }
1585ac9a064cSDimitry Andric 
1586d8e91e46SDimitry Andric   // The list of pointers that we can safely read and write to remains empty.
1587d8e91e46SDimitry Andric   SmallPtrSet<Value *, 8> SafePointers;
1588d8e91e46SDimitry Andric 
1589ac9a064cSDimitry Andric   // Check all blocks for predication, including those that ordinarily do not
1590ac9a064cSDimitry Andric   // need predication such as the header block.
1591b60736ecSDimitry Andric   SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1592d8e91e46SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
1593b1c73532SDimitry Andric     if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1594ac9a064cSDimitry Andric       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1595d8e91e46SDimitry Andric       return false;
1596d8e91e46SDimitry Andric     }
1597d8e91e46SDimitry Andric   }
1598d8e91e46SDimitry Andric 
1599d8e91e46SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1600b60736ecSDimitry Andric 
1601d8e91e46SDimitry Andric   return true;
1602d8e91e46SDimitry Andric }
1603d8e91e46SDimitry Andric 
prepareToFoldTailByMasking()1604ac9a064cSDimitry Andric void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1605ac9a064cSDimitry Andric   // The list of pointers that we can safely read and write to remains empty.
1606ac9a064cSDimitry Andric   SmallPtrSet<Value *, 8> SafePointers;
1607ac9a064cSDimitry Andric 
1608ac9a064cSDimitry Andric   // Mark all blocks for predication, including those that ordinarily do not
1609ac9a064cSDimitry Andric   // need predication such as the header block.
1610ac9a064cSDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
1611ac9a064cSDimitry Andric     [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1612ac9a064cSDimitry Andric     assert(R && "Must be able to predicate block when tail-folding.");
1613ac9a064cSDimitry Andric   }
1614ac9a064cSDimitry Andric }
1615ac9a064cSDimitry Andric 
1616eb11fae6SDimitry Andric } // namespace llvm
1617