1eb11fae6SDimitry Andric //===- LoopVectorizationLegality.cpp --------------------------------------===//
2eb11fae6SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6eb11fae6SDimitry Andric //
7eb11fae6SDimitry Andric //===----------------------------------------------------------------------===//
8eb11fae6SDimitry Andric //
9eb11fae6SDimitry Andric // This file provides loop vectorization legality analysis. Original code
10eb11fae6SDimitry Andric // resided in LoopVectorize.cpp for a long time.
11eb11fae6SDimitry Andric //
12eb11fae6SDimitry Andric // At this point, it is implemented as a utility class, not as an analysis
13eb11fae6SDimitry Andric // pass. It should be easy to create an analysis pass around it if there
14eb11fae6SDimitry Andric // is a need (but D45420 needs to happen first).
15eb11fae6SDimitry Andric //
16b60736ecSDimitry Andric
17eb11fae6SDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
181d5ae102SDimitry Andric #include "llvm/Analysis/Loads.h"
19cfca06d7SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
20145449b1SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21b60736ecSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
22145449b1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
231d5ae102SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
24eb11fae6SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
25eb11fae6SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
26cfca06d7SDimitry Andric #include "llvm/IR/PatternMatch.h"
27b60736ecSDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h"
28cfca06d7SDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorize.h"
29eb11fae6SDimitry Andric
30eb11fae6SDimitry Andric using namespace llvm;
31cfca06d7SDimitry Andric using namespace PatternMatch;
32eb11fae6SDimitry Andric
33eb11fae6SDimitry Andric #define LV_NAME "loop-vectorize"
34eb11fae6SDimitry Andric #define DEBUG_TYPE LV_NAME
35eb11fae6SDimitry Andric
// If-conversion flattens single-block-predicated control flow so the loop
// body can be handled as straight-line code; on by default.
static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

// Opt-in recognition of pointer IVs whose stride is not a compile-time
// constant; off by default.
static cl::opt<bool> AllowStridedPointerIVs(
    "lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
    cl::desc("Enable recognition of non-constant strided "
             "pointer induction variables."));

namespace llvm {
// Declared in namespace llvm (not file-static) so other vectorizer files can
// reference it; consulted by allowReordering() below.
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
} // namespace llvm

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

// A loop annotated with a vectorize(enable) pragma gets a much higher
// SCEV-check budget than the default above.
static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

// Overrides both the target default and loop metadata when set (see the
// LoopVectorizeHints constructor). "on" and "preferred" are synonyms: both
// map to SK_PreferScalable.
static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "preferred",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "on",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive.")));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;
83eb11fae6SDimitry Andric
84eb11fae6SDimitry Andric namespace llvm {
85eb11fae6SDimitry Andric
validate(unsigned Val)86eb11fae6SDimitry Andric bool LoopVectorizeHints::Hint::validate(unsigned Val) {
87eb11fae6SDimitry Andric switch (Kind) {
88eb11fae6SDimitry Andric case HK_WIDTH:
89eb11fae6SDimitry Andric return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
90344a3780SDimitry Andric case HK_INTERLEAVE:
91eb11fae6SDimitry Andric return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
92eb11fae6SDimitry Andric case HK_FORCE:
93eb11fae6SDimitry Andric return (Val <= 1);
94eb11fae6SDimitry Andric case HK_ISVECTORIZED:
951d5ae102SDimitry Andric case HK_PREDICATE:
96b60736ecSDimitry Andric case HK_SCALABLE:
97eb11fae6SDimitry Andric return (Val == 0 || Val == 1);
98eb11fae6SDimitry Andric }
99eb11fae6SDimitry Andric return false;
100eb11fae6SDimitry Andric }
101eb11fae6SDimitry Andric
/// Collect the vectorizer hints for \p L from its loop metadata, then resolve
/// the scalable-vectorization preference from (in increasing priority) the
/// target default, an explicit metadata width, and the command-line override.
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //   - Target default
  //   - Metadata width
  //   - Force option (always overrides)
  // NOTE: the order of the two assignments below implements that priority;
  // the width check must come second so it can overrule the target default.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}
157eb11fae6SDimitry Andric
setAlreadyVectorized()158e6d15924SDimitry Andric void LoopVectorizeHints::setAlreadyVectorized() {
159e6d15924SDimitry Andric LLVMContext &Context = TheLoop->getHeader()->getContext();
160e6d15924SDimitry Andric
161e6d15924SDimitry Andric MDNode *IsVectorizedMD = MDNode::get(
162e6d15924SDimitry Andric Context,
163e6d15924SDimitry Andric {MDString::get(Context, "llvm.loop.isvectorized"),
164e6d15924SDimitry Andric ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
165e6d15924SDimitry Andric MDNode *LoopID = TheLoop->getLoopID();
166e6d15924SDimitry Andric MDNode *NewLoopID =
167e6d15924SDimitry Andric makePostTransformationMetadata(Context, LoopID,
168e6d15924SDimitry Andric {Twine(Prefix(), "vectorize.").str(),
169e6d15924SDimitry Andric Twine(Prefix(), "interleave.").str()},
170e6d15924SDimitry Andric {IsVectorizedMD});
171e6d15924SDimitry Andric TheLoop->setLoopID(NewLoopID);
172e6d15924SDimitry Andric
173e6d15924SDimitry Andric // Update internal cache.
174e6d15924SDimitry Andric IsVectorized.Value = 1;
175e6d15924SDimitry Andric }
176e6d15924SDimitry Andric
/// Decide from the hints alone whether vectorization of \p L may be attempted.
/// Emits an optimization remark explaining each refusal. Returns false when
/// the pragma disables vectorization, when \p VectorizeOnlyWhenForced is set
/// without an enabling pragma, or when the loop is already vectorized.
bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
                                        "AllDisabled", L->getStartLoc(),
                                        L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}
209eb11fae6SDimitry Andric
/// Emit a missed-optimization remark describing why the loop was not
/// vectorized, echoing the Force/Width/Interleave hints when vectorization
/// had been explicitly requested.
void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";
    else {
      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
                                 TheLoop->getStartLoc(), TheLoop->getHeader());
      R << "loop not vectorized";
      // Only report the hint values when the user forced vectorization;
      // otherwise the bare message is enough.
      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
        R << " (Force=" << NV("Force", true);
        if (Width.Value != 0)
          R << ", Vector Width=" << NV("VectorWidth", getWidth());
        if (getInterleave() != 0)
          R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
        R << ")";
      }
      return R;
    }
  });
}
235eb11fae6SDimitry Andric
vectorizeAnalysisPassName() const236eb11fae6SDimitry Andric const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
237b60736ecSDimitry Andric if (getWidth() == ElementCount::getFixed(1))
238eb11fae6SDimitry Andric return LV_NAME;
239eb11fae6SDimitry Andric if (getForce() == LoopVectorizeHints::FK_Disabled)
240eb11fae6SDimitry Andric return LV_NAME;
241b60736ecSDimitry Andric if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
242eb11fae6SDimitry Andric return LV_NAME;
243eb11fae6SDimitry Andric return OptimizationRemarkAnalysis::AlwaysPrint;
244eb11fae6SDimitry Andric }
245eb11fae6SDimitry Andric
allowReordering() const246344a3780SDimitry Andric bool LoopVectorizeHints::allowReordering() const {
247344a3780SDimitry Andric // Allow the vectorizer to change the order of operations if enabling
248344a3780SDimitry Andric // loop hints are provided
249344a3780SDimitry Andric ElementCount EC = getWidth();
250344a3780SDimitry Andric return HintsAllowReordering &&
251344a3780SDimitry Andric (getForce() == LoopVectorizeHints::FK_Enabled ||
252344a3780SDimitry Andric EC.getKnownMinValue() > 1);
253344a3780SDimitry Andric }
254344a3780SDimitry Andric
/// Walk the loop ID metadata and feed every single-argument hint node to
/// setHint(). Hints are either an MDString or an MDNode whose first operand
/// is an MDString naming the hint.
void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either a MDString or a MDNode with the first
    // operand a MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      // Remaining operands are the hint's arguments.
      for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
        Args.push_back(MD->getOperand(i));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    // Only hints with exactly one argument are recognized; setHint() rejects
    // names without the expected prefix.
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}
290eb11fae6SDimitry Andric
setHint(StringRef Name,Metadata * Arg)291eb11fae6SDimitry Andric void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
292b1c73532SDimitry Andric if (!Name.starts_with(Prefix()))
293eb11fae6SDimitry Andric return;
294eb11fae6SDimitry Andric Name = Name.substr(Prefix().size(), StringRef::npos);
295eb11fae6SDimitry Andric
296eb11fae6SDimitry Andric const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
297eb11fae6SDimitry Andric if (!C)
298eb11fae6SDimitry Andric return;
299eb11fae6SDimitry Andric unsigned Val = C->getZExtValue();
300eb11fae6SDimitry Andric
301b60736ecSDimitry Andric Hint *Hints[] = {&Width, &Interleave, &Force,
302b60736ecSDimitry Andric &IsVectorized, &Predicate, &Scalable};
303e3b55780SDimitry Andric for (auto *H : Hints) {
304eb11fae6SDimitry Andric if (Name == H->Name) {
305eb11fae6SDimitry Andric if (H->validate(Val))
306eb11fae6SDimitry Andric H->Value = Val;
307eb11fae6SDimitry Andric else
308eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
309eb11fae6SDimitry Andric break;
310eb11fae6SDimitry Andric }
311eb11fae6SDimitry Andric }
312eb11fae6SDimitry Andric }
313eb11fae6SDimitry Andric
314eb11fae6SDimitry Andric // Return true if the inner loop \p Lp is uniform with regard to the outer loop
315eb11fae6SDimitry Andric // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
316eb11fae6SDimitry Andric // executing the inner loop will execute the same iterations). This check is
317eb11fae6SDimitry Andric // very constrained for now but it will be relaxed in the future. \p Lp is
318eb11fae6SDimitry Andric // considered uniform if it meets all the following conditions:
319eb11fae6SDimitry Andric // 1) it has a canonical IV (starting from 0 and with stride 1),
320eb11fae6SDimitry Andric // 2) its latch terminator is a conditional branch and,
321eb11fae6SDimitry Andric // 3) its latch condition is a compare instruction whose operands are the
322eb11fae6SDimitry Andric // canonical IV and an OuterLp invariant.
323eb11fae6SDimitry Andric // This check doesn't take into account the uniformity of other conditions not
324eb11fae6SDimitry Andric // related to the loop latch because they don't affect the loop uniformity.
325eb11fae6SDimitry Andric //
326eb11fae6SDimitry Andric // NOTE: We decided to keep all these checks and its associated documentation
327eb11fae6SDimitry Andric // together so that we can easily have a picture of the current supported loop
328eb11fae6SDimitry Andric // nests. However, some of the current checks don't depend on \p OuterLp and
329eb11fae6SDimitry Andric // would be redundantly executed for each \p Lp if we invoked this function for
330eb11fae6SDimitry Andric // different candidate outer loops. This is not the case for now because we
331eb11fae6SDimitry Andric // don't currently have the infrastructure to evaluate multiple candidate outer
332eb11fae6SDimitry Andric // loops and \p OuterLp will be a fixed parameter while we only support explicit
333eb11fae6SDimitry Andric // outer loop vectorization. It's also very likely that these checks go away
334eb11fae6SDimitry Andric // before introducing the aforementioned infrastructure. However, if this is not
335eb11fae6SDimitry Andric // the case, we should move the \p OuterLp independent checks to a separate
336eb11fae6SDimitry Andric // function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1) The inner loop must have a canonical IV (starting at 0, stride 1).
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2) The latch must terminate in a conditional branch.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3) The latch condition must be a compare of the IV update against an
  // OuterLp-invariant value (operands accepted in either order).
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}
379eb11fae6SDimitry Andric
380eb11fae6SDimitry Andric // Return true if \p Lp and all its nested loops are uniform with regard to \p
381eb11fae6SDimitry Andric // OuterLp.
isUniformLoopNest(Loop * Lp,Loop * OuterLp)382eb11fae6SDimitry Andric static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
383eb11fae6SDimitry Andric if (!isUniformLoop(Lp, OuterLp))
384eb11fae6SDimitry Andric return false;
385eb11fae6SDimitry Andric
386eb11fae6SDimitry Andric // Check if nested loops are uniform.
387eb11fae6SDimitry Andric for (Loop *SubLp : *Lp)
388eb11fae6SDimitry Andric if (!isUniformLoopNest(SubLp, OuterLp))
389eb11fae6SDimitry Andric return false;
390eb11fae6SDimitry Andric
391eb11fae6SDimitry Andric return true;
392eb11fae6SDimitry Andric }
393eb11fae6SDimitry Andric
convertPointerToIntegerType(const DataLayout & DL,Type * Ty)394eb11fae6SDimitry Andric static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
395eb11fae6SDimitry Andric if (Ty->isPointerTy())
396eb11fae6SDimitry Andric return DL.getIntPtrType(Ty);
397eb11fae6SDimitry Andric
398eb11fae6SDimitry Andric // It is possible that char's or short's overflow when we ask for the loop's
399eb11fae6SDimitry Andric // trip count, work around this by changing the type size.
400eb11fae6SDimitry Andric if (Ty->getScalarSizeInBits() < 32)
401eb11fae6SDimitry Andric return Type::getInt32Ty(Ty->getContext());
402eb11fae6SDimitry Andric
403eb11fae6SDimitry Andric return Ty;
404eb11fae6SDimitry Andric }
405eb11fae6SDimitry Andric
getWiderType(const DataLayout & DL,Type * Ty0,Type * Ty1)406eb11fae6SDimitry Andric static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
407eb11fae6SDimitry Andric Ty0 = convertPointerToIntegerType(DL, Ty0);
408eb11fae6SDimitry Andric Ty1 = convertPointerToIntegerType(DL, Ty1);
409eb11fae6SDimitry Andric if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
410eb11fae6SDimitry Andric return Ty0;
411eb11fae6SDimitry Andric return Ty1;
412eb11fae6SDimitry Andric }
413eb11fae6SDimitry Andric
414eb11fae6SDimitry Andric /// Check that the instruction has outside loop users and is not an
415eb11fae6SDimitry Andric /// identified reduction variable.
hasOutsideLoopUser(const Loop * TheLoop,Instruction * Inst,SmallPtrSetImpl<Value * > & AllowedExit)416eb11fae6SDimitry Andric static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
417eb11fae6SDimitry Andric SmallPtrSetImpl<Value *> &AllowedExit) {
418d8e91e46SDimitry Andric // Reductions, Inductions and non-header phis are allowed to have exit users. All
419eb11fae6SDimitry Andric // other instructions must not have external users.
420eb11fae6SDimitry Andric if (!AllowedExit.count(Inst))
421eb11fae6SDimitry Andric // Check that all of the users of the loop are inside the BB.
422eb11fae6SDimitry Andric for (User *U : Inst->users()) {
423eb11fae6SDimitry Andric Instruction *UI = cast<Instruction>(U);
424eb11fae6SDimitry Andric // This user may be a reduction exit value.
425eb11fae6SDimitry Andric if (!TheLoop->contains(UI)) {
426eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
427eb11fae6SDimitry Andric return true;
428eb11fae6SDimitry Andric }
429eb11fae6SDimitry Andric }
430eb11fae6SDimitry Andric return false;
431eb11fae6SDimitry Andric }
432eb11fae6SDimitry Andric
433145449b1SDimitry Andric /// Returns true if A and B have same pointer operands or same SCEVs addresses
storeToSameAddress(ScalarEvolution * SE,StoreInst * A,StoreInst * B)434145449b1SDimitry Andric static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
435145449b1SDimitry Andric StoreInst *B) {
436145449b1SDimitry Andric // Compare store
437145449b1SDimitry Andric if (A == B)
438145449b1SDimitry Andric return true;
439145449b1SDimitry Andric
440145449b1SDimitry Andric // Otherwise Compare pointers
441145449b1SDimitry Andric Value *APtr = A->getPointerOperand();
442145449b1SDimitry Andric Value *BPtr = B->getPointerOperand();
443145449b1SDimitry Andric if (APtr == BPtr)
444145449b1SDimitry Andric return true;
445145449b1SDimitry Andric
446145449b1SDimitry Andric // Otherwise compare address SCEVs
447145449b1SDimitry Andric if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
448145449b1SDimitry Andric return true;
449145449b1SDimitry Andric
450145449b1SDimitry Andric return false;
451145449b1SDimitry Andric }
452145449b1SDimitry Andric
/// Returns 1 if \p Ptr advances by exactly one \p AccessTy element per
/// iteration, -1 if it advances in reverse, and 0 otherwise.
int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  // Only let getPtrStride add stride-versioning predicates when not
  // optimizing for size (a predicate presumably implies a runtime check —
  // see getPtrStride).
  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  // Any stride other than +/-1 (including "unknown", mapped to 0 by
  // value_or) is reported as non-consecutive.
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}
473eb11fae6SDimitry Andric
/// Returns true if \p V is invariant for this loop; delegates entirely to the
/// LoopAccessInfo analysis.
bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}
477eb11fae6SDimitry Andric
4787fa27ce4SDimitry Andric namespace {
4797fa27ce4SDimitry Andric /// A rewriter to build the SCEVs for each of the VF lanes in the expected
4807fa27ce4SDimitry Andric /// vectorized loop, which can then be compared to detect their uniformity. This
4817fa27ce4SDimitry Andric /// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
4827fa27ce4SDimitry Andric /// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
4837fa27ce4SDimitry Andric /// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
4847fa27ce4SDimitry Andric /// uniformity.
4857fa27ce4SDimitry Andric class SCEVAddRecForUniformityRewriter
4867fa27ce4SDimitry Andric : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
4877fa27ce4SDimitry Andric /// Multiplier to be applied to the step of AddRecs in TheLoop.
4887fa27ce4SDimitry Andric unsigned StepMultiplier;
4897fa27ce4SDimitry Andric
4907fa27ce4SDimitry Andric /// Offset to be added to the AddRecs in TheLoop.
4917fa27ce4SDimitry Andric unsigned Offset;
4927fa27ce4SDimitry Andric
4937fa27ce4SDimitry Andric /// Loop for which to rewrite AddRecsFor.
4947fa27ce4SDimitry Andric Loop *TheLoop;
4957fa27ce4SDimitry Andric
4967fa27ce4SDimitry Andric /// Is any sub-expressions not analyzable w.r.t. uniformity?
4977fa27ce4SDimitry Andric bool CannotAnalyze = false;
4987fa27ce4SDimitry Andric
  /// True while no sub-expression has defeated the uniformity analysis.
  bool canAnalyze() const { return !CannotAnalyze; }
5007fa27ce4SDimitry Andric
5017fa27ce4SDimitry Andric public:
  /// \p StepMultiplier scales each AddRec step and \p Offset selects which
  /// lane's per-iteration value the rewritten SCEV describes.
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}
5067fa27ce4SDimitry Andric
  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    auto *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      // A loop-variant step cannot be rescaled; give up on this expression.
      CannotAnalyze = true;
      return Expr;
    }
    auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    // FlagAnyWrap: wrap flags of the original AddRec are not assumed to hold
    // for the rescaled recurrence.
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }
5247fa27ce4SDimitry Andric
visit(const SCEV * S)5257fa27ce4SDimitry Andric const SCEV *visit(const SCEV *S) {
5267fa27ce4SDimitry Andric if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
5277fa27ce4SDimitry Andric return S;
5287fa27ce4SDimitry Andric return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
5297fa27ce4SDimitry Andric }
5307fa27ce4SDimitry Andric
visitUnknown(const SCEVUnknown * S)5317fa27ce4SDimitry Andric const SCEV *visitUnknown(const SCEVUnknown *S) {
5327fa27ce4SDimitry Andric if (SE.isLoopInvariant(S, TheLoop))
5337fa27ce4SDimitry Andric return S;
5347fa27ce4SDimitry Andric // The value could vary across iterations.
5357fa27ce4SDimitry Andric CannotAnalyze = true;
5367fa27ce4SDimitry Andric return S;
5377fa27ce4SDimitry Andric }
5387fa27ce4SDimitry Andric
visitCouldNotCompute(const SCEVCouldNotCompute * S)5397fa27ce4SDimitry Andric const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
5407fa27ce4SDimitry Andric // Could not analyze the expression.
5417fa27ce4SDimitry Andric CannotAnalyze = true;
5427fa27ce4SDimitry Andric return S;
5437fa27ce4SDimitry Andric }
5447fa27ce4SDimitry Andric
rewrite(const SCEV * S,ScalarEvolution & SE,unsigned StepMultiplier,unsigned Offset,Loop * TheLoop)5457fa27ce4SDimitry Andric static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
5467fa27ce4SDimitry Andric unsigned StepMultiplier, unsigned Offset,
5477fa27ce4SDimitry Andric Loop *TheLoop) {
5487fa27ce4SDimitry Andric /// Bail out if the expression does not contain an UDiv expression.
5497fa27ce4SDimitry Andric /// Uniform values which are not loop invariant require operations to strip
5507fa27ce4SDimitry Andric /// out the lowest bits. For now just look for UDivs and use it to avoid
5517fa27ce4SDimitry Andric /// re-writing UDIV-free expressions for other lanes to limit compile time.
5527fa27ce4SDimitry Andric if (!SCEVExprContains(S,
5537fa27ce4SDimitry Andric [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
5547fa27ce4SDimitry Andric return SE.getCouldNotCompute();
5557fa27ce4SDimitry Andric
5567fa27ce4SDimitry Andric SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
5577fa27ce4SDimitry Andric TheLoop);
5587fa27ce4SDimitry Andric const SCEV *Result = Rewriter.visit(S);
5597fa27ce4SDimitry Andric
5607fa27ce4SDimitry Andric if (Rewriter.canAnalyze())
5617fa27ce4SDimitry Andric return Result;
5627fa27ce4SDimitry Andric return SE.getCouldNotCompute();
5637fa27ce4SDimitry Andric }
5647fa27ce4SDimitry Andric };
5657fa27ce4SDimitry Andric
5667fa27ce4SDimitry Andric } // namespace
5677fa27ce4SDimitry Andric
isUniform(Value * V,ElementCount VF) const5687fa27ce4SDimitry Andric bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
5697fa27ce4SDimitry Andric if (isInvariant(V))
5707fa27ce4SDimitry Andric return true;
5717fa27ce4SDimitry Andric if (VF.isScalable())
5727fa27ce4SDimitry Andric return false;
5737fa27ce4SDimitry Andric if (VF.isScalar())
5747fa27ce4SDimitry Andric return true;
5757fa27ce4SDimitry Andric
5767fa27ce4SDimitry Andric // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
5777fa27ce4SDimitry Andric // never considered uniform.
5787fa27ce4SDimitry Andric auto *SE = PSE.getSE();
5797fa27ce4SDimitry Andric if (!SE->isSCEVable(V->getType()))
5807fa27ce4SDimitry Andric return false;
5817fa27ce4SDimitry Andric const SCEV *S = SE->getSCEV(V);
5827fa27ce4SDimitry Andric
5837fa27ce4SDimitry Andric // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
5847fa27ce4SDimitry Andric // lane 0 matches the expressions for all other lanes.
5857fa27ce4SDimitry Andric unsigned FixedVF = VF.getKnownMinValue();
5867fa27ce4SDimitry Andric const SCEV *FirstLaneExpr =
5877fa27ce4SDimitry Andric SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
5887fa27ce4SDimitry Andric if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
5897fa27ce4SDimitry Andric return false;
5907fa27ce4SDimitry Andric
5917fa27ce4SDimitry Andric // Make sure the expressions for lanes FixedVF-1..1 match the expression for
5927fa27ce4SDimitry Andric // lane 0. We check lanes in reverse order for compile-time, as frequently
5937fa27ce4SDimitry Andric // checking the last lane is sufficient to rule out uniformity.
5947fa27ce4SDimitry Andric return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
5957fa27ce4SDimitry Andric const SCEV *IthLaneExpr =
5967fa27ce4SDimitry Andric SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
5977fa27ce4SDimitry Andric return FirstLaneExpr == IthLaneExpr;
5987fa27ce4SDimitry Andric });
5997fa27ce4SDimitry Andric }
6007fa27ce4SDimitry Andric
isUniformMemOp(Instruction & I,ElementCount VF) const6017fa27ce4SDimitry Andric bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
6027fa27ce4SDimitry Andric ElementCount VF) const {
603e3b55780SDimitry Andric Value *Ptr = getLoadStorePointerOperand(&I);
604e3b55780SDimitry Andric if (!Ptr)
605e3b55780SDimitry Andric return false;
606e3b55780SDimitry Andric // Note: There's nothing inherent which prevents predicated loads and
607e3b55780SDimitry Andric // stores from being uniform. The current lowering simply doesn't handle
608e3b55780SDimitry Andric // it; in particular, the cost model distinguishes scatter/gather from
609e3b55780SDimitry Andric // scalar w/predication, and we currently rely on the scalar path.
6107fa27ce4SDimitry Andric return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
611e3b55780SDimitry Andric }
612e3b55780SDimitry Andric
canVectorizeOuterLoop()613eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeOuterLoop() {
614b60736ecSDimitry Andric assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
615eb11fae6SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
616eb11fae6SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
617eb11fae6SDimitry Andric bool Result = true;
618eb11fae6SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
619eb11fae6SDimitry Andric
620eb11fae6SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
621eb11fae6SDimitry Andric // Check whether the BB terminator is a BranchInst. Any other terminator is
622eb11fae6SDimitry Andric // not supported yet.
623eb11fae6SDimitry Andric auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
624eb11fae6SDimitry Andric if (!Br) {
625e6d15924SDimitry Andric reportVectorizationFailure("Unsupported basic block terminator",
626e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
6271d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
628eb11fae6SDimitry Andric if (DoExtraAnalysis)
629eb11fae6SDimitry Andric Result = false;
630eb11fae6SDimitry Andric else
631eb11fae6SDimitry Andric return false;
632eb11fae6SDimitry Andric }
633eb11fae6SDimitry Andric
634eb11fae6SDimitry Andric // Check whether the BranchInst is a supported one. Only unconditional
635eb11fae6SDimitry Andric // branches, conditional branches with an outer loop invariant condition or
636eb11fae6SDimitry Andric // backedges are supported.
637e6d15924SDimitry Andric // FIXME: We skip these checks when VPlan predication is enabled as we
638e6d15924SDimitry Andric // want to allow divergent branches. This whole check will be removed
639e6d15924SDimitry Andric // once VPlan predication is on by default.
640145449b1SDimitry Andric if (Br && Br->isConditional() &&
641eb11fae6SDimitry Andric !TheLoop->isLoopInvariant(Br->getCondition()) &&
642eb11fae6SDimitry Andric !LI->isLoopHeader(Br->getSuccessor(0)) &&
643eb11fae6SDimitry Andric !LI->isLoopHeader(Br->getSuccessor(1))) {
644e6d15924SDimitry Andric reportVectorizationFailure("Unsupported conditional branch",
645e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
6461d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
647eb11fae6SDimitry Andric if (DoExtraAnalysis)
648eb11fae6SDimitry Andric Result = false;
649eb11fae6SDimitry Andric else
650eb11fae6SDimitry Andric return false;
651eb11fae6SDimitry Andric }
652eb11fae6SDimitry Andric }
653eb11fae6SDimitry Andric
654eb11fae6SDimitry Andric // Check whether inner loops are uniform. At this point, we only support
655eb11fae6SDimitry Andric // simple outer loops scenarios with uniform nested loops.
656eb11fae6SDimitry Andric if (!isUniformLoopNest(TheLoop /*loop nest*/,
657eb11fae6SDimitry Andric TheLoop /*context outer loop*/)) {
658e6d15924SDimitry Andric reportVectorizationFailure("Outer loop contains divergent loops",
659e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
6601d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
661eb11fae6SDimitry Andric if (DoExtraAnalysis)
662eb11fae6SDimitry Andric Result = false;
663eb11fae6SDimitry Andric else
664eb11fae6SDimitry Andric return false;
665eb11fae6SDimitry Andric }
666eb11fae6SDimitry Andric
667d8e91e46SDimitry Andric // Check whether we are able to set up outer loop induction.
668d8e91e46SDimitry Andric if (!setupOuterLoopInductions()) {
669e6d15924SDimitry Andric reportVectorizationFailure("Unsupported outer loop Phi(s)",
670e6d15924SDimitry Andric "Unsupported outer loop Phi(s)",
6711d5ae102SDimitry Andric "UnsupportedPhi", ORE, TheLoop);
672d8e91e46SDimitry Andric if (DoExtraAnalysis)
673d8e91e46SDimitry Andric Result = false;
674d8e91e46SDimitry Andric else
675d8e91e46SDimitry Andric return false;
676d8e91e46SDimitry Andric }
677d8e91e46SDimitry Andric
678eb11fae6SDimitry Andric return Result;
679eb11fae6SDimitry Andric }
680eb11fae6SDimitry Andric
addInductionPhi(PHINode * Phi,const InductionDescriptor & ID,SmallPtrSetImpl<Value * > & AllowedExit)681eb11fae6SDimitry Andric void LoopVectorizationLegality::addInductionPhi(
682eb11fae6SDimitry Andric PHINode *Phi, const InductionDescriptor &ID,
683eb11fae6SDimitry Andric SmallPtrSetImpl<Value *> &AllowedExit) {
684eb11fae6SDimitry Andric Inductions[Phi] = ID;
685eb11fae6SDimitry Andric
686eb11fae6SDimitry Andric // In case this induction also comes with casts that we know we can ignore
687eb11fae6SDimitry Andric // in the vectorized loop body, record them here. All casts could be recorded
688eb11fae6SDimitry Andric // here for ignoring, but suffices to record only the first (as it is the
689eb11fae6SDimitry Andric // only one that may bw used outside the cast sequence).
690eb11fae6SDimitry Andric const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
691eb11fae6SDimitry Andric if (!Casts.empty())
692eb11fae6SDimitry Andric InductionCastsToIgnore.insert(*Casts.begin());
693eb11fae6SDimitry Andric
694eb11fae6SDimitry Andric Type *PhiTy = Phi->getType();
695ac9a064cSDimitry Andric const DataLayout &DL = Phi->getDataLayout();
696eb11fae6SDimitry Andric
697eb11fae6SDimitry Andric // Get the widest type.
698eb11fae6SDimitry Andric if (!PhiTy->isFloatingPointTy()) {
699eb11fae6SDimitry Andric if (!WidestIndTy)
700eb11fae6SDimitry Andric WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
701eb11fae6SDimitry Andric else
702eb11fae6SDimitry Andric WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
703eb11fae6SDimitry Andric }
704eb11fae6SDimitry Andric
705eb11fae6SDimitry Andric // Int inductions are special because we only allow one IV.
706eb11fae6SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
707eb11fae6SDimitry Andric ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
708eb11fae6SDimitry Andric isa<Constant>(ID.getStartValue()) &&
709eb11fae6SDimitry Andric cast<Constant>(ID.getStartValue())->isNullValue()) {
710eb11fae6SDimitry Andric
711eb11fae6SDimitry Andric // Use the phi node with the widest type as induction. Use the last
712eb11fae6SDimitry Andric // one if there are multiple (no good reason for doing this other
713eb11fae6SDimitry Andric // than it is expedient). We've checked that it begins at zero and
714eb11fae6SDimitry Andric // steps by one, so this is a canonical induction variable.
715eb11fae6SDimitry Andric if (!PrimaryInduction || PhiTy == WidestIndTy)
716eb11fae6SDimitry Andric PrimaryInduction = Phi;
717eb11fae6SDimitry Andric }
718eb11fae6SDimitry Andric
719eb11fae6SDimitry Andric // Both the PHI node itself, and the "post-increment" value feeding
720eb11fae6SDimitry Andric // back into the PHI node may have external users.
721eb11fae6SDimitry Andric // We can allow those uses, except if the SCEVs we have for them rely
722eb11fae6SDimitry Andric // on predicates that only hold within the loop, since allowing the exit
723d8e91e46SDimitry Andric // currently means re-using this SCEV outside the loop (see PR33706 for more
724d8e91e46SDimitry Andric // details).
725145449b1SDimitry Andric if (PSE.getPredicate().isAlwaysTrue()) {
726eb11fae6SDimitry Andric AllowedExit.insert(Phi);
727eb11fae6SDimitry Andric AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
728eb11fae6SDimitry Andric }
729eb11fae6SDimitry Andric
730eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
731eb11fae6SDimitry Andric }
732eb11fae6SDimitry Andric
setupOuterLoopInductions()733d8e91e46SDimitry Andric bool LoopVectorizationLegality::setupOuterLoopInductions() {
734d8e91e46SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
735d8e91e46SDimitry Andric
736d8e91e46SDimitry Andric // Returns true if a given Phi is a supported induction.
737d8e91e46SDimitry Andric auto isSupportedPhi = [&](PHINode &Phi) -> bool {
738d8e91e46SDimitry Andric InductionDescriptor ID;
739d8e91e46SDimitry Andric if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
740d8e91e46SDimitry Andric ID.getKind() == InductionDescriptor::IK_IntInduction) {
741d8e91e46SDimitry Andric addInductionPhi(&Phi, ID, AllowedExit);
742d8e91e46SDimitry Andric return true;
743d8e91e46SDimitry Andric } else {
744d8e91e46SDimitry Andric // Bail out for any Phi in the outer loop header that is not a supported
745d8e91e46SDimitry Andric // induction.
746d8e91e46SDimitry Andric LLVM_DEBUG(
747d8e91e46SDimitry Andric dbgs()
748d8e91e46SDimitry Andric << "LV: Found unsupported PHI for outer loop vectorization.\n");
749d8e91e46SDimitry Andric return false;
750d8e91e46SDimitry Andric }
751d8e91e46SDimitry Andric };
752d8e91e46SDimitry Andric
753d8e91e46SDimitry Andric if (llvm::all_of(Header->phis(), isSupportedPhi))
754d8e91e46SDimitry Andric return true;
755d8e91e46SDimitry Andric else
756d8e91e46SDimitry Andric return false;
757d8e91e46SDimitry Andric }
758d8e91e46SDimitry Andric
759cfca06d7SDimitry Andric /// Checks if a function is scalarizable according to the TLI, in
760cfca06d7SDimitry Andric /// the sense that it should be vectorized and then expanded in
761cfca06d7SDimitry Andric /// multiple scalar calls. This is represented in the
762cfca06d7SDimitry Andric /// TLI via mappings that do not specify a vector name, as in the
763cfca06d7SDimitry Andric /// following example:
764cfca06d7SDimitry Andric ///
765cfca06d7SDimitry Andric /// const VecDesc VecIntrinsics[] = {
766cfca06d7SDimitry Andric /// {"llvm.phx.abs.i32", "", 4}
767cfca06d7SDimitry Andric /// };
isTLIScalarize(const TargetLibraryInfo & TLI,const CallInst & CI)768cfca06d7SDimitry Andric static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
769cfca06d7SDimitry Andric const StringRef ScalarName = CI.getCalledFunction()->getName();
770cfca06d7SDimitry Andric bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
771cfca06d7SDimitry Andric // Check that all known VFs are not associated to a vector
772cfca06d7SDimitry Andric // function, i.e. the vector name is emty.
773344a3780SDimitry Andric if (Scalarize) {
774344a3780SDimitry Andric ElementCount WidestFixedVF, WidestScalableVF;
775344a3780SDimitry Andric TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
776344a3780SDimitry Andric for (ElementCount VF = ElementCount::getFixed(2);
777344a3780SDimitry Andric ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
778cfca06d7SDimitry Andric Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
779344a3780SDimitry Andric for (ElementCount VF = ElementCount::getScalable(1);
780344a3780SDimitry Andric ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
781344a3780SDimitry Andric Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
782344a3780SDimitry Andric assert((WidestScalableVF.isZero() || !Scalarize) &&
783344a3780SDimitry Andric "Caller may decide to scalarize a variant using a scalable VF");
784cfca06d7SDimitry Andric }
785cfca06d7SDimitry Andric return Scalarize;
786cfca06d7SDimitry Andric }
787cfca06d7SDimitry Andric
canVectorizeInstrs()788eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeInstrs() {
789eb11fae6SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
790eb11fae6SDimitry Andric
791eb11fae6SDimitry Andric // For each block in the loop.
792eb11fae6SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
793eb11fae6SDimitry Andric // Scan the instructions in the block and look for hazards.
794eb11fae6SDimitry Andric for (Instruction &I : *BB) {
795eb11fae6SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(&I)) {
796eb11fae6SDimitry Andric Type *PhiTy = Phi->getType();
797eb11fae6SDimitry Andric // Check that this PHI type is allowed.
798eb11fae6SDimitry Andric if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
799eb11fae6SDimitry Andric !PhiTy->isPointerTy()) {
800e6d15924SDimitry Andric reportVectorizationFailure("Found a non-int non-pointer PHI",
801e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
8021d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
803eb11fae6SDimitry Andric return false;
804eb11fae6SDimitry Andric }
805eb11fae6SDimitry Andric
806eb11fae6SDimitry Andric // If this PHINode is not in the header block, then we know that we
807eb11fae6SDimitry Andric // can convert it to select during if-conversion. No need to check if
808eb11fae6SDimitry Andric // the PHIs in this block are induction or reduction variables.
809eb11fae6SDimitry Andric if (BB != Header) {
810d8e91e46SDimitry Andric // Non-header phi nodes that have outside uses can be vectorized. Add
811d8e91e46SDimitry Andric // them to the list of allowed exits.
812d8e91e46SDimitry Andric // Unsafe cyclic dependencies with header phis are identified during
813e3b55780SDimitry Andric // legalization for reduction, induction and fixed order
814d8e91e46SDimitry Andric // recurrences.
8151d5ae102SDimitry Andric AllowedExit.insert(&I);
816eb11fae6SDimitry Andric continue;
817eb11fae6SDimitry Andric }
818eb11fae6SDimitry Andric
819eb11fae6SDimitry Andric // We only allow if-converted PHIs with exactly two incoming values.
820eb11fae6SDimitry Andric if (Phi->getNumIncomingValues() != 2) {
821e6d15924SDimitry Andric reportVectorizationFailure("Found an invalid PHI",
822e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
8231d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop, Phi);
824eb11fae6SDimitry Andric return false;
825eb11fae6SDimitry Andric }
826eb11fae6SDimitry Andric
827eb11fae6SDimitry Andric RecurrenceDescriptor RedDes;
828eb11fae6SDimitry Andric if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
829145449b1SDimitry Andric DT, PSE.getSE())) {
830344a3780SDimitry Andric Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
831eb11fae6SDimitry Andric AllowedExit.insert(RedDes.getLoopExitInstr());
832eb11fae6SDimitry Andric Reductions[Phi] = RedDes;
833eb11fae6SDimitry Andric continue;
834eb11fae6SDimitry Andric }
835eb11fae6SDimitry Andric
8367fa27ce4SDimitry Andric // We prevent matching non-constant strided pointer IVS to preserve
8377fa27ce4SDimitry Andric // historical vectorizer behavior after a generalization of the
8387fa27ce4SDimitry Andric // IVDescriptor code. The intent is to remove this check, but we
8397fa27ce4SDimitry Andric // have to fix issues around code quality for such loops first.
8407fa27ce4SDimitry Andric auto isDisallowedStridedPointerInduction =
8417fa27ce4SDimitry Andric [](const InductionDescriptor &ID) {
8427fa27ce4SDimitry Andric if (AllowStridedPointerIVs)
8437fa27ce4SDimitry Andric return false;
8447fa27ce4SDimitry Andric return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
8457fa27ce4SDimitry Andric ID.getConstIntStepValue() == nullptr;
8467fa27ce4SDimitry Andric };
8477fa27ce4SDimitry Andric
848e3b55780SDimitry Andric // TODO: Instead of recording the AllowedExit, it would be good to
849e3b55780SDimitry Andric // record the complementary set: NotAllowedExit. These include (but may
850e3b55780SDimitry Andric // not be limited to):
851d8e91e46SDimitry Andric // 1. Reduction phis as they represent the one-before-last value, which
852d8e91e46SDimitry Andric // is not available when vectorized
853d8e91e46SDimitry Andric // 2. Induction phis and increment when SCEV predicates cannot be used
854d8e91e46SDimitry Andric // outside the loop - see addInductionPhi
855d8e91e46SDimitry Andric // 3. Non-Phis with outside uses when SCEV predicates cannot be used
856d8e91e46SDimitry Andric // outside the loop - see call to hasOutsideLoopUser in the non-phi
857d8e91e46SDimitry Andric // handling below
858e3b55780SDimitry Andric // 4. FixedOrderRecurrence phis that can possibly be handled by
859d8e91e46SDimitry Andric // extraction.
860d8e91e46SDimitry Andric // By recording these, we can then reason about ways to vectorize each
861d8e91e46SDimitry Andric // of these NotAllowedExit.
862eb11fae6SDimitry Andric InductionDescriptor ID;
8637fa27ce4SDimitry Andric if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
8647fa27ce4SDimitry Andric !isDisallowedStridedPointerInduction(ID)) {
865eb11fae6SDimitry Andric addInductionPhi(Phi, ID, AllowedExit);
866344a3780SDimitry Andric Requirements->addExactFPMathInst(ID.getExactFPMathInst());
867eb11fae6SDimitry Andric continue;
868eb11fae6SDimitry Andric }
869eb11fae6SDimitry Andric
8707fa27ce4SDimitry Andric if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
871cfca06d7SDimitry Andric AllowedExit.insert(Phi);
872e3b55780SDimitry Andric FixedOrderRecurrences.insert(Phi);
873eb11fae6SDimitry Andric continue;
874eb11fae6SDimitry Andric }
875eb11fae6SDimitry Andric
876eb11fae6SDimitry Andric // As a last resort, coerce the PHI to a AddRec expression
877eb11fae6SDimitry Andric // and re-try classifying it a an induction PHI.
8787fa27ce4SDimitry Andric if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
8797fa27ce4SDimitry Andric !isDisallowedStridedPointerInduction(ID)) {
880eb11fae6SDimitry Andric addInductionPhi(Phi, ID, AllowedExit);
881eb11fae6SDimitry Andric continue;
882eb11fae6SDimitry Andric }
883eb11fae6SDimitry Andric
884e6d15924SDimitry Andric reportVectorizationFailure("Found an unidentified PHI",
885e6d15924SDimitry Andric "value that could not be identified as "
886e6d15924SDimitry Andric "reduction is used outside the loop",
8871d5ae102SDimitry Andric "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
888eb11fae6SDimitry Andric return false;
889eb11fae6SDimitry Andric } // end of PHI handling
890eb11fae6SDimitry Andric
891eb11fae6SDimitry Andric // We handle calls that:
892eb11fae6SDimitry Andric // * Are debug info intrinsics.
893eb11fae6SDimitry Andric // * Have a mapping to an IR intrinsic.
894eb11fae6SDimitry Andric // * Have a vector version available.
895eb11fae6SDimitry Andric auto *CI = dyn_cast<CallInst>(&I);
896cfca06d7SDimitry Andric
897eb11fae6SDimitry Andric if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
898eb11fae6SDimitry Andric !isa<DbgInfoIntrinsic>(CI) &&
899eb11fae6SDimitry Andric !(CI->getCalledFunction() && TLI &&
900cfca06d7SDimitry Andric (!VFDatabase::getMappings(*CI).empty() ||
901cfca06d7SDimitry Andric isTLIScalarize(*TLI, *CI)))) {
902d8e91e46SDimitry Andric // If the call is a recognized math libary call, it is likely that
903d8e91e46SDimitry Andric // we can vectorize it given loosened floating-point constraints.
904d8e91e46SDimitry Andric LibFunc Func;
905d8e91e46SDimitry Andric bool IsMathLibCall =
906d8e91e46SDimitry Andric TLI && CI->getCalledFunction() &&
907d8e91e46SDimitry Andric CI->getType()->isFloatingPointTy() &&
908d8e91e46SDimitry Andric TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
909d8e91e46SDimitry Andric TLI->hasOptimizedCodeGen(Func);
910d8e91e46SDimitry Andric
911d8e91e46SDimitry Andric if (IsMathLibCall) {
912d8e91e46SDimitry Andric // TODO: Ideally, we should not use clang-specific language here,
913d8e91e46SDimitry Andric // but it's hard to provide meaningful yet generic advice.
914d8e91e46SDimitry Andric // Also, should this be guarded by allowExtraAnalysis() and/or be part
915d8e91e46SDimitry Andric // of the returned info from isFunctionVectorizable()?
916cfca06d7SDimitry Andric reportVectorizationFailure(
917cfca06d7SDimitry Andric "Found a non-intrinsic callsite",
918e6d15924SDimitry Andric "library call cannot be vectorized. "
919d8e91e46SDimitry Andric "Try compiling with -fno-math-errno, -ffast-math, "
920e6d15924SDimitry Andric "or similar flags",
9211d5ae102SDimitry Andric "CantVectorizeLibcall", ORE, TheLoop, CI);
922d8e91e46SDimitry Andric } else {
923e6d15924SDimitry Andric reportVectorizationFailure("Found a non-intrinsic callsite",
924e6d15924SDimitry Andric "call instruction cannot be vectorized",
9251d5ae102SDimitry Andric "CantVectorizeLibcall", ORE, TheLoop, CI);
926d8e91e46SDimitry Andric }
927eb11fae6SDimitry Andric return false;
928eb11fae6SDimitry Andric }
929eb11fae6SDimitry Andric
930e6d15924SDimitry Andric // Some intrinsics have scalar arguments and should be same in order for
931e6d15924SDimitry Andric // them to be vectorized (i.e. loop invariant).
932e6d15924SDimitry Andric if (CI) {
933eb11fae6SDimitry Andric auto *SE = PSE.getSE();
934e6d15924SDimitry Andric Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
935c0981da4SDimitry Andric for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
936145449b1SDimitry Andric if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
937e6d15924SDimitry Andric if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
938e6d15924SDimitry Andric reportVectorizationFailure("Found unvectorizable intrinsic",
939e6d15924SDimitry Andric "intrinsic instruction cannot be vectorized",
9401d5ae102SDimitry Andric "CantVectorizeIntrinsic", ORE, TheLoop, CI);
941eb11fae6SDimitry Andric return false;
942eb11fae6SDimitry Andric }
943eb11fae6SDimitry Andric }
944e6d15924SDimitry Andric }
945eb11fae6SDimitry Andric
946b1c73532SDimitry Andric // If we found a vectorized variant of a function, note that so LV can
947b1c73532SDimitry Andric // make better decisions about maximum VF.
948b1c73532SDimitry Andric if (CI && !VFDatabase::getMappings(*CI).empty())
949b1c73532SDimitry Andric VecCallVariantsFound = true;
950b1c73532SDimitry Andric
951eb11fae6SDimitry Andric // Check that the instruction return type is vectorizable.
952eb11fae6SDimitry Andric // Also, we can't vectorize extractelement instructions.
953eb11fae6SDimitry Andric if ((!VectorType::isValidElementType(I.getType()) &&
954eb11fae6SDimitry Andric !I.getType()->isVoidTy()) ||
955eb11fae6SDimitry Andric isa<ExtractElementInst>(I)) {
956e6d15924SDimitry Andric reportVectorizationFailure("Found unvectorizable type",
957e6d15924SDimitry Andric "instruction return type cannot be vectorized",
9581d5ae102SDimitry Andric "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
959eb11fae6SDimitry Andric return false;
960eb11fae6SDimitry Andric }
961eb11fae6SDimitry Andric
962eb11fae6SDimitry Andric // Check that the stored type is vectorizable.
963eb11fae6SDimitry Andric if (auto *ST = dyn_cast<StoreInst>(&I)) {
964eb11fae6SDimitry Andric Type *T = ST->getValueOperand()->getType();
965eb11fae6SDimitry Andric if (!VectorType::isValidElementType(T)) {
966e6d15924SDimitry Andric reportVectorizationFailure("Store instruction cannot be vectorized",
967e6d15924SDimitry Andric "store instruction cannot be vectorized",
9681d5ae102SDimitry Andric "CantVectorizeStore", ORE, TheLoop, ST);
969eb11fae6SDimitry Andric return false;
970eb11fae6SDimitry Andric }
971eb11fae6SDimitry Andric
972e6d15924SDimitry Andric // For nontemporal stores, check that a nontemporal vector version is
973e6d15924SDimitry Andric // supported on the target.
974e6d15924SDimitry Andric if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
975e6d15924SDimitry Andric // Arbitrarily try a vector of 2 elements.
976b60736ecSDimitry Andric auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
977e6d15924SDimitry Andric assert(VecTy && "did not find vectorized version of stored type");
978cfca06d7SDimitry Andric if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
979e6d15924SDimitry Andric reportVectorizationFailure(
980e6d15924SDimitry Andric "nontemporal store instruction cannot be vectorized",
981e6d15924SDimitry Andric "nontemporal store instruction cannot be vectorized",
9821d5ae102SDimitry Andric "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
983e6d15924SDimitry Andric return false;
984e6d15924SDimitry Andric }
985e6d15924SDimitry Andric }
986e6d15924SDimitry Andric
987e6d15924SDimitry Andric } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
988e6d15924SDimitry Andric if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
989e6d15924SDimitry Andric // For nontemporal loads, check that a nontemporal vector version is
990e6d15924SDimitry Andric // supported on the target (arbitrarily try a vector of 2 elements).
991b60736ecSDimitry Andric auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
992e6d15924SDimitry Andric assert(VecTy && "did not find vectorized version of load type");
993cfca06d7SDimitry Andric if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
994e6d15924SDimitry Andric reportVectorizationFailure(
995e6d15924SDimitry Andric "nontemporal load instruction cannot be vectorized",
996e6d15924SDimitry Andric "nontemporal load instruction cannot be vectorized",
9971d5ae102SDimitry Andric "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
998e6d15924SDimitry Andric return false;
999e6d15924SDimitry Andric }
1000e6d15924SDimitry Andric }
1001e6d15924SDimitry Andric
1002eb11fae6SDimitry Andric // FP instructions can allow unsafe algebra, thus vectorizable by
1003eb11fae6SDimitry Andric // non-IEEE-754 compliant SIMD units.
1004eb11fae6SDimitry Andric // This applies to floating-point math operations and calls, not memory
1005eb11fae6SDimitry Andric // operations, shuffles, or casts, as they don't change precision or
1006eb11fae6SDimitry Andric // semantics.
1007eb11fae6SDimitry Andric } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
1008eb11fae6SDimitry Andric !I.isFast()) {
1009eb11fae6SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
1010eb11fae6SDimitry Andric Hints->setPotentiallyUnsafe();
1011eb11fae6SDimitry Andric }
1012eb11fae6SDimitry Andric
1013eb11fae6SDimitry Andric // Reduction instructions are allowed to have exit users.
1014eb11fae6SDimitry Andric // All other instructions must not have external users.
1015eb11fae6SDimitry Andric if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
1016d8e91e46SDimitry Andric // We can safely vectorize loops where instructions within the loop are
1017d8e91e46SDimitry Andric // used outside the loop only if the SCEV predicates within the loop is
1018d8e91e46SDimitry Andric // same as outside the loop. Allowing the exit means reusing the SCEV
1019d8e91e46SDimitry Andric // outside the loop.
1020145449b1SDimitry Andric if (PSE.getPredicate().isAlwaysTrue()) {
1021d8e91e46SDimitry Andric AllowedExit.insert(&I);
1022d8e91e46SDimitry Andric continue;
1023d8e91e46SDimitry Andric }
1024e6d15924SDimitry Andric reportVectorizationFailure("Value cannot be used outside the loop",
1025e6d15924SDimitry Andric "value cannot be used outside the loop",
10261d5ae102SDimitry Andric "ValueUsedOutsideLoop", ORE, TheLoop, &I);
1027eb11fae6SDimitry Andric return false;
1028eb11fae6SDimitry Andric }
1029eb11fae6SDimitry Andric } // next instr.
1030eb11fae6SDimitry Andric }
1031eb11fae6SDimitry Andric
1032eb11fae6SDimitry Andric if (!PrimaryInduction) {
1033eb11fae6SDimitry Andric if (Inductions.empty()) {
1034e6d15924SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
1035e6d15924SDimitry Andric "loop induction variable could not be identified",
10361d5ae102SDimitry Andric "NoInductionVariable", ORE, TheLoop);
1037eb11fae6SDimitry Andric return false;
1038d8e91e46SDimitry Andric } else if (!WidestIndTy) {
1039e6d15924SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
1040e6d15924SDimitry Andric "integer loop induction variable could not be identified",
10411d5ae102SDimitry Andric "NoIntegerInductionVariable", ORE, TheLoop);
1042d8e91e46SDimitry Andric return false;
1043e6d15924SDimitry Andric } else {
1044e6d15924SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
1045eb11fae6SDimitry Andric }
1046eb11fae6SDimitry Andric }
1047eb11fae6SDimitry Andric
1048eb11fae6SDimitry Andric // Now we know the widest induction type, check if our found induction
1049eb11fae6SDimitry Andric // is the same size. If it's not, unset it here and InnerLoopVectorizer
1050eb11fae6SDimitry Andric // will create another.
1051eb11fae6SDimitry Andric if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
1052eb11fae6SDimitry Andric PrimaryInduction = nullptr;
1053eb11fae6SDimitry Andric
1054eb11fae6SDimitry Andric return true;
1055eb11fae6SDimitry Andric }
1056eb11fae6SDimitry Andric
// Check legality of the loop's memory accesses: defer to LoopAccessInfo for
// the dependence analysis, then apply additional vectorizer-specific rules
// for stores to loop-invariant addresses. On success, the loop-access SCEV
// predicate is folded into our own predicated SCEV.
bool LoopVectorizationLegality::canVectorizeMemory() {
  // Query (and cache) the loop-access analysis for this loop.
  LAI = &LAIs.getInfo(*TheLoop);
  // If the loop-access analysis recorded a failure report, forward it via the
  // optimization remark emitter under the vectorizer's analysis pass name.
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory())
    return false;

  // A load/store dependence through a loop-invariant address is not
  // vectorizable.
  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to invariant address when final reduction value is
  // guaranteed to be stored at the end of the loop. Also, if decision to
  // vectorize loop is made, runtime checks are added so as to make sure that
  // invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if last stored value is unconditional
    // and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      // Only intermediate stores of reductions need the checks below; other
      // invariant stores were already handled by the dependence checks above.
      if (!isInvariantStoreOfReduction(SI))
        continue;

      // A conditional (predicated) store of the running reduction value
      // cannot be turned into a single final store.
      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not, we do not want
      // to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is already
      // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively deadcode.
          // With opaque pointers it is possible for one pointer to be used with
          // different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that types of stored
          // values are same.
          // TODO: Check that bitwidth of unhandled store is smaller than the
          // one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      // Any store to an invariant address that is not overwritten by a
      // reduction's final store remains unhandled and blocks vectorization.
      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  // Adopt the runtime SCEV assumptions the loop-access analysis relied on.
  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}
1160eb11fae6SDimitry Andric
canVectorizeFPMath(bool EnableStrictReductions)1161344a3780SDimitry Andric bool LoopVectorizationLegality::canVectorizeFPMath(
1162344a3780SDimitry Andric bool EnableStrictReductions) {
1163344a3780SDimitry Andric
1164344a3780SDimitry Andric // First check if there is any ExactFP math or if we allow reassociations
1165344a3780SDimitry Andric if (!Requirements->getExactFPInst() || Hints->allowReordering())
1166344a3780SDimitry Andric return true;
1167344a3780SDimitry Andric
1168344a3780SDimitry Andric // If the above is false, we have ExactFPMath & do not allow reordering.
1169344a3780SDimitry Andric // If the EnableStrictReductions flag is set, first check if we have any
1170344a3780SDimitry Andric // Exact FP induction vars, which we cannot vectorize.
1171344a3780SDimitry Andric if (!EnableStrictReductions ||
1172344a3780SDimitry Andric any_of(getInductionVars(), [&](auto &Induction) -> bool {
1173344a3780SDimitry Andric InductionDescriptor IndDesc = Induction.second;
1174344a3780SDimitry Andric return IndDesc.getExactFPMathInst();
1175344a3780SDimitry Andric }))
1176344a3780SDimitry Andric return false;
1177344a3780SDimitry Andric
1178344a3780SDimitry Andric // We can now only vectorize if all reductions with Exact FP math also
1179344a3780SDimitry Andric // have the isOrdered flag set, which indicates that we can move the
1180344a3780SDimitry Andric // reduction operations in-loop.
1181344a3780SDimitry Andric return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1182344a3780SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
1183344a3780SDimitry Andric return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1184344a3780SDimitry Andric }));
1185344a3780SDimitry Andric }
1186344a3780SDimitry Andric
isInvariantStoreOfReduction(StoreInst * SI)1187145449b1SDimitry Andric bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
1188145449b1SDimitry Andric return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1189145449b1SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
1190145449b1SDimitry Andric return RdxDesc.IntermediateStore == SI;
1191145449b1SDimitry Andric });
1192145449b1SDimitry Andric }
1193145449b1SDimitry Andric
isInvariantAddressOfReduction(Value * V)1194145449b1SDimitry Andric bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
1195145449b1SDimitry Andric return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
1196145449b1SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
1197145449b1SDimitry Andric if (!RdxDesc.IntermediateStore)
1198145449b1SDimitry Andric return false;
1199145449b1SDimitry Andric
1200145449b1SDimitry Andric ScalarEvolution *SE = PSE.getSE();
1201145449b1SDimitry Andric Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
1202145449b1SDimitry Andric return V == InvariantAddress ||
1203145449b1SDimitry Andric SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
1204145449b1SDimitry Andric });
1205145449b1SDimitry Andric }
1206145449b1SDimitry Andric
isInductionPhi(const Value * V) const120777fc4c14SDimitry Andric bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
1208eb11fae6SDimitry Andric Value *In0 = const_cast<Value *>(V);
1209eb11fae6SDimitry Andric PHINode *PN = dyn_cast_or_null<PHINode>(In0);
1210eb11fae6SDimitry Andric if (!PN)
1211eb11fae6SDimitry Andric return false;
1212eb11fae6SDimitry Andric
1213eb11fae6SDimitry Andric return Inductions.count(PN);
1214eb11fae6SDimitry Andric }
1215eb11fae6SDimitry Andric
121677fc4c14SDimitry Andric const InductionDescriptor *
getIntOrFpInductionDescriptor(PHINode * Phi) const121777fc4c14SDimitry Andric LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
121877fc4c14SDimitry Andric if (!isInductionPhi(Phi))
121977fc4c14SDimitry Andric return nullptr;
122077fc4c14SDimitry Andric auto &ID = getInductionVars().find(Phi)->second;
122177fc4c14SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
122277fc4c14SDimitry Andric ID.getKind() == InductionDescriptor::IK_FpInduction)
122377fc4c14SDimitry Andric return &ID;
122477fc4c14SDimitry Andric return nullptr;
122577fc4c14SDimitry Andric }
122677fc4c14SDimitry Andric
1227145449b1SDimitry Andric const InductionDescriptor *
getPointerInductionDescriptor(PHINode * Phi) const1228145449b1SDimitry Andric LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
1229145449b1SDimitry Andric if (!isInductionPhi(Phi))
1230145449b1SDimitry Andric return nullptr;
1231145449b1SDimitry Andric auto &ID = getInductionVars().find(Phi)->second;
1232145449b1SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
1233145449b1SDimitry Andric return &ID;
1234145449b1SDimitry Andric return nullptr;
1235145449b1SDimitry Andric }
1236145449b1SDimitry Andric
isCastedInductionVariable(const Value * V) const123777fc4c14SDimitry Andric bool LoopVectorizationLegality::isCastedInductionVariable(
123877fc4c14SDimitry Andric const Value *V) const {
1239eb11fae6SDimitry Andric auto *Inst = dyn_cast<Instruction>(V);
1240eb11fae6SDimitry Andric return (Inst && InductionCastsToIgnore.count(Inst));
1241eb11fae6SDimitry Andric }
1242eb11fae6SDimitry Andric
isInductionVariable(const Value * V) const124377fc4c14SDimitry Andric bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
1244eb11fae6SDimitry Andric return isInductionPhi(V) || isCastedInductionVariable(V);
1245eb11fae6SDimitry Andric }
1246eb11fae6SDimitry Andric
isFixedOrderRecurrence(const PHINode * Phi) const1247e3b55780SDimitry Andric bool LoopVectorizationLegality::isFixedOrderRecurrence(
124877fc4c14SDimitry Andric const PHINode *Phi) const {
1249e3b55780SDimitry Andric return FixedOrderRecurrences.count(Phi);
1250eb11fae6SDimitry Andric }
1251eb11fae6SDimitry Andric
// Return true if \p BB must be predicated when the loop body is flattened.
// Thin wrapper over the shared LoopAccessInfo query (which consults the
// loop structure and the dominator tree) so callers only need the legality
// object.
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
1255eb11fae6SDimitry Andric
blockCanBePredicated(BasicBlock * BB,SmallPtrSetImpl<Value * > & SafePtrs,SmallPtrSetImpl<const Instruction * > & MaskedOp) const1256eb11fae6SDimitry Andric bool LoopVectorizationLegality::blockCanBePredicated(
1257b60736ecSDimitry Andric BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1258b1c73532SDimitry Andric SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
1259eb11fae6SDimitry Andric for (Instruction &I : *BB) {
1260cfca06d7SDimitry Andric // We can predicate blocks with calls to assume, as long as we drop them in
1261cfca06d7SDimitry Andric // case we flatten the CFG via predication.
1262cfca06d7SDimitry Andric if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1263b1c73532SDimitry Andric MaskedOp.insert(&I);
1264cfca06d7SDimitry Andric continue;
1265cfca06d7SDimitry Andric }
1266cfca06d7SDimitry Andric
1267b60736ecSDimitry Andric // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1268b60736ecSDimitry Andric // TODO: there might be cases that it should block the vectorization. Let's
1269b60736ecSDimitry Andric // ignore those for now.
1270b60736ecSDimitry Andric if (isa<NoAliasScopeDeclInst>(&I))
1271b60736ecSDimitry Andric continue;
1272b60736ecSDimitry Andric
12737fa27ce4SDimitry Andric // We can allow masked calls if there's at least one vector variant, even
12747fa27ce4SDimitry Andric // if we end up scalarizing due to the cost model calculations.
12757fa27ce4SDimitry Andric // TODO: Allow other calls if they have appropriate attributes... readonly
12767fa27ce4SDimitry Andric // and argmemonly?
12777fa27ce4SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I))
12787fa27ce4SDimitry Andric if (VFDatabase::hasMaskedVariant(*CI)) {
12797fa27ce4SDimitry Andric MaskedOp.insert(CI);
12807fa27ce4SDimitry Andric continue;
12817fa27ce4SDimitry Andric }
12827fa27ce4SDimitry Andric
1283e3b55780SDimitry Andric // Loads are handled via masking (or speculated if safe to do so.)
1284e3b55780SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) {
1285e3b55780SDimitry Andric if (!SafePtrs.count(LI->getPointerOperand()))
1286eb11fae6SDimitry Andric MaskedOp.insert(LI);
1287eb11fae6SDimitry Andric continue;
1288eb11fae6SDimitry Andric }
1289eb11fae6SDimitry Andric
1290eb11fae6SDimitry Andric // Predicated store requires some form of masking:
1291eb11fae6SDimitry Andric // 1) masked store HW instruction,
1292eb11fae6SDimitry Andric // 2) emulation via load-blend-store (only if safe and legal to do so,
1293eb11fae6SDimitry Andric // be aware on the race conditions), or
1294eb11fae6SDimitry Andric // 3) element-by-element predicate check and scalar store.
1295e3b55780SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) {
1296eb11fae6SDimitry Andric MaskedOp.insert(SI);
1297eb11fae6SDimitry Andric continue;
1298eb11fae6SDimitry Andric }
1299e3b55780SDimitry Andric
1300e3b55780SDimitry Andric if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
1301eb11fae6SDimitry Andric return false;
1302eb11fae6SDimitry Andric }
1303eb11fae6SDimitry Andric
1304eb11fae6SDimitry Andric return true;
1305eb11fae6SDimitry Andric }
1306eb11fae6SDimitry Andric
// Check that a multi-block loop can be flattened into straight-line code via
// if-conversion: first collect pointers that are safe to access without a
// predicate, then verify every block that needs predication can actually be
// predicated (recording masked operations along the way).
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
  if (!EnableIfConversion) {
    reportVectorizationFailure("If-conversion is disabled",
                               "if-conversion is disabled",
                               "IfConversionDisabled",
                               ORE, TheLoop);
    return false;
  }

  assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");

  // A list of pointers which are known to be dereferenceable within scope of
  // the loop body for each iteration of the loop which executes. That is,
  // the memory pointed to can be dereferenced (with the access size implied by
  // the value's type) unconditionally within the loop header without
  // introducing a new fault.
  SmallPtrSet<Value *, 8> SafePointers;

  // Collect safe addresses.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Any address accessed in an unconditionally-executed block is safe: it
    // is dereferenced on every iteration anyway.
    if (!blockNeedsPredication(BB)) {
      for (Instruction &I : *BB)
        if (auto *Ptr = getLoadStorePointerOperand(&I))
          SafePointers.insert(Ptr);
      continue;
    }

    // For a block which requires predication, an address may be safe to access
    // in the loop w/o predication if we can prove dereferenceability facts
    // sufficient to ensure it'll never fault within the loop. For the moment,
    // we restrict this to loads; stores are more complicated due to
    // concurrency restrictions.
    ScalarEvolution &SE = *PSE.getSE();
    for (Instruction &I : *BB) {
      LoadInst *LI = dyn_cast<LoadInst>(&I);
      // Scalar, speculatable loads whose address is provably dereferenceable
      // and aligned across the loop may be executed unconditionally.
      if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
          isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
        SafePointers.insert(LI->getPointerOperand());
    }
  }

  // Collect the blocks that need predication.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // We don't support switch statements inside loops.
    if (!isa<BranchInst>(BB->getTerminator())) {
      reportVectorizationFailure("Loop contains a switch statement",
                                 "loop contains a switch statement",
                                 "LoopContainsSwitch", ORE, TheLoop,
                                 BB->getTerminator());
      return false;
    }

    // We must be able to predicate all blocks that need to be predicated.
    if (blockNeedsPredication(BB) &&
        !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
      reportVectorizationFailure(
          "Control flow cannot be substituted for a select",
          "control flow cannot be substituted for a select", "NoCFGForSelect",
          ORE, TheLoop, BB->getTerminator());
      return false;
    }
  }

  // We can if-convert this loop.
  return true;
}
1373eb11fae6SDimitry Andric
1374eb11fae6SDimitry Andric // Helper function to canVectorizeLoopNestCFG.
canVectorizeLoopCFG(Loop * Lp,bool UseVPlanNativePath)1375eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1376eb11fae6SDimitry Andric bool UseVPlanNativePath) {
1377b60736ecSDimitry Andric assert((UseVPlanNativePath || Lp->isInnermost()) &&
1378eb11fae6SDimitry Andric "VPlan-native path is not enabled.");
1379eb11fae6SDimitry Andric
1380eb11fae6SDimitry Andric // TODO: ORE should be improved to show more accurate information when an
1381eb11fae6SDimitry Andric // outer loop can't be vectorized because a nested loop is not understood or
1382eb11fae6SDimitry Andric // legal. Something like: "outer_loop_location: loop not vectorized:
1383eb11fae6SDimitry Andric // (inner_loop_location) loop control flow is not understood by vectorizer".
1384eb11fae6SDimitry Andric
1385eb11fae6SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
1386eb11fae6SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1387eb11fae6SDimitry Andric bool Result = true;
1388eb11fae6SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1389eb11fae6SDimitry Andric
1390eb11fae6SDimitry Andric // We must have a loop in canonical form. Loops with indirectbr in them cannot
1391eb11fae6SDimitry Andric // be canonicalized.
1392eb11fae6SDimitry Andric if (!Lp->getLoopPreheader()) {
1393e6d15924SDimitry Andric reportVectorizationFailure("Loop doesn't have a legal pre-header",
1394e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
13951d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
1396eb11fae6SDimitry Andric if (DoExtraAnalysis)
1397eb11fae6SDimitry Andric Result = false;
1398eb11fae6SDimitry Andric else
1399eb11fae6SDimitry Andric return false;
1400eb11fae6SDimitry Andric }
1401eb11fae6SDimitry Andric
1402eb11fae6SDimitry Andric // We must have a single backedge.
1403eb11fae6SDimitry Andric if (Lp->getNumBackEdges() != 1) {
1404e6d15924SDimitry Andric reportVectorizationFailure("The loop must have a single backedge",
1405e6d15924SDimitry Andric "loop control flow is not understood by vectorizer",
14061d5ae102SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
1407eb11fae6SDimitry Andric if (DoExtraAnalysis)
1408eb11fae6SDimitry Andric Result = false;
1409eb11fae6SDimitry Andric else
1410eb11fae6SDimitry Andric return false;
1411eb11fae6SDimitry Andric }
1412eb11fae6SDimitry Andric
1413eb11fae6SDimitry Andric return Result;
1414eb11fae6SDimitry Andric }
1415eb11fae6SDimitry Andric
canVectorizeLoopNestCFG(Loop * Lp,bool UseVPlanNativePath)1416eb11fae6SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1417eb11fae6SDimitry Andric Loop *Lp, bool UseVPlanNativePath) {
1418eb11fae6SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
1419eb11fae6SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1420eb11fae6SDimitry Andric bool Result = true;
1421eb11fae6SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1422eb11fae6SDimitry Andric if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1423eb11fae6SDimitry Andric if (DoExtraAnalysis)
1424eb11fae6SDimitry Andric Result = false;
1425eb11fae6SDimitry Andric else
1426eb11fae6SDimitry Andric return false;
1427eb11fae6SDimitry Andric }
1428eb11fae6SDimitry Andric
1429eb11fae6SDimitry Andric // Recursively check whether the loop control flow of nested loops is
1430eb11fae6SDimitry Andric // understood.
1431eb11fae6SDimitry Andric for (Loop *SubLp : *Lp)
1432eb11fae6SDimitry Andric if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1433eb11fae6SDimitry Andric if (DoExtraAnalysis)
1434eb11fae6SDimitry Andric Result = false;
1435eb11fae6SDimitry Andric else
1436eb11fae6SDimitry Andric return false;
1437eb11fae6SDimitry Andric }
1438eb11fae6SDimitry Andric
1439eb11fae6SDimitry Andric return Result;
1440eb11fae6SDimitry Andric }
1441eb11fae6SDimitry Andric
// Top-level legality driver: runs the full sequence of checks (loop-nest CFG,
// outer-loop handling, if-conversion, instruction legality, memory legality,
// trip-count computability, SCEV-check budget). The order matters: later
// checks rely on state established by earlier ones (e.g. canVectorizeMemory
// populates LAI, which the debug output below reads).
bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
  // Store the result and return it at the end instead of exiting early, in case
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
  bool Result = true;

  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
  // Check whether the loop-related control flow in the loop nest is expected by
  // vectorizer.
  if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // We need to have a loop header.
  LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
                    << '\n');

  // Specific checks for outer loops. We skip the remaining legal checks at this
  // point because they don't support outer loops.
  if (!TheLoop->isInnermost()) {
    assert(UseVPlanNativePath && "VPlan-native path is not enabled.");

    if (!canVectorizeOuterLoop()) {
      reportVectorizationFailure("Unsupported outer loop",
                                 "unsupported outer loop",
                                 "UnsupportedOuterLoop",
                                 ORE, TheLoop);
      // TODO: Implement DoExtraAnalysis when subsequent legal checks support
      // outer loops.
      return false;
    }

    LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
    return Result;
  }

  assert(TheLoop->isInnermost() && "Inner loop expected.");
  // Check if we can if-convert non-single-bb loops.
  unsigned NumBlocks = TheLoop->getNumBlocks();
  if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
    LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check if we can vectorize the instructions and CFG in this loop.
  if (!canVectorizeInstrs()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Go over each instruction and look at memory deps.
  if (!canVectorizeMemory()) {
    LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Without a computable backedge-taken count we cannot generate the vector
  // loop's trip count.
  if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
    reportVectorizationFailure("could not determine number of loop iterations",
                               "could not determine number of loop iterations",
                               "CantComputeNumberOfIterations", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
                    << (LAI->getRuntimePointerChecking()->Need
                            ? " (with a runtime bound check)"
                            : "")
                    << "!\n");

  // A pragma-forced loop gets a larger budget for runtime SCEV checks.
  unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
  if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
    SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;

  if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
    reportVectorizationFailure("Too many SCEV checks needed",
        "Too many SCEV assumptions need to be made and checked at runtime",
        "TooManySCEVRunTimeChecks", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Okay! We've done all the tests. If any have failed, return false. Otherwise
  // we can vectorize, and at this point we don't have any other mem analysis
  // which may limit our maximum vectorization factor, so just return true with
  // no restrictions.
  return Result;
}
1545eb11fae6SDimitry Andric
canFoldTailByMasking() const1546ac9a064cSDimitry Andric bool LoopVectorizationLegality::canFoldTailByMasking() const {
1547d8e91e46SDimitry Andric
1548d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1549d8e91e46SDimitry Andric
15501d5ae102SDimitry Andric SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1551d8e91e46SDimitry Andric
1552e3b55780SDimitry Andric for (const auto &Reduction : getReductionVars())
15531d5ae102SDimitry Andric ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
15541d5ae102SDimitry Andric
15551d5ae102SDimitry Andric // TODO: handle non-reduction outside users when tail is folded by masking.
1556d8e91e46SDimitry Andric for (auto *AE : AllowedExit) {
15571d5ae102SDimitry Andric // Check that all users of allowed exit values are inside the loop or
15581d5ae102SDimitry Andric // are the live-out of a reduction.
15591d5ae102SDimitry Andric if (ReductionLiveOuts.count(AE))
15601d5ae102SDimitry Andric continue;
1561d8e91e46SDimitry Andric for (User *U : AE->users()) {
1562d8e91e46SDimitry Andric Instruction *UI = cast<Instruction>(U);
1563d8e91e46SDimitry Andric if (TheLoop->contains(UI))
1564d8e91e46SDimitry Andric continue;
1565b60736ecSDimitry Andric LLVM_DEBUG(
1566b60736ecSDimitry Andric dbgs()
1567b60736ecSDimitry Andric << "LV: Cannot fold tail by masking, loop has an outside user for "
1568b60736ecSDimitry Andric << *UI << "\n");
1569d8e91e46SDimitry Andric return false;
1570d8e91e46SDimitry Andric }
1571d8e91e46SDimitry Andric }
1572d8e91e46SDimitry Andric
1573ac9a064cSDimitry Andric for (const auto &Entry : getInductionVars()) {
1574ac9a064cSDimitry Andric PHINode *OrigPhi = Entry.first;
1575ac9a064cSDimitry Andric for (User *U : OrigPhi->users()) {
1576ac9a064cSDimitry Andric auto *UI = cast<Instruction>(U);
1577ac9a064cSDimitry Andric if (!TheLoop->contains(UI)) {
1578ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1579ac9a064cSDimitry Andric "outside user for "
1580ac9a064cSDimitry Andric << *UI << "\n");
1581ac9a064cSDimitry Andric return false;
1582ac9a064cSDimitry Andric }
1583ac9a064cSDimitry Andric }
1584ac9a064cSDimitry Andric }
1585ac9a064cSDimitry Andric
1586d8e91e46SDimitry Andric // The list of pointers that we can safely read and write to remains empty.
1587d8e91e46SDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
1588d8e91e46SDimitry Andric
1589ac9a064cSDimitry Andric // Check all blocks for predication, including those that ordinarily do not
1590ac9a064cSDimitry Andric // need predication such as the header block.
1591b60736ecSDimitry Andric SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1592d8e91e46SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
1593b1c73532SDimitry Andric if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1594ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
1595d8e91e46SDimitry Andric return false;
1596d8e91e46SDimitry Andric }
1597d8e91e46SDimitry Andric }
1598d8e91e46SDimitry Andric
1599d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1600b60736ecSDimitry Andric
1601d8e91e46SDimitry Andric return true;
1602d8e91e46SDimitry Andric }
1603d8e91e46SDimitry Andric
prepareToFoldTailByMasking()1604ac9a064cSDimitry Andric void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1605ac9a064cSDimitry Andric // The list of pointers that we can safely read and write to remains empty.
1606ac9a064cSDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
1607ac9a064cSDimitry Andric
1608ac9a064cSDimitry Andric // Mark all blocks for predication, including those that ordinarily do not
1609ac9a064cSDimitry Andric // need predication such as the header block.
1610ac9a064cSDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
1611ac9a064cSDimitry Andric [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1612ac9a064cSDimitry Andric assert(R && "Must be able to predicate block when tail-folding.");
1613ac9a064cSDimitry Andric }
1614ac9a064cSDimitry Andric }
1615ac9a064cSDimitry Andric
1616eb11fae6SDimitry Andric } // namespace llvm
1617