1d8e91e46SDimitry Andric //===- VPlanSLP.cpp - SLP Analysis based on VPlan -------------------------===//
2d8e91e46SDimitry Andric //
3e6d15924SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e6d15924SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e6d15924SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6d8e91e46SDimitry Andric //
7d8e91e46SDimitry Andric //===----------------------------------------------------------------------===//
8d8e91e46SDimitry Andric /// This file implements SLP analysis based on VPlan. The analysis is based on
9d8e91e46SDimitry Andric /// the ideas described in
10d8e91e46SDimitry Andric ///
11d8e91e46SDimitry Andric /// Look-ahead SLP: auto-vectorization in the presence of commutative
12d8e91e46SDimitry Andric /// operations, CGO 2018 by Vasileios Porpodas, Rodrigo C. O. Rocha,
13d8e91e46SDimitry Andric /// Luís F. W. Góes
14d8e91e46SDimitry Andric ///
15d8e91e46SDimitry Andric //===----------------------------------------------------------------------===//
16d8e91e46SDimitry Andric
17d8e91e46SDimitry Andric #include "VPlan.h"
18145449b1SDimitry Andric #include "VPlanValue.h"
19145449b1SDimitry Andric #include "llvm/ADT/DenseMap.h"
20d8e91e46SDimitry Andric #include "llvm/ADT/SmallVector.h"
21d8e91e46SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
22d8e91e46SDimitry Andric #include "llvm/IR/Instruction.h"
23d8e91e46SDimitry Andric #include "llvm/IR/Instructions.h"
24d8e91e46SDimitry Andric #include "llvm/IR/Type.h"
25d8e91e46SDimitry Andric #include "llvm/IR/Value.h"
26d8e91e46SDimitry Andric #include "llvm/Support/Casting.h"
27d8e91e46SDimitry Andric #include "llvm/Support/Debug.h"
28d8e91e46SDimitry Andric #include "llvm/Support/ErrorHandling.h"
29d8e91e46SDimitry Andric #include "llvm/Support/raw_ostream.h"
30344a3780SDimitry Andric #include <algorithm>
31d8e91e46SDimitry Andric #include <cassert>
32e3b55780SDimitry Andric #include <optional>
33344a3780SDimitry Andric #include <utility>
34d8e91e46SDimitry Andric
35d8e91e46SDimitry Andric using namespace llvm;
36d8e91e46SDimitry Andric
37d8e91e46SDimitry Andric #define DEBUG_TYPE "vplan-slp"
38d8e91e46SDimitry Andric
39d8e91e46SDimitry Andric // Number of levels to look ahead when re-ordering multi node operands.
40d8e91e46SDimitry Andric static unsigned LookaheadMaxDepth = 5;
41d8e91e46SDimitry Andric
markFailed()42d8e91e46SDimitry Andric VPInstruction *VPlanSlp::markFailed() {
43d8e91e46SDimitry Andric // FIXME: Currently this is used to signal we hit instructions we cannot
44d8e91e46SDimitry Andric // trivially SLP'ize.
45d8e91e46SDimitry Andric CompletelySLP = false;
46d8e91e46SDimitry Andric return nullptr;
47d8e91e46SDimitry Andric }
48d8e91e46SDimitry Andric
addCombined(ArrayRef<VPValue * > Operands,VPInstruction * New)49d8e91e46SDimitry Andric void VPlanSlp::addCombined(ArrayRef<VPValue *> Operands, VPInstruction *New) {
50d8e91e46SDimitry Andric if (all_of(Operands, [](VPValue *V) {
51d8e91e46SDimitry Andric return cast<VPInstruction>(V)->getUnderlyingInstr();
52d8e91e46SDimitry Andric })) {
53d8e91e46SDimitry Andric unsigned BundleSize = 0;
54d8e91e46SDimitry Andric for (VPValue *V : Operands) {
55d8e91e46SDimitry Andric Type *T = cast<VPInstruction>(V)->getUnderlyingInstr()->getType();
56d8e91e46SDimitry Andric assert(!T->isVectorTy() && "Only scalar types supported for now");
57d8e91e46SDimitry Andric BundleSize += T->getScalarSizeInBits();
58d8e91e46SDimitry Andric }
59d8e91e46SDimitry Andric WidestBundleBits = std::max(WidestBundleBits, BundleSize);
60d8e91e46SDimitry Andric }
61d8e91e46SDimitry Andric
62d8e91e46SDimitry Andric auto Res = BundleToCombined.try_emplace(to_vector<4>(Operands), New);
63d8e91e46SDimitry Andric assert(Res.second &&
64d8e91e46SDimitry Andric "Already created a combined instruction for the operand bundle");
65d8e91e46SDimitry Andric (void)Res;
66d8e91e46SDimitry Andric }
67d8e91e46SDimitry Andric
areVectorizable(ArrayRef<VPValue * > Operands) const68d8e91e46SDimitry Andric bool VPlanSlp::areVectorizable(ArrayRef<VPValue *> Operands) const {
69d8e91e46SDimitry Andric // Currently we only support VPInstructions.
70d8e91e46SDimitry Andric if (!all_of(Operands, [](VPValue *Op) {
71d8e91e46SDimitry Andric return Op && isa<VPInstruction>(Op) &&
72d8e91e46SDimitry Andric cast<VPInstruction>(Op)->getUnderlyingInstr();
73d8e91e46SDimitry Andric })) {
74d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: not all operands are VPInstructions\n");
75d8e91e46SDimitry Andric return false;
76d8e91e46SDimitry Andric }
77d8e91e46SDimitry Andric
78d8e91e46SDimitry Andric // Check if opcodes and type width agree for all instructions in the bundle.
79d8e91e46SDimitry Andric // FIXME: Differing widths/opcodes can be handled by inserting additional
80d8e91e46SDimitry Andric // instructions.
81d8e91e46SDimitry Andric // FIXME: Deal with non-primitive types.
82d8e91e46SDimitry Andric const Instruction *OriginalInstr =
83d8e91e46SDimitry Andric cast<VPInstruction>(Operands[0])->getUnderlyingInstr();
84d8e91e46SDimitry Andric unsigned Opcode = OriginalInstr->getOpcode();
85d8e91e46SDimitry Andric unsigned Width = OriginalInstr->getType()->getPrimitiveSizeInBits();
86d8e91e46SDimitry Andric if (!all_of(Operands, [Opcode, Width](VPValue *Op) {
87d8e91e46SDimitry Andric const Instruction *I = cast<VPInstruction>(Op)->getUnderlyingInstr();
88d8e91e46SDimitry Andric return I->getOpcode() == Opcode &&
89d8e91e46SDimitry Andric I->getType()->getPrimitiveSizeInBits() == Width;
90d8e91e46SDimitry Andric })) {
91d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: Opcodes do not agree \n");
92d8e91e46SDimitry Andric return false;
93d8e91e46SDimitry Andric }
94d8e91e46SDimitry Andric
95d8e91e46SDimitry Andric // For now, all operands must be defined in the same BB.
96d8e91e46SDimitry Andric if (any_of(Operands, [this](VPValue *Op) {
97d8e91e46SDimitry Andric return cast<VPInstruction>(Op)->getParent() != &this->BB;
98d8e91e46SDimitry Andric })) {
99d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: operands in different BBs\n");
100d8e91e46SDimitry Andric return false;
101d8e91e46SDimitry Andric }
102d8e91e46SDimitry Andric
103d8e91e46SDimitry Andric if (any_of(Operands,
104d8e91e46SDimitry Andric [](VPValue *Op) { return Op->hasMoreThanOneUniqueUser(); })) {
105d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: Some operands have multiple users.\n");
106d8e91e46SDimitry Andric return false;
107d8e91e46SDimitry Andric }
108d8e91e46SDimitry Andric
109d8e91e46SDimitry Andric // For loads, check that there are no instructions writing to memory in
110d8e91e46SDimitry Andric // between them.
111d8e91e46SDimitry Andric // TODO: we only have to forbid instructions writing to memory that could
112d8e91e46SDimitry Andric // interfere with any of the loads in the bundle
113d8e91e46SDimitry Andric if (Opcode == Instruction::Load) {
114d8e91e46SDimitry Andric unsigned LoadsSeen = 0;
115d8e91e46SDimitry Andric VPBasicBlock *Parent = cast<VPInstruction>(Operands[0])->getParent();
116d8e91e46SDimitry Andric for (auto &I : *Parent) {
117344a3780SDimitry Andric auto *VPI = dyn_cast<VPInstruction>(&I);
118344a3780SDimitry Andric if (!VPI)
119344a3780SDimitry Andric break;
120d8e91e46SDimitry Andric if (VPI->getOpcode() == Instruction::Load &&
121b60736ecSDimitry Andric llvm::is_contained(Operands, VPI))
122d8e91e46SDimitry Andric LoadsSeen++;
123d8e91e46SDimitry Andric
124d8e91e46SDimitry Andric if (LoadsSeen == Operands.size())
125d8e91e46SDimitry Andric break;
126d8e91e46SDimitry Andric if (LoadsSeen > 0 && VPI->mayWriteToMemory()) {
127d8e91e46SDimitry Andric LLVM_DEBUG(
128d8e91e46SDimitry Andric dbgs() << "VPSLP: instruction modifying memory between loads\n");
129d8e91e46SDimitry Andric return false;
130d8e91e46SDimitry Andric }
131d8e91e46SDimitry Andric }
132d8e91e46SDimitry Andric
133d8e91e46SDimitry Andric if (!all_of(Operands, [](VPValue *Op) {
134d8e91e46SDimitry Andric return cast<LoadInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
135d8e91e46SDimitry Andric ->isSimple();
136d8e91e46SDimitry Andric })) {
137d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: only simple loads are supported.\n");
138d8e91e46SDimitry Andric return false;
139d8e91e46SDimitry Andric }
140d8e91e46SDimitry Andric }
141d8e91e46SDimitry Andric
142d8e91e46SDimitry Andric if (Opcode == Instruction::Store)
143d8e91e46SDimitry Andric if (!all_of(Operands, [](VPValue *Op) {
144d8e91e46SDimitry Andric return cast<StoreInst>(cast<VPInstruction>(Op)->getUnderlyingInstr())
145d8e91e46SDimitry Andric ->isSimple();
146d8e91e46SDimitry Andric })) {
147d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "VPSLP: only simple stores are supported.\n");
148d8e91e46SDimitry Andric return false;
149d8e91e46SDimitry Andric }
150d8e91e46SDimitry Andric
151d8e91e46SDimitry Andric return true;
152d8e91e46SDimitry Andric }
153d8e91e46SDimitry Andric
getOperands(ArrayRef<VPValue * > Values,unsigned OperandIndex)154d8e91e46SDimitry Andric static SmallVector<VPValue *, 4> getOperands(ArrayRef<VPValue *> Values,
155d8e91e46SDimitry Andric unsigned OperandIndex) {
156d8e91e46SDimitry Andric SmallVector<VPValue *, 4> Operands;
157d8e91e46SDimitry Andric for (VPValue *V : Values) {
158b60736ecSDimitry Andric // Currently we only support VPInstructions.
159b60736ecSDimitry Andric auto *U = cast<VPInstruction>(V);
160d8e91e46SDimitry Andric Operands.push_back(U->getOperand(OperandIndex));
161d8e91e46SDimitry Andric }
162d8e91e46SDimitry Andric return Operands;
163d8e91e46SDimitry Andric }
164d8e91e46SDimitry Andric
areCommutative(ArrayRef<VPValue * > Values)165d8e91e46SDimitry Andric static bool areCommutative(ArrayRef<VPValue *> Values) {
166d8e91e46SDimitry Andric return Instruction::isCommutative(
167d8e91e46SDimitry Andric cast<VPInstruction>(Values[0])->getOpcode());
168d8e91e46SDimitry Andric }
169d8e91e46SDimitry Andric
170d8e91e46SDimitry Andric static SmallVector<SmallVector<VPValue *, 4>, 4>
getOperands(ArrayRef<VPValue * > Values)171d8e91e46SDimitry Andric getOperands(ArrayRef<VPValue *> Values) {
172d8e91e46SDimitry Andric SmallVector<SmallVector<VPValue *, 4>, 4> Result;
173d8e91e46SDimitry Andric auto *VPI = cast<VPInstruction>(Values[0]);
174d8e91e46SDimitry Andric
175d8e91e46SDimitry Andric switch (VPI->getOpcode()) {
176d8e91e46SDimitry Andric case Instruction::Load:
177d8e91e46SDimitry Andric llvm_unreachable("Loads terminate a tree, no need to get operands");
178d8e91e46SDimitry Andric case Instruction::Store:
179d8e91e46SDimitry Andric Result.push_back(getOperands(Values, 0));
180d8e91e46SDimitry Andric break;
181d8e91e46SDimitry Andric default:
182d8e91e46SDimitry Andric for (unsigned I = 0, NumOps = VPI->getNumOperands(); I < NumOps; ++I)
183d8e91e46SDimitry Andric Result.push_back(getOperands(Values, I));
184d8e91e46SDimitry Andric break;
185d8e91e46SDimitry Andric }
186d8e91e46SDimitry Andric
187d8e91e46SDimitry Andric return Result;
188d8e91e46SDimitry Andric }
189d8e91e46SDimitry Andric
190d8e91e46SDimitry Andric /// Returns the opcode of Values or ~0 if they do not all agree.
getOpcode(ArrayRef<VPValue * > Values)191e3b55780SDimitry Andric static std::optional<unsigned> getOpcode(ArrayRef<VPValue *> Values) {
192d8e91e46SDimitry Andric unsigned Opcode = cast<VPInstruction>(Values[0])->getOpcode();
193d8e91e46SDimitry Andric if (any_of(Values, [Opcode](VPValue *V) {
194d8e91e46SDimitry Andric return cast<VPInstruction>(V)->getOpcode() != Opcode;
195d8e91e46SDimitry Andric }))
196e3b55780SDimitry Andric return std::nullopt;
197d8e91e46SDimitry Andric return {Opcode};
198d8e91e46SDimitry Andric }
199d8e91e46SDimitry Andric
200d8e91e46SDimitry Andric /// Returns true if A and B access sequential memory if they are loads or
201d8e91e46SDimitry Andric /// stores or if they have identical opcodes otherwise.
areConsecutiveOrMatch(VPInstruction * A,VPInstruction * B,VPInterleavedAccessInfo & IAI)202d8e91e46SDimitry Andric static bool areConsecutiveOrMatch(VPInstruction *A, VPInstruction *B,
203d8e91e46SDimitry Andric VPInterleavedAccessInfo &IAI) {
204d8e91e46SDimitry Andric if (A->getOpcode() != B->getOpcode())
205d8e91e46SDimitry Andric return false;
206d8e91e46SDimitry Andric
207d8e91e46SDimitry Andric if (A->getOpcode() != Instruction::Load &&
208d8e91e46SDimitry Andric A->getOpcode() != Instruction::Store)
209d8e91e46SDimitry Andric return true;
210d8e91e46SDimitry Andric auto *GA = IAI.getInterleaveGroup(A);
211d8e91e46SDimitry Andric auto *GB = IAI.getInterleaveGroup(B);
212d8e91e46SDimitry Andric
213d8e91e46SDimitry Andric return GA && GB && GA == GB && GA->getIndex(A) + 1 == GB->getIndex(B);
214d8e91e46SDimitry Andric }
215d8e91e46SDimitry Andric
216d8e91e46SDimitry Andric /// Implements getLAScore from Listing 7 in the paper.
217d8e91e46SDimitry Andric /// Traverses and compares operands of V1 and V2 to MaxLevel.
getLAScore(VPValue * V1,VPValue * V2,unsigned MaxLevel,VPInterleavedAccessInfo & IAI)218d8e91e46SDimitry Andric static unsigned getLAScore(VPValue *V1, VPValue *V2, unsigned MaxLevel,
219d8e91e46SDimitry Andric VPInterleavedAccessInfo &IAI) {
220b60736ecSDimitry Andric auto *I1 = dyn_cast<VPInstruction>(V1);
221b60736ecSDimitry Andric auto *I2 = dyn_cast<VPInstruction>(V2);
222b60736ecSDimitry Andric // Currently we only support VPInstructions.
223b60736ecSDimitry Andric if (!I1 || !I2)
224d8e91e46SDimitry Andric return 0;
225d8e91e46SDimitry Andric
226d8e91e46SDimitry Andric if (MaxLevel == 0)
227b60736ecSDimitry Andric return (unsigned)areConsecutiveOrMatch(I1, I2, IAI);
228d8e91e46SDimitry Andric
229d8e91e46SDimitry Andric unsigned Score = 0;
230b60736ecSDimitry Andric for (unsigned I = 0, EV1 = I1->getNumOperands(); I < EV1; ++I)
231b60736ecSDimitry Andric for (unsigned J = 0, EV2 = I2->getNumOperands(); J < EV2; ++J)
232b60736ecSDimitry Andric Score +=
233b60736ecSDimitry Andric getLAScore(I1->getOperand(I), I2->getOperand(J), MaxLevel - 1, IAI);
234d8e91e46SDimitry Andric return Score;
235d8e91e46SDimitry Andric }
236d8e91e46SDimitry Andric
237d8e91e46SDimitry Andric std::pair<VPlanSlp::OpMode, VPValue *>
getBest(OpMode Mode,VPValue * Last,SmallPtrSetImpl<VPValue * > & Candidates,VPInterleavedAccessInfo & IAI)238d8e91e46SDimitry Andric VPlanSlp::getBest(OpMode Mode, VPValue *Last,
239d8e91e46SDimitry Andric SmallPtrSetImpl<VPValue *> &Candidates,
240d8e91e46SDimitry Andric VPInterleavedAccessInfo &IAI) {
241d8e91e46SDimitry Andric assert((Mode == OpMode::Load || Mode == OpMode::Opcode) &&
242d8e91e46SDimitry Andric "Currently we only handle load and commutative opcodes");
243d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " getBest\n");
244d8e91e46SDimitry Andric
245d8e91e46SDimitry Andric SmallVector<VPValue *, 4> BestCandidates;
246d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Candidates for "
247d8e91e46SDimitry Andric << *cast<VPInstruction>(Last)->getUnderlyingInstr() << " ");
248d8e91e46SDimitry Andric for (auto *Candidate : Candidates) {
249d8e91e46SDimitry Andric auto *LastI = cast<VPInstruction>(Last);
250d8e91e46SDimitry Andric auto *CandidateI = cast<VPInstruction>(Candidate);
251d8e91e46SDimitry Andric if (areConsecutiveOrMatch(LastI, CandidateI, IAI)) {
252d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << *cast<VPInstruction>(Candidate)->getUnderlyingInstr()
253d8e91e46SDimitry Andric << " ");
254d8e91e46SDimitry Andric BestCandidates.push_back(Candidate);
255d8e91e46SDimitry Andric }
256d8e91e46SDimitry Andric }
257d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
258d8e91e46SDimitry Andric
259d8e91e46SDimitry Andric if (BestCandidates.empty())
260d8e91e46SDimitry Andric return {OpMode::Failed, nullptr};
261d8e91e46SDimitry Andric
262d8e91e46SDimitry Andric if (BestCandidates.size() == 1)
263d8e91e46SDimitry Andric return {Mode, BestCandidates[0]};
264d8e91e46SDimitry Andric
265d8e91e46SDimitry Andric VPValue *Best = nullptr;
266d8e91e46SDimitry Andric unsigned BestScore = 0;
267d8e91e46SDimitry Andric for (unsigned Depth = 1; Depth < LookaheadMaxDepth; Depth++) {
268d8e91e46SDimitry Andric unsigned PrevScore = ~0u;
269d8e91e46SDimitry Andric bool AllSame = true;
270d8e91e46SDimitry Andric
271d8e91e46SDimitry Andric // FIXME: Avoid visiting the same operands multiple times.
272d8e91e46SDimitry Andric for (auto *Candidate : BestCandidates) {
273d8e91e46SDimitry Andric unsigned Score = getLAScore(Last, Candidate, Depth, IAI);
274d8e91e46SDimitry Andric if (PrevScore == ~0u)
275d8e91e46SDimitry Andric PrevScore = Score;
276d8e91e46SDimitry Andric if (PrevScore != Score)
277d8e91e46SDimitry Andric AllSame = false;
278d8e91e46SDimitry Andric PrevScore = Score;
279d8e91e46SDimitry Andric
280d8e91e46SDimitry Andric if (Score > BestScore) {
281d8e91e46SDimitry Andric BestScore = Score;
282d8e91e46SDimitry Andric Best = Candidate;
283d8e91e46SDimitry Andric }
284d8e91e46SDimitry Andric }
285d8e91e46SDimitry Andric if (!AllSame)
286d8e91e46SDimitry Andric break;
287d8e91e46SDimitry Andric }
288d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "Found best "
289d8e91e46SDimitry Andric << *cast<VPInstruction>(Best)->getUnderlyingInstr()
290d8e91e46SDimitry Andric << "\n");
291d8e91e46SDimitry Andric Candidates.erase(Best);
292d8e91e46SDimitry Andric
293d8e91e46SDimitry Andric return {Mode, Best};
294d8e91e46SDimitry Andric }
295d8e91e46SDimitry Andric
reorderMultiNodeOps()296d8e91e46SDimitry Andric SmallVector<VPlanSlp::MultiNodeOpTy, 4> VPlanSlp::reorderMultiNodeOps() {
297d8e91e46SDimitry Andric SmallVector<MultiNodeOpTy, 4> FinalOrder;
298d8e91e46SDimitry Andric SmallVector<OpMode, 4> Mode;
299d8e91e46SDimitry Andric FinalOrder.reserve(MultiNodeOps.size());
300d8e91e46SDimitry Andric Mode.reserve(MultiNodeOps.size());
301d8e91e46SDimitry Andric
302d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "Reordering multinode\n");
303d8e91e46SDimitry Andric
304d8e91e46SDimitry Andric for (auto &Operands : MultiNodeOps) {
305d8e91e46SDimitry Andric FinalOrder.push_back({Operands.first, {Operands.second[0]}});
306d8e91e46SDimitry Andric if (cast<VPInstruction>(Operands.second[0])->getOpcode() ==
307d8e91e46SDimitry Andric Instruction::Load)
308d8e91e46SDimitry Andric Mode.push_back(OpMode::Load);
309d8e91e46SDimitry Andric else
310d8e91e46SDimitry Andric Mode.push_back(OpMode::Opcode);
311d8e91e46SDimitry Andric }
312d8e91e46SDimitry Andric
313d8e91e46SDimitry Andric for (unsigned Lane = 1, E = MultiNodeOps[0].second.size(); Lane < E; ++Lane) {
314d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Finding best value for lane " << Lane << "\n");
315d8e91e46SDimitry Andric SmallPtrSet<VPValue *, 4> Candidates;
316d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Candidates ");
317d8e91e46SDimitry Andric for (auto Ops : MultiNodeOps) {
318d8e91e46SDimitry Andric LLVM_DEBUG(
319d8e91e46SDimitry Andric dbgs() << *cast<VPInstruction>(Ops.second[Lane])->getUnderlyingInstr()
320d8e91e46SDimitry Andric << " ");
321d8e91e46SDimitry Andric Candidates.insert(Ops.second[Lane]);
322d8e91e46SDimitry Andric }
323d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
324d8e91e46SDimitry Andric
325d8e91e46SDimitry Andric for (unsigned Op = 0, E = MultiNodeOps.size(); Op < E; ++Op) {
326d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Checking " << Op << "\n");
327d8e91e46SDimitry Andric if (Mode[Op] == OpMode::Failed)
328d8e91e46SDimitry Andric continue;
329d8e91e46SDimitry Andric
330d8e91e46SDimitry Andric VPValue *Last = FinalOrder[Op].second[Lane - 1];
331d8e91e46SDimitry Andric std::pair<OpMode, VPValue *> Res =
332d8e91e46SDimitry Andric getBest(Mode[Op], Last, Candidates, IAI);
333d8e91e46SDimitry Andric if (Res.second)
334d8e91e46SDimitry Andric FinalOrder[Op].second.push_back(Res.second);
335d8e91e46SDimitry Andric else
336d8e91e46SDimitry Andric // TODO: handle this case
337d8e91e46SDimitry Andric FinalOrder[Op].second.push_back(markFailed());
338d8e91e46SDimitry Andric }
339d8e91e46SDimitry Andric }
340d8e91e46SDimitry Andric
341d8e91e46SDimitry Andric return FinalOrder;
342d8e91e46SDimitry Andric }
343d8e91e46SDimitry Andric
344344a3780SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dumpBundle(ArrayRef<VPValue * > Values)345d8e91e46SDimitry Andric void VPlanSlp::dumpBundle(ArrayRef<VPValue *> Values) {
346d8e91e46SDimitry Andric dbgs() << " Ops: ";
347e3b55780SDimitry Andric for (auto *Op : Values) {
3481d5ae102SDimitry Andric if (auto *VPInstr = cast_or_null<VPInstruction>(Op))
3491d5ae102SDimitry Andric if (auto *Instr = VPInstr->getUnderlyingInstr()) {
350d8e91e46SDimitry Andric dbgs() << *Instr << " | ";
3511d5ae102SDimitry Andric continue;
3521d5ae102SDimitry Andric }
353d8e91e46SDimitry Andric dbgs() << " nullptr | ";
3541d5ae102SDimitry Andric }
355d8e91e46SDimitry Andric dbgs() << "\n";
356d8e91e46SDimitry Andric }
357344a3780SDimitry Andric #endif
358d8e91e46SDimitry Andric
buildGraph(ArrayRef<VPValue * > Values)359d8e91e46SDimitry Andric VPInstruction *VPlanSlp::buildGraph(ArrayRef<VPValue *> Values) {
360d8e91e46SDimitry Andric assert(!Values.empty() && "Need some operands!");
361d8e91e46SDimitry Andric
362d8e91e46SDimitry Andric // If we already visited this instruction bundle, re-use the existing node
363d8e91e46SDimitry Andric auto I = BundleToCombined.find(to_vector<4>(Values));
364d8e91e46SDimitry Andric if (I != BundleToCombined.end()) {
365d8e91e46SDimitry Andric #ifndef NDEBUG
366d8e91e46SDimitry Andric // Check that the resulting graph is a tree. If we re-use a node, this means
367d8e91e46SDimitry Andric // its values have multiple users. We only allow this, if all users of each
368d8e91e46SDimitry Andric // value are the same instruction.
369d8e91e46SDimitry Andric for (auto *V : Values) {
370d8e91e46SDimitry Andric auto UI = V->user_begin();
371d8e91e46SDimitry Andric auto *FirstUser = *UI++;
372d8e91e46SDimitry Andric while (UI != V->user_end()) {
373d8e91e46SDimitry Andric assert(*UI == FirstUser && "Currently we only support SLP trees.");
374d8e91e46SDimitry Andric UI++;
375d8e91e46SDimitry Andric }
376d8e91e46SDimitry Andric }
377d8e91e46SDimitry Andric #endif
378d8e91e46SDimitry Andric return I->second;
379d8e91e46SDimitry Andric }
380d8e91e46SDimitry Andric
381d8e91e46SDimitry Andric // Dump inputs
382d8e91e46SDimitry Andric LLVM_DEBUG({
383d8e91e46SDimitry Andric dbgs() << "buildGraph: ";
384d8e91e46SDimitry Andric dumpBundle(Values);
385d8e91e46SDimitry Andric });
386d8e91e46SDimitry Andric
387d8e91e46SDimitry Andric if (!areVectorizable(Values))
388d8e91e46SDimitry Andric return markFailed();
389d8e91e46SDimitry Andric
390d8e91e46SDimitry Andric assert(getOpcode(Values) && "Opcodes for all values must match");
391145449b1SDimitry Andric unsigned ValuesOpcode = *getOpcode(Values);
392d8e91e46SDimitry Andric
393d8e91e46SDimitry Andric SmallVector<VPValue *, 4> CombinedOperands;
394d8e91e46SDimitry Andric if (areCommutative(Values)) {
395d8e91e46SDimitry Andric bool MultiNodeRoot = !MultiNodeActive;
396d8e91e46SDimitry Andric MultiNodeActive = true;
397d8e91e46SDimitry Andric for (auto &Operands : getOperands(Values)) {
398d8e91e46SDimitry Andric LLVM_DEBUG({
399d8e91e46SDimitry Andric dbgs() << " Visiting Commutative";
400d8e91e46SDimitry Andric dumpBundle(Operands);
401d8e91e46SDimitry Andric });
402d8e91e46SDimitry Andric
403d8e91e46SDimitry Andric auto OperandsOpcode = getOpcode(Operands);
404d8e91e46SDimitry Andric if (OperandsOpcode && OperandsOpcode == getOpcode(Values)) {
405d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Same opcode, continue building\n");
406d8e91e46SDimitry Andric CombinedOperands.push_back(buildGraph(Operands));
407d8e91e46SDimitry Andric } else {
408d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " Adding multinode Ops\n");
409d8e91e46SDimitry Andric // Create dummy VPInstruction, which will we replace later by the
410d8e91e46SDimitry Andric // re-ordered operand.
411d8e91e46SDimitry Andric VPInstruction *Op = new VPInstruction(0, {});
412d8e91e46SDimitry Andric CombinedOperands.push_back(Op);
413d8e91e46SDimitry Andric MultiNodeOps.emplace_back(Op, Operands);
414d8e91e46SDimitry Andric }
415d8e91e46SDimitry Andric }
416d8e91e46SDimitry Andric
417d8e91e46SDimitry Andric if (MultiNodeRoot) {
418d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "Reorder \n");
419d8e91e46SDimitry Andric MultiNodeActive = false;
420d8e91e46SDimitry Andric
421d8e91e46SDimitry Andric auto FinalOrder = reorderMultiNodeOps();
422d8e91e46SDimitry Andric
423d8e91e46SDimitry Andric MultiNodeOps.clear();
424d8e91e46SDimitry Andric for (auto &Ops : FinalOrder) {
425d8e91e46SDimitry Andric VPInstruction *NewOp = buildGraph(Ops.second);
426d8e91e46SDimitry Andric Ops.first->replaceAllUsesWith(NewOp);
427d8e91e46SDimitry Andric for (unsigned i = 0; i < CombinedOperands.size(); i++)
428d8e91e46SDimitry Andric if (CombinedOperands[i] == Ops.first)
429d8e91e46SDimitry Andric CombinedOperands[i] = NewOp;
430d8e91e46SDimitry Andric delete Ops.first;
431d8e91e46SDimitry Andric Ops.first = NewOp;
432d8e91e46SDimitry Andric }
433d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << "Found final order\n");
434d8e91e46SDimitry Andric }
435d8e91e46SDimitry Andric } else {
436d8e91e46SDimitry Andric LLVM_DEBUG(dbgs() << " NonCommuntative\n");
437d8e91e46SDimitry Andric if (ValuesOpcode == Instruction::Load)
438d8e91e46SDimitry Andric for (VPValue *V : Values)
439d8e91e46SDimitry Andric CombinedOperands.push_back(cast<VPInstruction>(V)->getOperand(0));
440d8e91e46SDimitry Andric else
441d8e91e46SDimitry Andric for (auto &Operands : getOperands(Values))
442d8e91e46SDimitry Andric CombinedOperands.push_back(buildGraph(Operands));
443d8e91e46SDimitry Andric }
444d8e91e46SDimitry Andric
445d8e91e46SDimitry Andric unsigned Opcode;
446d8e91e46SDimitry Andric switch (ValuesOpcode) {
447d8e91e46SDimitry Andric case Instruction::Load:
448d8e91e46SDimitry Andric Opcode = VPInstruction::SLPLoad;
449d8e91e46SDimitry Andric break;
450d8e91e46SDimitry Andric case Instruction::Store:
451d8e91e46SDimitry Andric Opcode = VPInstruction::SLPStore;
452d8e91e46SDimitry Andric break;
453d8e91e46SDimitry Andric default:
454d8e91e46SDimitry Andric Opcode = ValuesOpcode;
455d8e91e46SDimitry Andric break;
456d8e91e46SDimitry Andric }
457d8e91e46SDimitry Andric
458d8e91e46SDimitry Andric if (!CompletelySLP)
459d8e91e46SDimitry Andric return markFailed();
460d8e91e46SDimitry Andric
461d8e91e46SDimitry Andric assert(CombinedOperands.size() > 0 && "Need more some operands");
46277fc4c14SDimitry Andric auto *Inst = cast<VPInstruction>(Values[0])->getUnderlyingInstr();
46377fc4c14SDimitry Andric auto *VPI = new VPInstruction(Opcode, CombinedOperands, Inst->getDebugLoc());
464d8e91e46SDimitry Andric
465b60736ecSDimitry Andric LLVM_DEBUG(dbgs() << "Create VPInstruction " << *VPI << " "
466b60736ecSDimitry Andric << *cast<VPInstruction>(Values[0]) << "\n");
467d8e91e46SDimitry Andric addCombined(Values, VPI);
468d8e91e46SDimitry Andric return VPI;
469d8e91e46SDimitry Andric }
470