xref: /src/contrib/llvm-project/llvm/lib/CodeGen/WindowScheduler.cpp (revision 6c4b055cfb6bf549e9145dde6454cc6b178c35e4)
1ac9a064cSDimitry Andric //======----------- WindowScheduler.cpp - window scheduler -------------======//
2ac9a064cSDimitry Andric //
3ac9a064cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ac9a064cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5ac9a064cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ac9a064cSDimitry Andric //
7ac9a064cSDimitry Andric //===----------------------------------------------------------------------===//
8ac9a064cSDimitry Andric //
9ac9a064cSDimitry Andric // An implementation of the Window Scheduling software pipelining algorithm.
10ac9a064cSDimitry Andric //
11ac9a064cSDimitry Andric // The fundamental concept of the window scheduling algorithm involves folding
12ac9a064cSDimitry Andric // the original MBB at a specific position, followed by list scheduling on the
13ac9a064cSDimitry Andric // folded MIs. The optimal scheduling result is then chosen from various folding
14ac9a064cSDimitry Andric // positions as the final scheduling outcome.
15ac9a064cSDimitry Andric //
16ac9a064cSDimitry Andric // The primary challenge in this algorithm lies in generating the folded MIs and
17ac9a064cSDimitry Andric // establishing their dependencies. We have innovatively employed a new MBB,
18ac9a064cSDimitry Andric // created by copying the original MBB three times, known as TripleMBB. This
19ac9a064cSDimitry Andric // TripleMBB enables the convenient implementation of MI folding and dependency
20ac9a064cSDimitry Andric // establishment. To facilitate the algorithm's implementation, we have also
21ac9a064cSDimitry Andric // devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
22ac9a064cSDimitry Andric //
23ac9a064cSDimitry Andric // Another challenge in the algorithm is the scheduling of phis. Semantically,
24ac9a064cSDimitry Andric // it is difficult to place the phis in the window and perform list scheduling.
25ac9a064cSDimitry Andric // Therefore, we schedule these phis separately after each list scheduling.
26ac9a064cSDimitry Andric //
27ac9a064cSDimitry Andric // The provided implementation is designed for use before the Register Allocator
28ac9a064cSDimitry Andric // (RA). If the target requires implementation after RA, it is recommended to
29ac9a064cSDimitry Andric // reimplement analyseII(), schedulePhi(), and expand(). Additionally,
30ac9a064cSDimitry Andric // target-specific logic can be added in initialize(), preProcess(), and
31ac9a064cSDimitry Andric // postProcess().
32ac9a064cSDimitry Andric //
33ac9a064cSDimitry Andric // Lastly, it is worth mentioning that getSearchIndexes() is an important
34ac9a064cSDimitry Andric // function. We have experimented with more complex heuristics on downstream
35ac9a064cSDimitry Andric // target and achieved favorable results.
36ac9a064cSDimitry Andric //
37ac9a064cSDimitry Andric //===----------------------------------------------------------------------===//
38ac9a064cSDimitry Andric #include "llvm/CodeGen/WindowScheduler.h"
39ac9a064cSDimitry Andric #include "llvm/ADT/Statistic.h"
40ac9a064cSDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
41ac9a064cSDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
42ac9a064cSDimitry Andric #include "llvm/CodeGen/MachinePipeliner.h"
43ac9a064cSDimitry Andric #include "llvm/CodeGen/ModuloSchedule.h"
44ac9a064cSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
45ac9a064cSDimitry Andric #include "llvm/Support/CommandLine.h"
46ac9a064cSDimitry Andric #include "llvm/Support/Debug.h"
47ac9a064cSDimitry Andric #include "llvm/Support/TimeProfiler.h"
48ac9a064cSDimitry Andric 
49ac9a064cSDimitry Andric using namespace llvm;
50ac9a064cSDimitry Andric 
51ac9a064cSDimitry Andric #define DEBUG_TYPE "pipeliner"
52ac9a064cSDimitry Andric 
53ac9a064cSDimitry Andric namespace {
54ac9a064cSDimitry Andric STATISTIC(NumTryWindowSchedule,
55ac9a064cSDimitry Andric           "Number of loops that we attempt to use window scheduling");
56ac9a064cSDimitry Andric STATISTIC(NumTryWindowSearch,
57ac9a064cSDimitry Andric           "Number of times that we run list schedule in the window scheduling");
58ac9a064cSDimitry Andric STATISTIC(NumWindowSchedule,
59ac9a064cSDimitry Andric           "Number of loops that we successfully use window scheduling");
60ac9a064cSDimitry Andric STATISTIC(NumFailAnalyseII,
61ac9a064cSDimitry Andric           "Window scheduling abort due to the failure of the II analysis");
62ac9a064cSDimitry Andric 
63ac9a064cSDimitry Andric cl::opt<unsigned>
64ac9a064cSDimitry Andric     WindowSearchNum("window-search-num",
65ac9a064cSDimitry Andric                     cl::desc("The number of searches per loop in the window "
66ac9a064cSDimitry Andric                              "algorithm. 0 means no search number limit."),
67ac9a064cSDimitry Andric                     cl::Hidden, cl::init(6));
68ac9a064cSDimitry Andric 
69ac9a064cSDimitry Andric cl::opt<unsigned> WindowSearchRatio(
70ac9a064cSDimitry Andric     "window-search-ratio",
71ac9a064cSDimitry Andric     cl::desc("The ratio of searches per loop in the window algorithm. 100 "
72ac9a064cSDimitry Andric              "means search all positions in the loop, while 0 means not "
73ac9a064cSDimitry Andric              "performing any search."),
74ac9a064cSDimitry Andric     cl::Hidden, cl::init(40));
75ac9a064cSDimitry Andric 
76ac9a064cSDimitry Andric cl::opt<unsigned> WindowIICoeff(
77ac9a064cSDimitry Andric     "window-ii-coeff",
78ac9a064cSDimitry Andric     cl::desc(
79ac9a064cSDimitry Andric         "The coefficient used when initializing II in the window algorithm."),
80ac9a064cSDimitry Andric     cl::Hidden, cl::init(5));
81ac9a064cSDimitry Andric 
82ac9a064cSDimitry Andric cl::opt<unsigned> WindowRegionLimit(
83ac9a064cSDimitry Andric     "window-region-limit",
84ac9a064cSDimitry Andric     cl::desc(
85ac9a064cSDimitry Andric         "The lower limit of the scheduling region in the window algorithm."),
86ac9a064cSDimitry Andric     cl::Hidden, cl::init(3));
87ac9a064cSDimitry Andric 
88ac9a064cSDimitry Andric cl::opt<unsigned> WindowDiffLimit(
89ac9a064cSDimitry Andric     "window-diff-limit",
90ac9a064cSDimitry Andric     cl::desc("The lower limit of the difference between best II and base II in "
91ac9a064cSDimitry Andric              "the window algorithm. If the difference is smaller than "
92ac9a064cSDimitry Andric              "this lower limit, window scheduling will not be performed."),
93ac9a064cSDimitry Andric     cl::Hidden, cl::init(2));
94ac9a064cSDimitry Andric } // namespace
95ac9a064cSDimitry Andric 
96ac9a064cSDimitry Andric // WindowIILimit serves as an indicator of abnormal scheduling results and could
97ac9a064cSDimitry Andric // potentially be referenced by the derived target window scheduler.
98ac9a064cSDimitry Andric cl::opt<unsigned>
99ac9a064cSDimitry Andric     WindowIILimit("window-ii-limit",
100ac9a064cSDimitry Andric                   cl::desc("The upper limit of II in the window algorithm."),
101ac9a064cSDimitry Andric                   cl::Hidden, cl::init(1000));
102ac9a064cSDimitry Andric 
WindowScheduler(MachineSchedContext * C,MachineLoop & ML)103ac9a064cSDimitry Andric WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
104ac9a064cSDimitry Andric     : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML),
105ac9a064cSDimitry Andric       Subtarget(&MF->getSubtarget()), TII(Subtarget->getInstrInfo()),
106ac9a064cSDimitry Andric       TRI(Subtarget->getRegisterInfo()), MRI(&MF->getRegInfo()) {
107ac9a064cSDimitry Andric   TripleDAG = std::unique_ptr<ScheduleDAGInstrs>(
108ac9a064cSDimitry Andric       createMachineScheduler(/*OnlyBuildGraph=*/true));
109ac9a064cSDimitry Andric }
110ac9a064cSDimitry Andric 
run()111ac9a064cSDimitry Andric bool WindowScheduler::run() {
112ac9a064cSDimitry Andric   if (!initialize()) {
113ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
114ac9a064cSDimitry Andric     return false;
115ac9a064cSDimitry Andric   }
116ac9a064cSDimitry Andric   // The window algorithm is time-consuming, and its compilation time should be
117ac9a064cSDimitry Andric   // taken into consideration.
118ac9a064cSDimitry Andric   TimeTraceScope Scope("WindowSearch");
119ac9a064cSDimitry Andric   ++NumTryWindowSchedule;
120ac9a064cSDimitry Andric   // Performing the relevant processing before window scheduling.
121ac9a064cSDimitry Andric   preProcess();
122ac9a064cSDimitry Andric   // The main window scheduling begins.
123ac9a064cSDimitry Andric   std::unique_ptr<ScheduleDAGInstrs> SchedDAG(createMachineScheduler());
124ac9a064cSDimitry Andric   auto SearchIndexes = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
125ac9a064cSDimitry Andric   for (unsigned Idx : SearchIndexes) {
126ac9a064cSDimitry Andric     OriToCycle.clear();
127ac9a064cSDimitry Andric     ++NumTryWindowSearch;
128ac9a064cSDimitry Andric     // The scheduling starts with non-phi instruction, so SchedPhiNum needs to
129ac9a064cSDimitry Andric     // be added to Idx.
130ac9a064cSDimitry Andric     unsigned Offset = Idx + SchedPhiNum;
131ac9a064cSDimitry Andric     auto Range = getScheduleRange(Offset, SchedInstrNum);
132ac9a064cSDimitry Andric     SchedDAG->startBlock(MBB);
133ac9a064cSDimitry Andric     SchedDAG->enterRegion(MBB, Range.begin(), Range.end(), SchedInstrNum);
134ac9a064cSDimitry Andric     SchedDAG->schedule();
135ac9a064cSDimitry Andric     LLVM_DEBUG(SchedDAG->dump());
136ac9a064cSDimitry Andric     unsigned II = analyseII(*SchedDAG, Offset);
137ac9a064cSDimitry Andric     if (II == WindowIILimit) {
138ac9a064cSDimitry Andric       restoreTripleMBB();
139ac9a064cSDimitry Andric       LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
140ac9a064cSDimitry Andric       ++NumFailAnalyseII;
141ac9a064cSDimitry Andric       continue;
142ac9a064cSDimitry Andric     }
143ac9a064cSDimitry Andric     schedulePhi(Offset, II);
144ac9a064cSDimitry Andric     updateScheduleResult(Offset, II);
145ac9a064cSDimitry Andric     restoreTripleMBB();
146ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset << " and II is "
147ac9a064cSDimitry Andric                       << II << ".\n");
148ac9a064cSDimitry Andric   }
149ac9a064cSDimitry Andric   // Performing the relevant processing after window scheduling.
150ac9a064cSDimitry Andric   postProcess();
151ac9a064cSDimitry Andric   // Check whether the scheduling result is valid.
152ac9a064cSDimitry Andric   if (!isScheduleValid()) {
153ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
154ac9a064cSDimitry Andric     return false;
155ac9a064cSDimitry Andric   }
156ac9a064cSDimitry Andric   LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
157ac9a064cSDimitry Andric                     << " and Best II is " << BestII << ".\n");
158ac9a064cSDimitry Andric   // Expand the scheduling result to prologue, kernel, and epilogue.
159ac9a064cSDimitry Andric   expand();
160ac9a064cSDimitry Andric   ++NumWindowSchedule;
161ac9a064cSDimitry Andric   return true;
162ac9a064cSDimitry Andric }
163ac9a064cSDimitry Andric 
164ac9a064cSDimitry Andric ScheduleDAGInstrs *
createMachineScheduler(bool OnlyBuildGraph)165ac9a064cSDimitry Andric WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) {
166ac9a064cSDimitry Andric   return OnlyBuildGraph
167ac9a064cSDimitry Andric              ? new ScheduleDAGMI(
168ac9a064cSDimitry Andric                    Context, std::make_unique<PostGenericScheduler>(Context),
169ac9a064cSDimitry Andric                    true)
170ac9a064cSDimitry Andric              : Context->PassConfig->createMachineScheduler(Context);
171ac9a064cSDimitry Andric }
172ac9a064cSDimitry Andric 
initialize()173ac9a064cSDimitry Andric bool WindowScheduler::initialize() {
174ac9a064cSDimitry Andric   if (!Subtarget->enableWindowScheduler()) {
175ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
176ac9a064cSDimitry Andric     return false;
177ac9a064cSDimitry Andric   }
178ac9a064cSDimitry Andric   // Initialized the member variables used by window algorithm.
179ac9a064cSDimitry Andric   OriMIs.clear();
180ac9a064cSDimitry Andric   TriMIs.clear();
181ac9a064cSDimitry Andric   TriToOri.clear();
182ac9a064cSDimitry Andric   OriToCycle.clear();
183ac9a064cSDimitry Andric   SchedResult.clear();
184ac9a064cSDimitry Andric   SchedPhiNum = 0;
185ac9a064cSDimitry Andric   SchedInstrNum = 0;
186ac9a064cSDimitry Andric   BestII = UINT_MAX;
187ac9a064cSDimitry Andric   BestOffset = 0;
188ac9a064cSDimitry Andric   BaseII = 0;
189ac9a064cSDimitry Andric   // List scheduling used in the window algorithm depends on LiveIntervals.
190ac9a064cSDimitry Andric   if (!Context->LIS) {
191ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
192ac9a064cSDimitry Andric     return false;
193ac9a064cSDimitry Andric   }
194ac9a064cSDimitry Andric   // Check each MI in MBB.
195ac9a064cSDimitry Andric   SmallSet<Register, 8> PrevDefs;
196ac9a064cSDimitry Andric   SmallSet<Register, 8> PrevUses;
197ac9a064cSDimitry Andric   auto IsLoopCarried = [&](MachineInstr &Phi) {
198ac9a064cSDimitry Andric     // Two cases are checked here: (1)The virtual register defined by the
199ac9a064cSDimitry Andric     // preceding phi is used by the succeeding phi;(2)The preceding phi uses the
200ac9a064cSDimitry Andric     // virtual register defined by the succeeding phi.
201ac9a064cSDimitry Andric     if (PrevUses.count(Phi.getOperand(0).getReg()))
202ac9a064cSDimitry Andric       return true;
203ac9a064cSDimitry Andric     PrevDefs.insert(Phi.getOperand(0).getReg());
204ac9a064cSDimitry Andric     for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
205ac9a064cSDimitry Andric       if (PrevDefs.count(Phi.getOperand(I).getReg()))
206ac9a064cSDimitry Andric         return true;
207ac9a064cSDimitry Andric       PrevUses.insert(Phi.getOperand(I).getReg());
208ac9a064cSDimitry Andric     }
209ac9a064cSDimitry Andric     return false;
210ac9a064cSDimitry Andric   };
211ac9a064cSDimitry Andric   auto PLI = TII->analyzeLoopForPipelining(MBB);
212ac9a064cSDimitry Andric   for (auto &MI : *MBB) {
213ac9a064cSDimitry Andric     if (MI.isMetaInstruction() || MI.isTerminator())
214ac9a064cSDimitry Andric       continue;
215ac9a064cSDimitry Andric     if (MI.isPHI()) {
216ac9a064cSDimitry Andric       if (IsLoopCarried(MI)) {
217ac9a064cSDimitry Andric         LLVM_DEBUG(dbgs() << "Loop carried phis are not supported yet!\n");
218ac9a064cSDimitry Andric         return false;
219ac9a064cSDimitry Andric       }
220ac9a064cSDimitry Andric       ++SchedPhiNum;
221ac9a064cSDimitry Andric       ++BestOffset;
222ac9a064cSDimitry Andric     } else
223ac9a064cSDimitry Andric       ++SchedInstrNum;
224ac9a064cSDimitry Andric     if (TII->isSchedulingBoundary(MI, MBB, *MF)) {
225ac9a064cSDimitry Andric       LLVM_DEBUG(
226ac9a064cSDimitry Andric           dbgs() << "Boundary MI is not allowed in window scheduling!\n");
227ac9a064cSDimitry Andric       return false;
228ac9a064cSDimitry Andric     }
229ac9a064cSDimitry Andric     if (PLI->shouldIgnoreForPipelining(&MI)) {
230ac9a064cSDimitry Andric       LLVM_DEBUG(dbgs() << "Special MI defined by target is not allowed in "
231ac9a064cSDimitry Andric                            "window scheduling!\n");
232ac9a064cSDimitry Andric       return false;
233ac9a064cSDimitry Andric     }
234ac9a064cSDimitry Andric     for (auto &Def : MI.all_defs())
2357432c960SDimitry Andric       if (Def.isReg() && Def.getReg().isPhysical()) {
2367432c960SDimitry Andric         LLVM_DEBUG(dbgs() << "Physical registers are not supported in "
2377432c960SDimitry Andric                              "window scheduling!\n");
238ac9a064cSDimitry Andric         return false;
239ac9a064cSDimitry Andric       }
2407432c960SDimitry Andric   }
241ac9a064cSDimitry Andric   if (SchedInstrNum <= WindowRegionLimit) {
242ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "There are too few MIs in the window region!\n");
243ac9a064cSDimitry Andric     return false;
244ac9a064cSDimitry Andric   }
245ac9a064cSDimitry Andric   return true;
246ac9a064cSDimitry Andric }
247ac9a064cSDimitry Andric 
preProcess()248ac9a064cSDimitry Andric void WindowScheduler::preProcess() {
249ac9a064cSDimitry Andric   // Prior to window scheduling, it's necessary to backup the original MBB,
250ac9a064cSDimitry Andric   // generate a new TripleMBB, and build a TripleDAG based on the TripleMBB.
251ac9a064cSDimitry Andric   backupMBB();
252ac9a064cSDimitry Andric   generateTripleMBB();
253ac9a064cSDimitry Andric   TripleDAG->startBlock(MBB);
254ac9a064cSDimitry Andric   TripleDAG->enterRegion(
255ac9a064cSDimitry Andric       MBB, MBB->begin(), MBB->getFirstTerminator(),
256ac9a064cSDimitry Andric       std::distance(MBB->begin(), MBB->getFirstTerminator()));
257ac9a064cSDimitry Andric   TripleDAG->buildSchedGraph(Context->AA);
258ac9a064cSDimitry Andric }
259ac9a064cSDimitry Andric 
postProcess()260ac9a064cSDimitry Andric void WindowScheduler::postProcess() {
261ac9a064cSDimitry Andric   // After window scheduling, it's necessary to clear the TripleDAG and restore
262ac9a064cSDimitry Andric   // to the original MBB.
263ac9a064cSDimitry Andric   TripleDAG->exitRegion();
264ac9a064cSDimitry Andric   TripleDAG->finishBlock();
265ac9a064cSDimitry Andric   restoreMBB();
266ac9a064cSDimitry Andric }
267ac9a064cSDimitry Andric 
backupMBB()268ac9a064cSDimitry Andric void WindowScheduler::backupMBB() {
269ac9a064cSDimitry Andric   for (auto &MI : MBB->instrs())
270ac9a064cSDimitry Andric     OriMIs.push_back(&MI);
271ac9a064cSDimitry Andric   // Remove MIs and the corresponding live intervals.
272ac9a064cSDimitry Andric   for (auto &MI : make_early_inc_range(*MBB)) {
273ac9a064cSDimitry Andric     Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
274ac9a064cSDimitry Andric     MBB->remove(&MI);
275ac9a064cSDimitry Andric   }
276ac9a064cSDimitry Andric }
277ac9a064cSDimitry Andric 
restoreMBB()278ac9a064cSDimitry Andric void WindowScheduler::restoreMBB() {
279ac9a064cSDimitry Andric   // Erase MIs and the corresponding live intervals.
280ac9a064cSDimitry Andric   for (auto &MI : make_early_inc_range(*MBB)) {
281ac9a064cSDimitry Andric     Context->LIS->getSlotIndexes()->removeMachineInstrFromMaps(MI, true);
282ac9a064cSDimitry Andric     MI.eraseFromParent();
283ac9a064cSDimitry Andric   }
284ac9a064cSDimitry Andric   // Restore MBB to the state before window scheduling.
285ac9a064cSDimitry Andric   for (auto *MI : OriMIs)
286ac9a064cSDimitry Andric     MBB->push_back(MI);
287ac9a064cSDimitry Andric   updateLiveIntervals();
288ac9a064cSDimitry Andric }
289ac9a064cSDimitry Andric 
generateTripleMBB()290ac9a064cSDimitry Andric void WindowScheduler::generateTripleMBB() {
291ac9a064cSDimitry Andric   const unsigned DuplicateNum = 3;
292ac9a064cSDimitry Andric   TriMIs.clear();
293ac9a064cSDimitry Andric   TriToOri.clear();
294ac9a064cSDimitry Andric   assert(OriMIs.size() > 0 && "The Original MIs were not backed up!");
295ac9a064cSDimitry Andric   // Step 1: Performing the first copy of MBB instructions, excluding
296ac9a064cSDimitry Andric   // terminators. At the same time, we back up the anti-register of phis.
297ac9a064cSDimitry Andric   // DefPairs hold the old and new define register pairs.
298ac9a064cSDimitry Andric   DenseMap<Register, Register> DefPairs;
299ac9a064cSDimitry Andric   for (auto *MI : OriMIs) {
300ac9a064cSDimitry Andric     if (MI->isMetaInstruction() || MI->isTerminator())
301ac9a064cSDimitry Andric       continue;
302ac9a064cSDimitry Andric     if (MI->isPHI())
303ac9a064cSDimitry Andric       if (Register AntiReg = getAntiRegister(MI))
304ac9a064cSDimitry Andric         DefPairs[MI->getOperand(0).getReg()] = AntiReg;
305ac9a064cSDimitry Andric     auto *NewMI = MF->CloneMachineInstr(MI);
306ac9a064cSDimitry Andric     MBB->push_back(NewMI);
307ac9a064cSDimitry Andric     TriMIs.push_back(NewMI);
308ac9a064cSDimitry Andric     TriToOri[NewMI] = MI;
309ac9a064cSDimitry Andric   }
310ac9a064cSDimitry Andric   // Step 2: Performing the remaining two copies of MBB instructions excluding
311ac9a064cSDimitry Andric   // phis, and the last one contains terminators. At the same time, registers
312ac9a064cSDimitry Andric   // are updated accordingly.
313ac9a064cSDimitry Andric   for (size_t Cnt = 1; Cnt < DuplicateNum; ++Cnt) {
314ac9a064cSDimitry Andric     for (auto *MI : OriMIs) {
315ac9a064cSDimitry Andric       if (MI->isPHI() || MI->isMetaInstruction() ||
316ac9a064cSDimitry Andric           (MI->isTerminator() && Cnt < DuplicateNum - 1))
317ac9a064cSDimitry Andric         continue;
318ac9a064cSDimitry Andric       auto *NewMI = MF->CloneMachineInstr(MI);
319ac9a064cSDimitry Andric       DenseMap<Register, Register> NewDefs;
320ac9a064cSDimitry Andric       // New defines are updated.
321ac9a064cSDimitry Andric       for (auto MO : NewMI->all_defs())
322ac9a064cSDimitry Andric         if (MO.isReg() && MO.getReg().isVirtual()) {
323ac9a064cSDimitry Andric           Register NewDef =
324ac9a064cSDimitry Andric               MRI->createVirtualRegister(MRI->getRegClass(MO.getReg()));
325ac9a064cSDimitry Andric           NewMI->substituteRegister(MO.getReg(), NewDef, 0, *TRI);
326ac9a064cSDimitry Andric           NewDefs[MO.getReg()] = NewDef;
327ac9a064cSDimitry Andric         }
328ac9a064cSDimitry Andric       // New uses are updated.
329ac9a064cSDimitry Andric       for (auto DefRegPair : DefPairs)
330ac9a064cSDimitry Andric         if (NewMI->readsRegister(DefRegPair.first, TRI)) {
331ac9a064cSDimitry Andric           Register NewUse = DefRegPair.second;
332ac9a064cSDimitry Andric           // Note the update process for '%1 -> %9' in '%10 = sub i32 %9, %3':
333ac9a064cSDimitry Andric           //
334ac9a064cSDimitry Andric           // BB.3:                                  DefPairs
335ac9a064cSDimitry Andric           // ==================================
336ac9a064cSDimitry Andric           // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]  (%1,%7)
337ac9a064cSDimitry Andric           // ...
338ac9a064cSDimitry Andric           // ==================================
339ac9a064cSDimitry Andric           // ...
340ac9a064cSDimitry Andric           // %4 = sub i32 %1, %3
341ac9a064cSDimitry Andric           // ...
342ac9a064cSDimitry Andric           // %7 = add i32 %5, %6
343ac9a064cSDimitry Andric           // ...
344ac9a064cSDimitry Andric           // ----------------------------------
345ac9a064cSDimitry Andric           // ...
346ac9a064cSDimitry Andric           // %8 = sub i32 %7, %3                    (%1,%7),(%4,%8)
347ac9a064cSDimitry Andric           // ...
348ac9a064cSDimitry Andric           // %9 = add i32 %5, %6                    (%1,%7),(%4,%8),(%7,%9)
349ac9a064cSDimitry Andric           // ...
350ac9a064cSDimitry Andric           // ----------------------------------
351ac9a064cSDimitry Andric           // ...
352ac9a064cSDimitry Andric           // %10 = sub i32 %9, %3                   (%1,%7),(%4,%10),(%7,%9)
353ac9a064cSDimitry Andric           // ...            ^
354ac9a064cSDimitry Andric           // %11 = add i32 %5, %6                   (%1,%7),(%4,%10),(%7,%11)
355ac9a064cSDimitry Andric           // ...
356ac9a064cSDimitry Andric           // ==================================
357ac9a064cSDimitry Andric           //          < Terminators >
358ac9a064cSDimitry Andric           // ==================================
359ac9a064cSDimitry Andric           if (DefPairs.count(NewUse))
360ac9a064cSDimitry Andric             NewUse = DefPairs[NewUse];
361ac9a064cSDimitry Andric           NewMI->substituteRegister(DefRegPair.first, NewUse, 0, *TRI);
362ac9a064cSDimitry Andric         }
363ac9a064cSDimitry Andric       // DefPairs is updated at last.
364ac9a064cSDimitry Andric       for (auto &NewDef : NewDefs)
365ac9a064cSDimitry Andric         DefPairs[NewDef.first] = NewDef.second;
366ac9a064cSDimitry Andric       MBB->push_back(NewMI);
367ac9a064cSDimitry Andric       TriMIs.push_back(NewMI);
368ac9a064cSDimitry Andric       TriToOri[NewMI] = MI;
369ac9a064cSDimitry Andric     }
370ac9a064cSDimitry Andric   }
371ac9a064cSDimitry Andric   // Step 3: The registers used by phis are updated, and they are generated in
372ac9a064cSDimitry Andric   // the third copy of MBB.
373ac9a064cSDimitry Andric   // In the privious example, the old phi is:
374ac9a064cSDimitry Andric   // %1 = phi i32 [%2, %BB.1], [%7, %BB.3]
375ac9a064cSDimitry Andric   // The new phi is:
376ac9a064cSDimitry Andric   // %1 = phi i32 [%2, %BB.1], [%11, %BB.3]
377ac9a064cSDimitry Andric   for (auto &Phi : MBB->phis()) {
378ac9a064cSDimitry Andric     for (auto DefRegPair : DefPairs)
379ac9a064cSDimitry Andric       if (Phi.readsRegister(DefRegPair.first, TRI))
380ac9a064cSDimitry Andric         Phi.substituteRegister(DefRegPair.first, DefRegPair.second, 0, *TRI);
381ac9a064cSDimitry Andric   }
382ac9a064cSDimitry Andric   updateLiveIntervals();
383ac9a064cSDimitry Andric }
384ac9a064cSDimitry Andric 
restoreTripleMBB()385ac9a064cSDimitry Andric void WindowScheduler::restoreTripleMBB() {
386ac9a064cSDimitry Andric   // After list scheduling, the MBB is restored in one traversal.
387ac9a064cSDimitry Andric   for (size_t I = 0; I < TriMIs.size(); ++I) {
388ac9a064cSDimitry Andric     auto *MI = TriMIs[I];
389ac9a064cSDimitry Andric     auto OldPos = MBB->begin();
390ac9a064cSDimitry Andric     std::advance(OldPos, I);
391ac9a064cSDimitry Andric     auto CurPos = MI->getIterator();
392ac9a064cSDimitry Andric     if (CurPos != OldPos) {
393ac9a064cSDimitry Andric       MBB->splice(OldPos, MBB, CurPos);
394ac9a064cSDimitry Andric       Context->LIS->handleMove(*MI, /*UpdateFlags=*/false);
395ac9a064cSDimitry Andric     }
396ac9a064cSDimitry Andric   }
397ac9a064cSDimitry Andric }
398ac9a064cSDimitry Andric 
getSearchIndexes(unsigned SearchNum,unsigned SearchRatio)399ac9a064cSDimitry Andric SmallVector<unsigned> WindowScheduler::getSearchIndexes(unsigned SearchNum,
400ac9a064cSDimitry Andric                                                         unsigned SearchRatio) {
401ac9a064cSDimitry Andric   // We use SearchRatio to get the index range, and then evenly get the indexes
402ac9a064cSDimitry Andric   // according to the SearchNum. This is a simple huristic. Depending on the
403ac9a064cSDimitry Andric   // characteristics of the target, more complex algorithms can be used for both
404ac9a064cSDimitry Andric   // performance and compilation time.
405ac9a064cSDimitry Andric   assert(SearchRatio <= 100 && "SearchRatio should be equal or less than 100!");
406ac9a064cSDimitry Andric   unsigned MaxIdx = SchedInstrNum * SearchRatio / 100;
407ac9a064cSDimitry Andric   unsigned Step = SearchNum > 0 && SearchNum <= MaxIdx ? MaxIdx / SearchNum : 1;
408ac9a064cSDimitry Andric   SmallVector<unsigned> SearchIndexes;
409ac9a064cSDimitry Andric   for (unsigned Idx = 0; Idx < MaxIdx; Idx += Step)
410ac9a064cSDimitry Andric     SearchIndexes.push_back(Idx);
411ac9a064cSDimitry Andric   return SearchIndexes;
412ac9a064cSDimitry Andric }
413ac9a064cSDimitry Andric 
getEstimatedII(ScheduleDAGInstrs & DAG)414ac9a064cSDimitry Andric int WindowScheduler::getEstimatedII(ScheduleDAGInstrs &DAG) {
415ac9a064cSDimitry Andric   // Sometimes MaxDepth is 0, so it should be limited to the minimum of 1.
416ac9a064cSDimitry Andric   unsigned MaxDepth = 1;
417ac9a064cSDimitry Andric   for (auto &SU : DAG.SUnits)
418ac9a064cSDimitry Andric     MaxDepth = std::max(SU.getDepth() + SU.Latency, MaxDepth);
419ac9a064cSDimitry Andric   return MaxDepth * WindowIICoeff;
420ac9a064cSDimitry Andric }
421ac9a064cSDimitry Andric 
calculateMaxCycle(ScheduleDAGInstrs & DAG,unsigned Offset)422ac9a064cSDimitry Andric int WindowScheduler::calculateMaxCycle(ScheduleDAGInstrs &DAG,
423ac9a064cSDimitry Andric                                        unsigned Offset) {
424ac9a064cSDimitry Andric   int InitII = getEstimatedII(DAG);
425ac9a064cSDimitry Andric   ResourceManager RM(Subtarget, &DAG);
426ac9a064cSDimitry Andric   RM.init(InitII);
427ac9a064cSDimitry Andric   // ResourceManager and DAG are used to calculate the maximum cycle for the
428ac9a064cSDimitry Andric   // scheduled MIs. Since MIs in the Region have already been scheduled, the
429ac9a064cSDimitry Andric   // emit cycles can be estimated in order here.
430ac9a064cSDimitry Andric   int CurCycle = 0;
431ac9a064cSDimitry Andric   auto Range = getScheduleRange(Offset, SchedInstrNum);
432ac9a064cSDimitry Andric   for (auto &MI : Range) {
433ac9a064cSDimitry Andric     auto *SU = DAG.getSUnit(&MI);
434ac9a064cSDimitry Andric     int ExpectCycle = CurCycle;
435ac9a064cSDimitry Andric     // The predecessors of current MI determine its earliest issue cycle.
436ac9a064cSDimitry Andric     for (auto &Pred : SU->Preds) {
437ac9a064cSDimitry Andric       if (Pred.isWeak())
438ac9a064cSDimitry Andric         continue;
439ac9a064cSDimitry Andric       auto *PredMI = Pred.getSUnit()->getInstr();
440ac9a064cSDimitry Andric       int PredCycle = getOriCycle(PredMI);
441ac9a064cSDimitry Andric       ExpectCycle = std::max(ExpectCycle, PredCycle + (int)Pred.getLatency());
442ac9a064cSDimitry Andric     }
4437432c960SDimitry Andric     // Zero cost instructions do not need to check resource.
4447432c960SDimitry Andric     if (!TII->isZeroCost(MI.getOpcode())) {
445ac9a064cSDimitry Andric       // ResourceManager can be used to detect resource conflicts between the
446ac9a064cSDimitry Andric       // current MI and the previously inserted MIs.
447ac9a064cSDimitry Andric       while (!RM.canReserveResources(*SU, CurCycle) || CurCycle < ExpectCycle) {
448ac9a064cSDimitry Andric         ++CurCycle;
449ac9a064cSDimitry Andric         if (CurCycle == (int)WindowIILimit)
450ac9a064cSDimitry Andric           return CurCycle;
451ac9a064cSDimitry Andric       }
452ac9a064cSDimitry Andric       RM.reserveResources(*SU, CurCycle);
4537432c960SDimitry Andric     }
454ac9a064cSDimitry Andric     OriToCycle[getOriMI(&MI)] = CurCycle;
455ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "\tCycle " << CurCycle << " [S."
456ac9a064cSDimitry Andric                       << getOriStage(getOriMI(&MI), Offset) << "]: " << MI);
457ac9a064cSDimitry Andric   }
458ac9a064cSDimitry Andric   LLVM_DEBUG(dbgs() << "MaxCycle is " << CurCycle << ".\n");
459ac9a064cSDimitry Andric   return CurCycle;
460ac9a064cSDimitry Andric }
461ac9a064cSDimitry Andric 
462ac9a064cSDimitry Andric // By utilizing TripleDAG, we can easily establish dependencies between A and B.
463ac9a064cSDimitry Andric // Based on the MaxCycle and the issue cycle of A and B, we can determine
464ac9a064cSDimitry Andric // whether it is necessary to add a stall cycle. This is because, without
465ac9a064cSDimitry Andric // inserting the stall cycle, the latency constraint between A and B cannot be
466ac9a064cSDimitry Andric // satisfied. The details are as follows:
467ac9a064cSDimitry Andric //
468ac9a064cSDimitry Andric // New MBB:
469ac9a064cSDimitry Andric // ========================================
470ac9a064cSDimitry Andric //                 < Phis >
471ac9a064cSDimitry Andric // ========================================     (sliding direction)
472ac9a064cSDimitry Andric // MBB copy 1                                            |
473ac9a064cSDimitry Andric //                                                       V
474ac9a064cSDimitry Andric //
475ac9a064cSDimitry Andric // ~~~~~~~~~~~~~~~~~~~|~~~~~~~~~~~~~~~~~~~~  ----schedule window-----
476ac9a064cSDimitry Andric //                    |                                  |
477ac9a064cSDimitry Andric // ===================V====================              |
478ac9a064cSDimitry Andric // MBB copy 2      < MI B >                              |
479ac9a064cSDimitry Andric //                                                       |
480ac9a064cSDimitry Andric //                 < MI A >                              V
481ac9a064cSDimitry Andric // ~~~~~~~~~~~~~~~~~~~:~~~~~~~~~~~~~~~~~~~~  ------------------------
482ac9a064cSDimitry Andric //                    :
483ac9a064cSDimitry Andric // ===================V====================
484ac9a064cSDimitry Andric // MBB copy 3      < MI B'>
485ac9a064cSDimitry Andric //
486ac9a064cSDimitry Andric //
487ac9a064cSDimitry Andric //
488ac9a064cSDimitry Andric //
489ac9a064cSDimitry Andric // ========================================
490ac9a064cSDimitry Andric //              < Terminators >
491ac9a064cSDimitry Andric // ========================================
calculateStallCycle(unsigned Offset,int MaxCycle)492ac9a064cSDimitry Andric int WindowScheduler::calculateStallCycle(unsigned Offset, int MaxCycle) {
493ac9a064cSDimitry Andric   int MaxStallCycle = 0;
4947432c960SDimitry Andric   int CurrentII = MaxCycle + 1;
495ac9a064cSDimitry Andric   auto Range = getScheduleRange(Offset, SchedInstrNum);
496ac9a064cSDimitry Andric   for (auto &MI : Range) {
497ac9a064cSDimitry Andric     auto *SU = TripleDAG->getSUnit(&MI);
498ac9a064cSDimitry Andric     int DefCycle = getOriCycle(&MI);
499ac9a064cSDimitry Andric     for (auto &Succ : SU->Succs) {
500ac9a064cSDimitry Andric       if (Succ.isWeak() || Succ.getSUnit() == &TripleDAG->ExitSU)
501ac9a064cSDimitry Andric         continue;
5027432c960SDimitry Andric       // If the expected cycle does not exceed CurrentII, no check is needed.
5037432c960SDimitry Andric       if (DefCycle + (int)Succ.getLatency() <= CurrentII)
504ac9a064cSDimitry Andric         continue;
505ac9a064cSDimitry Andric       // If the cycle of the scheduled MI A is less than that of the scheduled
506ac9a064cSDimitry Andric       // MI B, the scheduling will fail because the lifetime of the
507ac9a064cSDimitry Andric       // corresponding register exceeds II.
508ac9a064cSDimitry Andric       auto *SuccMI = Succ.getSUnit()->getInstr();
509ac9a064cSDimitry Andric       int UseCycle = getOriCycle(SuccMI);
510ac9a064cSDimitry Andric       if (DefCycle < UseCycle)
511ac9a064cSDimitry Andric         return WindowIILimit;
512ac9a064cSDimitry Andric       // Get the stall cycle introduced by the register between two trips.
5137432c960SDimitry Andric       int StallCycle = DefCycle + (int)Succ.getLatency() - CurrentII - UseCycle;
514ac9a064cSDimitry Andric       MaxStallCycle = std::max(MaxStallCycle, StallCycle);
515ac9a064cSDimitry Andric     }
516ac9a064cSDimitry Andric   }
517ac9a064cSDimitry Andric   LLVM_DEBUG(dbgs() << "MaxStallCycle is " << MaxStallCycle << ".\n");
518ac9a064cSDimitry Andric   return MaxStallCycle;
519ac9a064cSDimitry Andric }
520ac9a064cSDimitry Andric 
analyseII(ScheduleDAGInstrs & DAG,unsigned Offset)521ac9a064cSDimitry Andric unsigned WindowScheduler::analyseII(ScheduleDAGInstrs &DAG, unsigned Offset) {
522ac9a064cSDimitry Andric   LLVM_DEBUG(dbgs() << "Start analyzing II:\n");
523ac9a064cSDimitry Andric   int MaxCycle = calculateMaxCycle(DAG, Offset);
524ac9a064cSDimitry Andric   if (MaxCycle == (int)WindowIILimit)
525ac9a064cSDimitry Andric     return MaxCycle;
526ac9a064cSDimitry Andric   int StallCycle = calculateStallCycle(Offset, MaxCycle);
527ac9a064cSDimitry Andric   if (StallCycle == (int)WindowIILimit)
528ac9a064cSDimitry Andric     return StallCycle;
529ac9a064cSDimitry Andric   // The value of II is equal to the maximum execution cycle plus 1.
530ac9a064cSDimitry Andric   return MaxCycle + StallCycle + 1;
531ac9a064cSDimitry Andric }
532ac9a064cSDimitry Andric 
schedulePhi(int Offset,unsigned & II)533ac9a064cSDimitry Andric void WindowScheduler::schedulePhi(int Offset, unsigned &II) {
534ac9a064cSDimitry Andric   LLVM_DEBUG(dbgs() << "Start scheduling Phis:\n");
535ac9a064cSDimitry Andric   for (auto &Phi : MBB->phis()) {
536ac9a064cSDimitry Andric     int LateCycle = INT_MAX;
537ac9a064cSDimitry Andric     auto *SU = TripleDAG->getSUnit(&Phi);
538ac9a064cSDimitry Andric     for (auto &Succ : SU->Succs) {
539ac9a064cSDimitry Andric       // Phi doesn't have any Anti successors.
540ac9a064cSDimitry Andric       if (Succ.getKind() != SDep::Data)
541ac9a064cSDimitry Andric         continue;
542ac9a064cSDimitry Andric       // Phi is scheduled before the successor of stage 0. The issue cycle of
543ac9a064cSDimitry Andric       // phi is the latest cycle in this interval.
544ac9a064cSDimitry Andric       auto *SuccMI = Succ.getSUnit()->getInstr();
545ac9a064cSDimitry Andric       int Cycle = getOriCycle(SuccMI);
546ac9a064cSDimitry Andric       if (getOriStage(getOriMI(SuccMI), Offset) == 0)
547ac9a064cSDimitry Andric         LateCycle = std::min(LateCycle, Cycle);
548ac9a064cSDimitry Andric     }
549ac9a064cSDimitry Andric     // The anti-dependency of phi need to be handled separately in the same way.
550ac9a064cSDimitry Andric     if (Register AntiReg = getAntiRegister(&Phi)) {
551ac9a064cSDimitry Andric       auto *AntiMI = MRI->getVRegDef(AntiReg);
552ac9a064cSDimitry Andric       // AntiReg may be defined outside the kernel MBB.
553ac9a064cSDimitry Andric       if (AntiMI->getParent() == MBB) {
554ac9a064cSDimitry Andric         auto AntiCycle = getOriCycle(AntiMI);
555ac9a064cSDimitry Andric         if (getOriStage(getOriMI(AntiMI), Offset) == 0)
556ac9a064cSDimitry Andric           LateCycle = std::min(LateCycle, AntiCycle);
557ac9a064cSDimitry Andric       }
558ac9a064cSDimitry Andric     }
559ac9a064cSDimitry Andric     // If there is no limit to the late cycle, a default value is given.
560ac9a064cSDimitry Andric     if (LateCycle == INT_MAX)
561ac9a064cSDimitry Andric       LateCycle = (int)(II - 1);
562ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "\tCycle range [0, " << LateCycle << "] " << Phi);
563ac9a064cSDimitry Andric     // The issue cycle of phi is set to the latest cycle in the interval.
564ac9a064cSDimitry Andric     auto *OriPhi = getOriMI(&Phi);
565ac9a064cSDimitry Andric     OriToCycle[OriPhi] = LateCycle;
566ac9a064cSDimitry Andric   }
567ac9a064cSDimitry Andric }
568ac9a064cSDimitry Andric 
getIssueOrder(unsigned Offset,unsigned II)569ac9a064cSDimitry Andric DenseMap<MachineInstr *, int> WindowScheduler::getIssueOrder(unsigned Offset,
570ac9a064cSDimitry Andric                                                              unsigned II) {
571ac9a064cSDimitry Andric   // At each issue cycle, phi is placed before MIs in stage 0. So the simplest
572ac9a064cSDimitry Andric   // way is to put phi at the beginning of the current cycle.
573ac9a064cSDimitry Andric   DenseMap<int, SmallVector<MachineInstr *>> CycleToMIs;
574ac9a064cSDimitry Andric   auto Range = getScheduleRange(Offset, SchedInstrNum);
575ac9a064cSDimitry Andric   for (auto &Phi : MBB->phis())
576ac9a064cSDimitry Andric     CycleToMIs[getOriCycle(&Phi)].push_back(getOriMI(&Phi));
577ac9a064cSDimitry Andric   for (auto &MI : Range)
578ac9a064cSDimitry Andric     CycleToMIs[getOriCycle(&MI)].push_back(getOriMI(&MI));
579ac9a064cSDimitry Andric   // Each MI is assigned a separate ordered Id, which is used as a sort marker
580ac9a064cSDimitry Andric   // in the following expand process.
581ac9a064cSDimitry Andric   DenseMap<MachineInstr *, int> IssueOrder;
582ac9a064cSDimitry Andric   int Id = 0;
583ac9a064cSDimitry Andric   for (int Cycle = 0; Cycle < (int)II; ++Cycle) {
584ac9a064cSDimitry Andric     if (!CycleToMIs.count(Cycle))
585ac9a064cSDimitry Andric       continue;
586ac9a064cSDimitry Andric     for (auto *MI : CycleToMIs[Cycle])
587ac9a064cSDimitry Andric       IssueOrder[MI] = Id++;
588ac9a064cSDimitry Andric   }
589ac9a064cSDimitry Andric   return IssueOrder;
590ac9a064cSDimitry Andric }
591ac9a064cSDimitry Andric 
updateScheduleResult(unsigned Offset,unsigned II)592ac9a064cSDimitry Andric void WindowScheduler::updateScheduleResult(unsigned Offset, unsigned II) {
593ac9a064cSDimitry Andric   // At the first update, Offset is equal to SchedPhiNum. At this time, only
594ac9a064cSDimitry Andric   // BestII, BestOffset, and BaseII need to be updated.
595ac9a064cSDimitry Andric   if (Offset == SchedPhiNum) {
596ac9a064cSDimitry Andric     BestII = II;
597ac9a064cSDimitry Andric     BestOffset = SchedPhiNum;
598ac9a064cSDimitry Andric     BaseII = II;
599ac9a064cSDimitry Andric     return;
600ac9a064cSDimitry Andric   }
601ac9a064cSDimitry Andric   // The update will only continue if the II is smaller than BestII and the II
602ac9a064cSDimitry Andric   // is sufficiently small.
603ac9a064cSDimitry Andric   if ((II >= BestII) || (II + WindowDiffLimit > BaseII))
604ac9a064cSDimitry Andric     return;
605ac9a064cSDimitry Andric   BestII = II;
606ac9a064cSDimitry Andric   BestOffset = Offset;
607ac9a064cSDimitry Andric   // Record the result of the current list scheduling, noting that each MI is
608ac9a064cSDimitry Andric   // stored unordered in SchedResult.
609ac9a064cSDimitry Andric   SchedResult.clear();
610ac9a064cSDimitry Andric   auto IssueOrder = getIssueOrder(Offset, II);
611ac9a064cSDimitry Andric   for (auto &Pair : OriToCycle) {
612ac9a064cSDimitry Andric     assert(IssueOrder.count(Pair.first) && "Cannot find original MI!");
613ac9a064cSDimitry Andric     SchedResult.push_back(std::make_tuple(Pair.first, Pair.second,
614ac9a064cSDimitry Andric                                           getOriStage(Pair.first, Offset),
615ac9a064cSDimitry Andric                                           IssueOrder[Pair.first]));
616ac9a064cSDimitry Andric   }
617ac9a064cSDimitry Andric }
618ac9a064cSDimitry Andric 
expand()619ac9a064cSDimitry Andric void WindowScheduler::expand() {
620ac9a064cSDimitry Andric   // The MIs in the SchedResult are sorted by the issue order ID.
621ac9a064cSDimitry Andric   llvm::stable_sort(SchedResult,
622ac9a064cSDimitry Andric                     [](const std::tuple<MachineInstr *, int, int, int> &A,
623ac9a064cSDimitry Andric                        const std::tuple<MachineInstr *, int, int, int> &B) {
624ac9a064cSDimitry Andric                       return std::get<3>(A) < std::get<3>(B);
625ac9a064cSDimitry Andric                     });
626ac9a064cSDimitry Andric   // Use the scheduling infrastructure for expansion, noting that InstrChanges
627ac9a064cSDimitry Andric   // is not supported here.
628ac9a064cSDimitry Andric   DenseMap<MachineInstr *, int> Cycles, Stages;
629ac9a064cSDimitry Andric   std::vector<MachineInstr *> OrderedInsts;
630ac9a064cSDimitry Andric   for (auto &Info : SchedResult) {
631ac9a064cSDimitry Andric     auto *MI = std::get<0>(Info);
632ac9a064cSDimitry Andric     OrderedInsts.push_back(MI);
633ac9a064cSDimitry Andric     Cycles[MI] = std::get<1>(Info);
634ac9a064cSDimitry Andric     Stages[MI] = std::get<2>(Info);
635ac9a064cSDimitry Andric     LLVM_DEBUG(dbgs() << "\tCycle " << Cycles[MI] << " [S." << Stages[MI]
636ac9a064cSDimitry Andric                       << "]: " << *MI);
637ac9a064cSDimitry Andric   }
638ac9a064cSDimitry Andric   ModuloSchedule MS(*MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
639ac9a064cSDimitry Andric                     std::move(Stages));
640ac9a064cSDimitry Andric   ModuloScheduleExpander MSE(*MF, MS, *Context->LIS,
641ac9a064cSDimitry Andric                              ModuloScheduleExpander::InstrChangesTy());
642ac9a064cSDimitry Andric   MSE.expand();
643ac9a064cSDimitry Andric   MSE.cleanup();
644ac9a064cSDimitry Andric }
645ac9a064cSDimitry Andric 
updateLiveIntervals()646ac9a064cSDimitry Andric void WindowScheduler::updateLiveIntervals() {
647ac9a064cSDimitry Andric   SmallVector<Register, 128> UsedRegs;
648ac9a064cSDimitry Andric   for (MachineInstr &MI : *MBB)
649ac9a064cSDimitry Andric     for (const MachineOperand &MO : MI.operands()) {
650ac9a064cSDimitry Andric       if (!MO.isReg() || MO.getReg() == 0)
651ac9a064cSDimitry Andric         continue;
652ac9a064cSDimitry Andric       Register Reg = MO.getReg();
653ac9a064cSDimitry Andric       if (!is_contained(UsedRegs, Reg))
654ac9a064cSDimitry Andric         UsedRegs.push_back(Reg);
655ac9a064cSDimitry Andric     }
656ac9a064cSDimitry Andric   Context->LIS->repairIntervalsInRange(MBB, MBB->begin(), MBB->end(), UsedRegs);
657ac9a064cSDimitry Andric }
658ac9a064cSDimitry Andric 
659ac9a064cSDimitry Andric iterator_range<MachineBasicBlock::iterator>
getScheduleRange(unsigned Offset,unsigned Num)660ac9a064cSDimitry Andric WindowScheduler::getScheduleRange(unsigned Offset, unsigned Num) {
661ac9a064cSDimitry Andric   auto RegionBegin = MBB->begin();
662ac9a064cSDimitry Andric   std::advance(RegionBegin, Offset);
663ac9a064cSDimitry Andric   auto RegionEnd = RegionBegin;
664ac9a064cSDimitry Andric   std::advance(RegionEnd, Num);
665ac9a064cSDimitry Andric   return make_range(RegionBegin, RegionEnd);
666ac9a064cSDimitry Andric }
667ac9a064cSDimitry Andric 
getOriCycle(MachineInstr * NewMI)668ac9a064cSDimitry Andric int WindowScheduler::getOriCycle(MachineInstr *NewMI) {
669ac9a064cSDimitry Andric   assert(TriToOri.count(NewMI) && "Cannot find original MI!");
670ac9a064cSDimitry Andric   auto *OriMI = TriToOri[NewMI];
671ac9a064cSDimitry Andric   assert(OriToCycle.count(OriMI) && "Cannot find schedule cycle!");
672ac9a064cSDimitry Andric   return OriToCycle[OriMI];
673ac9a064cSDimitry Andric }
674ac9a064cSDimitry Andric 
getOriMI(MachineInstr * NewMI)675ac9a064cSDimitry Andric MachineInstr *WindowScheduler::getOriMI(MachineInstr *NewMI) {
676ac9a064cSDimitry Andric   assert(TriToOri.count(NewMI) && "Cannot find original MI!");
677ac9a064cSDimitry Andric   return TriToOri[NewMI];
678ac9a064cSDimitry Andric }
679ac9a064cSDimitry Andric 
getOriStage(MachineInstr * OriMI,unsigned Offset)680ac9a064cSDimitry Andric unsigned WindowScheduler::getOriStage(MachineInstr *OriMI, unsigned Offset) {
681ac9a064cSDimitry Andric   assert(llvm::find(OriMIs, OriMI) != OriMIs.end() &&
682ac9a064cSDimitry Andric          "Cannot find OriMI in OriMIs!");
683ac9a064cSDimitry Andric   // If there is no instruction fold, all MI stages are 0.
684ac9a064cSDimitry Andric   if (Offset == SchedPhiNum)
685ac9a064cSDimitry Andric     return 0;
686ac9a064cSDimitry Andric   // For those MIs with an ID less than the Offset, their stages are set to 0,
687ac9a064cSDimitry Andric   // while the rest are set to 1.
688ac9a064cSDimitry Andric   unsigned Id = 0;
689ac9a064cSDimitry Andric   for (auto *MI : OriMIs) {
690ac9a064cSDimitry Andric     if (MI->isMetaInstruction())
691ac9a064cSDimitry Andric       continue;
692ac9a064cSDimitry Andric     if (MI == OriMI)
693ac9a064cSDimitry Andric       break;
694ac9a064cSDimitry Andric     ++Id;
695ac9a064cSDimitry Andric   }
696ac9a064cSDimitry Andric   return Id >= (size_t)Offset ? 1 : 0;
697ac9a064cSDimitry Andric }
698ac9a064cSDimitry Andric 
getAntiRegister(MachineInstr * Phi)699ac9a064cSDimitry Andric Register WindowScheduler::getAntiRegister(MachineInstr *Phi) {
700ac9a064cSDimitry Andric   assert(Phi->isPHI() && "Expecting PHI!");
701ac9a064cSDimitry Andric   Register AntiReg;
702ac9a064cSDimitry Andric   for (auto MO : Phi->uses()) {
703ac9a064cSDimitry Andric     if (MO.isReg())
704ac9a064cSDimitry Andric       AntiReg = MO.getReg();
705ac9a064cSDimitry Andric     else if (MO.isMBB() && MO.getMBB() == MBB)
706ac9a064cSDimitry Andric       return AntiReg;
707ac9a064cSDimitry Andric   }
708ac9a064cSDimitry Andric   return 0;
709ac9a064cSDimitry Andric }
710