11d5ae102SDimitry Andric //===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
21d5ae102SDimitry Andric //
31d5ae102SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
41d5ae102SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
51d5ae102SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61d5ae102SDimitry Andric //
71d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
81d5ae102SDimitry Andric
91d5ae102SDimitry Andric #include "llvm/CodeGen/ModuloSchedule.h"
101d5ae102SDimitry Andric #include "llvm/ADT/StringExtras.h"
11cfca06d7SDimitry Andric #include "llvm/Analysis/MemoryLocation.h"
121d5ae102SDimitry Andric #include "llvm/CodeGen/LiveIntervals.h"
131d5ae102SDimitry Andric #include "llvm/CodeGen/MachineInstrBuilder.h"
14145449b1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
151d5ae102SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h"
16706b4fc4SDimitry Andric #include "llvm/InitializePasses.h"
171d5ae102SDimitry Andric #include "llvm/MC/MCContext.h"
181d5ae102SDimitry Andric #include "llvm/Support/Debug.h"
191d5ae102SDimitry Andric #include "llvm/Support/ErrorHandling.h"
201d5ae102SDimitry Andric #include "llvm/Support/raw_ostream.h"
211d5ae102SDimitry Andric
221d5ae102SDimitry Andric #define DEBUG_TYPE "pipeliner"
231d5ae102SDimitry Andric using namespace llvm;
241d5ae102SDimitry Andric
25ac9a064cSDimitry Andric static cl::opt<bool> SwapBranchTargetsMVE(
26ac9a064cSDimitry Andric "pipeliner-swap-branch-targets-mve", cl::Hidden, cl::init(false),
27ac9a064cSDimitry Andric cl::desc("Swap target blocks of a conditional branch for MVE expander"));
28ac9a064cSDimitry Andric
print(raw_ostream & OS)291d5ae102SDimitry Andric void ModuloSchedule::print(raw_ostream &OS) {
301d5ae102SDimitry Andric for (MachineInstr *MI : ScheduledInstrs)
311d5ae102SDimitry Andric OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
321d5ae102SDimitry Andric }
331d5ae102SDimitry Andric
341d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
351d5ae102SDimitry Andric // ModuloScheduleExpander implementation
361d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
371d5ae102SDimitry Andric
381d5ae102SDimitry Andric /// Return the register values for the operands of a Phi instruction.
391d5ae102SDimitry Andric /// This function assume the instruction is a Phi.
getPhiRegs(MachineInstr & Phi,MachineBasicBlock * Loop,unsigned & InitVal,unsigned & LoopVal)401d5ae102SDimitry Andric static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
411d5ae102SDimitry Andric unsigned &InitVal, unsigned &LoopVal) {
421d5ae102SDimitry Andric assert(Phi.isPHI() && "Expecting a Phi.");
431d5ae102SDimitry Andric
441d5ae102SDimitry Andric InitVal = 0;
451d5ae102SDimitry Andric LoopVal = 0;
461d5ae102SDimitry Andric for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
471d5ae102SDimitry Andric if (Phi.getOperand(i + 1).getMBB() != Loop)
481d5ae102SDimitry Andric InitVal = Phi.getOperand(i).getReg();
491d5ae102SDimitry Andric else
501d5ae102SDimitry Andric LoopVal = Phi.getOperand(i).getReg();
511d5ae102SDimitry Andric
521d5ae102SDimitry Andric assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
531d5ae102SDimitry Andric }
541d5ae102SDimitry Andric
551d5ae102SDimitry Andric /// Return the Phi register value that comes from the incoming block.
getInitPhiReg(MachineInstr & Phi,MachineBasicBlock * LoopBB)561d5ae102SDimitry Andric static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
571d5ae102SDimitry Andric for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
581d5ae102SDimitry Andric if (Phi.getOperand(i + 1).getMBB() != LoopBB)
591d5ae102SDimitry Andric return Phi.getOperand(i).getReg();
601d5ae102SDimitry Andric return 0;
611d5ae102SDimitry Andric }
621d5ae102SDimitry Andric
631d5ae102SDimitry Andric /// Return the Phi register value that comes the loop block.
getLoopPhiReg(MachineInstr & Phi,MachineBasicBlock * LoopBB)641d5ae102SDimitry Andric static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
651d5ae102SDimitry Andric for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
661d5ae102SDimitry Andric if (Phi.getOperand(i + 1).getMBB() == LoopBB)
671d5ae102SDimitry Andric return Phi.getOperand(i).getReg();
681d5ae102SDimitry Andric return 0;
691d5ae102SDimitry Andric }
701d5ae102SDimitry Andric
expand()711d5ae102SDimitry Andric void ModuloScheduleExpander::expand() {
721d5ae102SDimitry Andric BB = Schedule.getLoop()->getTopBlock();
731d5ae102SDimitry Andric Preheader = *BB->pred_begin();
741d5ae102SDimitry Andric if (Preheader == BB)
751d5ae102SDimitry Andric Preheader = *std::next(BB->pred_begin());
761d5ae102SDimitry Andric
771d5ae102SDimitry Andric // Iterate over the definitions in each instruction, and compute the
781d5ae102SDimitry Andric // stage difference for each use. Keep the maximum value.
791d5ae102SDimitry Andric for (MachineInstr *MI : Schedule.getInstructions()) {
801d5ae102SDimitry Andric int DefStage = Schedule.getStage(MI);
817fa27ce4SDimitry Andric for (const MachineOperand &Op : MI->all_defs()) {
821d5ae102SDimitry Andric Register Reg = Op.getReg();
831d5ae102SDimitry Andric unsigned MaxDiff = 0;
841d5ae102SDimitry Andric bool PhiIsSwapped = false;
85c0981da4SDimitry Andric for (MachineOperand &UseOp : MRI.use_operands(Reg)) {
861d5ae102SDimitry Andric MachineInstr *UseMI = UseOp.getParent();
871d5ae102SDimitry Andric int UseStage = Schedule.getStage(UseMI);
881d5ae102SDimitry Andric unsigned Diff = 0;
891d5ae102SDimitry Andric if (UseStage != -1 && UseStage >= DefStage)
901d5ae102SDimitry Andric Diff = UseStage - DefStage;
911d5ae102SDimitry Andric if (MI->isPHI()) {
921d5ae102SDimitry Andric if (isLoopCarried(*MI))
931d5ae102SDimitry Andric ++Diff;
941d5ae102SDimitry Andric else
951d5ae102SDimitry Andric PhiIsSwapped = true;
961d5ae102SDimitry Andric }
971d5ae102SDimitry Andric MaxDiff = std::max(Diff, MaxDiff);
981d5ae102SDimitry Andric }
991d5ae102SDimitry Andric RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
1001d5ae102SDimitry Andric }
1011d5ae102SDimitry Andric }
1021d5ae102SDimitry Andric
1031d5ae102SDimitry Andric generatePipelinedLoop();
1041d5ae102SDimitry Andric }
1051d5ae102SDimitry Andric
generatePipelinedLoop()1061d5ae102SDimitry Andric void ModuloScheduleExpander::generatePipelinedLoop() {
1071d5ae102SDimitry Andric LoopInfo = TII->analyzeLoopForPipelining(BB);
1081d5ae102SDimitry Andric assert(LoopInfo && "Must be able to analyze loop!");
1091d5ae102SDimitry Andric
1101d5ae102SDimitry Andric // Create a new basic block for the kernel and add it to the CFG.
1111d5ae102SDimitry Andric MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
1121d5ae102SDimitry Andric
1131d5ae102SDimitry Andric unsigned MaxStageCount = Schedule.getNumStages() - 1;
1141d5ae102SDimitry Andric
1151d5ae102SDimitry Andric // Remember the registers that are used in different stages. The index is
1161d5ae102SDimitry Andric // the iteration, or stage, that the instruction is scheduled in. This is
1171d5ae102SDimitry Andric // a map between register names in the original block and the names created
1181d5ae102SDimitry Andric // in each stage of the pipelined loop.
1191d5ae102SDimitry Andric ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
120e3b55780SDimitry Andric
121e3b55780SDimitry Andric // The renaming destination by Phis for the registers across stages.
122e3b55780SDimitry Andric // This map is updated during Phis generation to point to the most recent
123e3b55780SDimitry Andric // renaming destination.
124e3b55780SDimitry Andric ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
125e3b55780SDimitry Andric
1261d5ae102SDimitry Andric InstrMapTy InstrMap;
1271d5ae102SDimitry Andric
1281d5ae102SDimitry Andric SmallVector<MachineBasicBlock *, 4> PrologBBs;
1291d5ae102SDimitry Andric
1301d5ae102SDimitry Andric // Generate the prolog instructions that set up the pipeline.
1311d5ae102SDimitry Andric generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
1321d5ae102SDimitry Andric MF.insert(BB->getIterator(), KernelBB);
1331de139fdSDimitry Andric LIS.insertMBBInMaps(KernelBB);
1341d5ae102SDimitry Andric
1351d5ae102SDimitry Andric // Rearrange the instructions to generate the new, pipelined loop,
1361d5ae102SDimitry Andric // and update register names as needed.
1371d5ae102SDimitry Andric for (MachineInstr *CI : Schedule.getInstructions()) {
1381d5ae102SDimitry Andric if (CI->isPHI())
1391d5ae102SDimitry Andric continue;
1401d5ae102SDimitry Andric unsigned StageNum = Schedule.getStage(CI);
1411d5ae102SDimitry Andric MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
1421d5ae102SDimitry Andric updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
1431d5ae102SDimitry Andric KernelBB->push_back(NewMI);
1441d5ae102SDimitry Andric InstrMap[NewMI] = CI;
1451d5ae102SDimitry Andric }
1461d5ae102SDimitry Andric
1471d5ae102SDimitry Andric // Copy any terminator instructions to the new kernel, and update
1481d5ae102SDimitry Andric // names as needed.
149c0981da4SDimitry Andric for (MachineInstr &MI : BB->terminators()) {
150c0981da4SDimitry Andric MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
1511d5ae102SDimitry Andric updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
1521d5ae102SDimitry Andric KernelBB->push_back(NewMI);
153c0981da4SDimitry Andric InstrMap[NewMI] = &MI;
1541d5ae102SDimitry Andric }
1551d5ae102SDimitry Andric
1561d5ae102SDimitry Andric NewKernel = KernelBB;
1571d5ae102SDimitry Andric KernelBB->transferSuccessors(BB);
1581d5ae102SDimitry Andric KernelBB->replaceSuccessor(BB, KernelBB);
1591d5ae102SDimitry Andric
1601d5ae102SDimitry Andric generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
1611d5ae102SDimitry Andric InstrMap, MaxStageCount, MaxStageCount, false);
162e3b55780SDimitry Andric generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
163e3b55780SDimitry Andric InstrMap, MaxStageCount, MaxStageCount, false);
1641d5ae102SDimitry Andric
1651d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
1661d5ae102SDimitry Andric
1671d5ae102SDimitry Andric SmallVector<MachineBasicBlock *, 4> EpilogBBs;
1681d5ae102SDimitry Andric // Generate the epilog instructions to complete the pipeline.
169e3b55780SDimitry Andric generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
170e3b55780SDimitry Andric PrologBBs);
1711d5ae102SDimitry Andric
1721d5ae102SDimitry Andric // We need this step because the register allocation doesn't handle some
1731d5ae102SDimitry Andric // situations well, so we insert copies to help out.
1741d5ae102SDimitry Andric splitLifetimes(KernelBB, EpilogBBs);
1751d5ae102SDimitry Andric
1761d5ae102SDimitry Andric // Remove dead instructions due to loop induction variables.
1771d5ae102SDimitry Andric removeDeadInstructions(KernelBB, EpilogBBs);
1781d5ae102SDimitry Andric
1791d5ae102SDimitry Andric // Add branches between prolog and epilog blocks.
1801d5ae102SDimitry Andric addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
1811d5ae102SDimitry Andric
1821d5ae102SDimitry Andric delete[] VRMap;
183e3b55780SDimitry Andric delete[] VRMapPhi;
1841d5ae102SDimitry Andric }
1851d5ae102SDimitry Andric
cleanup()1861d5ae102SDimitry Andric void ModuloScheduleExpander::cleanup() {
1871d5ae102SDimitry Andric // Remove the original loop since it's no longer referenced.
1881d5ae102SDimitry Andric for (auto &I : *BB)
1891d5ae102SDimitry Andric LIS.RemoveMachineInstrFromMaps(I);
1901d5ae102SDimitry Andric BB->clear();
1911d5ae102SDimitry Andric BB->eraseFromParent();
1921d5ae102SDimitry Andric }
1931d5ae102SDimitry Andric
1941d5ae102SDimitry Andric /// Generate the pipeline prolog code.
generateProlog(unsigned LastStage,MachineBasicBlock * KernelBB,ValueMapTy * VRMap,MBBVectorTy & PrologBBs)1951d5ae102SDimitry Andric void ModuloScheduleExpander::generateProlog(unsigned LastStage,
1961d5ae102SDimitry Andric MachineBasicBlock *KernelBB,
1971d5ae102SDimitry Andric ValueMapTy *VRMap,
1981d5ae102SDimitry Andric MBBVectorTy &PrologBBs) {
1991d5ae102SDimitry Andric MachineBasicBlock *PredBB = Preheader;
2001d5ae102SDimitry Andric InstrMapTy InstrMap;
2011d5ae102SDimitry Andric
2021d5ae102SDimitry Andric // Generate a basic block for each stage, not including the last stage,
2031d5ae102SDimitry Andric // which will be generated in the kernel. Each basic block may contain
2041d5ae102SDimitry Andric // instructions from multiple stages/iterations.
2051d5ae102SDimitry Andric for (unsigned i = 0; i < LastStage; ++i) {
2061d5ae102SDimitry Andric // Create and insert the prolog basic block prior to the original loop
2071d5ae102SDimitry Andric // basic block. The original loop is removed later.
2081d5ae102SDimitry Andric MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
2091d5ae102SDimitry Andric PrologBBs.push_back(NewBB);
2101d5ae102SDimitry Andric MF.insert(BB->getIterator(), NewBB);
2111d5ae102SDimitry Andric NewBB->transferSuccessors(PredBB);
2121d5ae102SDimitry Andric PredBB->addSuccessor(NewBB);
2131d5ae102SDimitry Andric PredBB = NewBB;
2141de139fdSDimitry Andric LIS.insertMBBInMaps(NewBB);
2151d5ae102SDimitry Andric
2161d5ae102SDimitry Andric // Generate instructions for each appropriate stage. Process instructions
2171d5ae102SDimitry Andric // in original program order.
2181d5ae102SDimitry Andric for (int StageNum = i; StageNum >= 0; --StageNum) {
2191d5ae102SDimitry Andric for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
2201d5ae102SDimitry Andric BBE = BB->getFirstTerminator();
2211d5ae102SDimitry Andric BBI != BBE; ++BBI) {
2221d5ae102SDimitry Andric if (Schedule.getStage(&*BBI) == StageNum) {
2231d5ae102SDimitry Andric if (BBI->isPHI())
2241d5ae102SDimitry Andric continue;
2251d5ae102SDimitry Andric MachineInstr *NewMI =
2261d5ae102SDimitry Andric cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
2271d5ae102SDimitry Andric updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
2281d5ae102SDimitry Andric NewBB->push_back(NewMI);
2291d5ae102SDimitry Andric InstrMap[NewMI] = &*BBI;
2301d5ae102SDimitry Andric }
2311d5ae102SDimitry Andric }
2321d5ae102SDimitry Andric }
2331d5ae102SDimitry Andric rewritePhiValues(NewBB, i, VRMap, InstrMap);
2341d5ae102SDimitry Andric LLVM_DEBUG({
2351d5ae102SDimitry Andric dbgs() << "prolog:\n";
2361d5ae102SDimitry Andric NewBB->dump();
2371d5ae102SDimitry Andric });
2381d5ae102SDimitry Andric }
2391d5ae102SDimitry Andric
2401d5ae102SDimitry Andric PredBB->replaceSuccessor(BB, KernelBB);
2411d5ae102SDimitry Andric
2421d5ae102SDimitry Andric // Check if we need to remove the branch from the preheader to the original
2431d5ae102SDimitry Andric // loop, and replace it with a branch to the new loop.
2441d5ae102SDimitry Andric unsigned numBranches = TII->removeBranch(*Preheader);
2451d5ae102SDimitry Andric if (numBranches) {
2461d5ae102SDimitry Andric SmallVector<MachineOperand, 0> Cond;
2471d5ae102SDimitry Andric TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
2481d5ae102SDimitry Andric }
2491d5ae102SDimitry Andric }
2501d5ae102SDimitry Andric
2511d5ae102SDimitry Andric /// Generate the pipeline epilog code. The epilog code finishes the iterations
2521d5ae102SDimitry Andric /// that were started in either the prolog or the kernel. We create a basic
2531d5ae102SDimitry Andric /// block for each stage that needs to complete.
generateEpilog(unsigned LastStage,MachineBasicBlock * KernelBB,MachineBasicBlock * OrigBB,ValueMapTy * VRMap,ValueMapTy * VRMapPhi,MBBVectorTy & EpilogBBs,MBBVectorTy & PrologBBs)254145449b1SDimitry Andric void ModuloScheduleExpander::generateEpilog(
255145449b1SDimitry Andric unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
256e3b55780SDimitry Andric ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
257e3b55780SDimitry Andric MBBVectorTy &PrologBBs) {
2581d5ae102SDimitry Andric // We need to change the branch from the kernel to the first epilog block, so
2591d5ae102SDimitry Andric // this call to analyze branch uses the kernel rather than the original BB.
2601d5ae102SDimitry Andric MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2611d5ae102SDimitry Andric SmallVector<MachineOperand, 4> Cond;
2621d5ae102SDimitry Andric bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
2631d5ae102SDimitry Andric assert(!checkBranch && "generateEpilog must be able to analyze the branch");
2641d5ae102SDimitry Andric if (checkBranch)
2651d5ae102SDimitry Andric return;
2661d5ae102SDimitry Andric
2671d5ae102SDimitry Andric MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
2681d5ae102SDimitry Andric if (*LoopExitI == KernelBB)
2691d5ae102SDimitry Andric ++LoopExitI;
2701d5ae102SDimitry Andric assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
2711d5ae102SDimitry Andric MachineBasicBlock *LoopExitBB = *LoopExitI;
2721d5ae102SDimitry Andric
2731d5ae102SDimitry Andric MachineBasicBlock *PredBB = KernelBB;
2741d5ae102SDimitry Andric MachineBasicBlock *EpilogStart = LoopExitBB;
2751d5ae102SDimitry Andric InstrMapTy InstrMap;
2761d5ae102SDimitry Andric
2771d5ae102SDimitry Andric // Generate a basic block for each stage, not including the last stage,
2781d5ae102SDimitry Andric // which was generated for the kernel. Each basic block may contain
2791d5ae102SDimitry Andric // instructions from multiple stages/iterations.
2801d5ae102SDimitry Andric int EpilogStage = LastStage + 1;
2811d5ae102SDimitry Andric for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
2821d5ae102SDimitry Andric MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
2831d5ae102SDimitry Andric EpilogBBs.push_back(NewBB);
2841d5ae102SDimitry Andric MF.insert(BB->getIterator(), NewBB);
2851d5ae102SDimitry Andric
2861d5ae102SDimitry Andric PredBB->replaceSuccessor(LoopExitBB, NewBB);
2871d5ae102SDimitry Andric NewBB->addSuccessor(LoopExitBB);
2881de139fdSDimitry Andric LIS.insertMBBInMaps(NewBB);
2891d5ae102SDimitry Andric
2901d5ae102SDimitry Andric if (EpilogStart == LoopExitBB)
2911d5ae102SDimitry Andric EpilogStart = NewBB;
2921d5ae102SDimitry Andric
2931d5ae102SDimitry Andric // Add instructions to the epilog depending on the current block.
2941d5ae102SDimitry Andric // Process instructions in original program order.
2951d5ae102SDimitry Andric for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
2961d5ae102SDimitry Andric for (auto &BBI : *BB) {
2971d5ae102SDimitry Andric if (BBI.isPHI())
2981d5ae102SDimitry Andric continue;
2991d5ae102SDimitry Andric MachineInstr *In = &BBI;
3001d5ae102SDimitry Andric if ((unsigned)Schedule.getStage(In) == StageNum) {
3011d5ae102SDimitry Andric // Instructions with memoperands in the epilog are updated with
3021d5ae102SDimitry Andric // conservative values.
3031d5ae102SDimitry Andric MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
3041d5ae102SDimitry Andric updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
3051d5ae102SDimitry Andric NewBB->push_back(NewMI);
3061d5ae102SDimitry Andric InstrMap[NewMI] = In;
3071d5ae102SDimitry Andric }
3081d5ae102SDimitry Andric }
3091d5ae102SDimitry Andric }
3101d5ae102SDimitry Andric generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
3111d5ae102SDimitry Andric InstrMap, LastStage, EpilogStage, i == 1);
312e3b55780SDimitry Andric generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
313e3b55780SDimitry Andric InstrMap, LastStage, EpilogStage, i == 1);
3141d5ae102SDimitry Andric PredBB = NewBB;
3151d5ae102SDimitry Andric
3161d5ae102SDimitry Andric LLVM_DEBUG({
3171d5ae102SDimitry Andric dbgs() << "epilog:\n";
3181d5ae102SDimitry Andric NewBB->dump();
3191d5ae102SDimitry Andric });
3201d5ae102SDimitry Andric }
3211d5ae102SDimitry Andric
3221d5ae102SDimitry Andric // Fix any Phi nodes in the loop exit block.
3231d5ae102SDimitry Andric LoopExitBB->replacePhiUsesWith(BB, PredBB);
3241d5ae102SDimitry Andric
3251d5ae102SDimitry Andric // Create a branch to the new epilog from the kernel.
3261d5ae102SDimitry Andric // Remove the original branch and add a new branch to the epilog.
3271d5ae102SDimitry Andric TII->removeBranch(*KernelBB);
328145449b1SDimitry Andric assert((OrigBB == TBB || OrigBB == FBB) &&
329145449b1SDimitry Andric "Unable to determine looping branch direction");
330145449b1SDimitry Andric if (OrigBB != TBB)
331145449b1SDimitry Andric TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
332145449b1SDimitry Andric else
3331d5ae102SDimitry Andric TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
3341d5ae102SDimitry Andric // Add a branch to the loop exit.
3351d5ae102SDimitry Andric if (EpilogBBs.size() > 0) {
3361d5ae102SDimitry Andric MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
3371d5ae102SDimitry Andric SmallVector<MachineOperand, 4> Cond1;
3381d5ae102SDimitry Andric TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
3391d5ae102SDimitry Andric }
3401d5ae102SDimitry Andric }
3411d5ae102SDimitry Andric
3421d5ae102SDimitry Andric /// Replace all uses of FromReg that appear outside the specified
3431d5ae102SDimitry Andric /// basic block with ToReg.
replaceRegUsesAfterLoop(unsigned FromReg,unsigned ToReg,MachineBasicBlock * MBB,MachineRegisterInfo & MRI,LiveIntervals & LIS)3441d5ae102SDimitry Andric static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
3451d5ae102SDimitry Andric MachineBasicBlock *MBB,
3461d5ae102SDimitry Andric MachineRegisterInfo &MRI,
3471d5ae102SDimitry Andric LiveIntervals &LIS) {
348c0981da4SDimitry Andric for (MachineOperand &O :
349c0981da4SDimitry Andric llvm::make_early_inc_range(MRI.use_operands(FromReg)))
3501d5ae102SDimitry Andric if (O.getParent()->getParent() != MBB)
3511d5ae102SDimitry Andric O.setReg(ToReg);
3521d5ae102SDimitry Andric if (!LIS.hasInterval(ToReg))
3531d5ae102SDimitry Andric LIS.createEmptyInterval(ToReg);
3541d5ae102SDimitry Andric }
3551d5ae102SDimitry Andric
3561d5ae102SDimitry Andric /// Return true if the register has a use that occurs outside the
3571d5ae102SDimitry Andric /// specified loop.
hasUseAfterLoop(unsigned Reg,MachineBasicBlock * BB,MachineRegisterInfo & MRI)3581d5ae102SDimitry Andric static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
3591d5ae102SDimitry Andric MachineRegisterInfo &MRI) {
360c0981da4SDimitry Andric for (const MachineOperand &MO : MRI.use_operands(Reg))
361c0981da4SDimitry Andric if (MO.getParent()->getParent() != BB)
3621d5ae102SDimitry Andric return true;
3631d5ae102SDimitry Andric return false;
3641d5ae102SDimitry Andric }
3651d5ae102SDimitry Andric
3661d5ae102SDimitry Andric /// Generate Phis for the specific block in the generated pipelined code.
3671d5ae102SDimitry Andric /// This function looks at the Phis from the original code to guide the
3681d5ae102SDimitry Andric /// creation of new Phis.
generateExistingPhis(MachineBasicBlock * NewBB,MachineBasicBlock * BB1,MachineBasicBlock * BB2,MachineBasicBlock * KernelBB,ValueMapTy * VRMap,InstrMapTy & InstrMap,unsigned LastStageNum,unsigned CurStageNum,bool IsLast)3691d5ae102SDimitry Andric void ModuloScheduleExpander::generateExistingPhis(
3701d5ae102SDimitry Andric MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
3711d5ae102SDimitry Andric MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
3721d5ae102SDimitry Andric unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
3731d5ae102SDimitry Andric // Compute the stage number for the initial value of the Phi, which
3741d5ae102SDimitry Andric // comes from the prolog. The prolog to use depends on to which kernel/
3751d5ae102SDimitry Andric // epilog that we're adding the Phi.
3761d5ae102SDimitry Andric unsigned PrologStage = 0;
3771d5ae102SDimitry Andric unsigned PrevStage = 0;
3781d5ae102SDimitry Andric bool InKernel = (LastStageNum == CurStageNum);
3791d5ae102SDimitry Andric if (InKernel) {
3801d5ae102SDimitry Andric PrologStage = LastStageNum - 1;
3811d5ae102SDimitry Andric PrevStage = CurStageNum;
3821d5ae102SDimitry Andric } else {
3831d5ae102SDimitry Andric PrologStage = LastStageNum - (CurStageNum - LastStageNum);
3841d5ae102SDimitry Andric PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
3851d5ae102SDimitry Andric }
3861d5ae102SDimitry Andric
3871d5ae102SDimitry Andric for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
3881d5ae102SDimitry Andric BBE = BB->getFirstNonPHI();
3891d5ae102SDimitry Andric BBI != BBE; ++BBI) {
3901d5ae102SDimitry Andric Register Def = BBI->getOperand(0).getReg();
3911d5ae102SDimitry Andric
3921d5ae102SDimitry Andric unsigned InitVal = 0;
3931d5ae102SDimitry Andric unsigned LoopVal = 0;
3941d5ae102SDimitry Andric getPhiRegs(*BBI, BB, InitVal, LoopVal);
3951d5ae102SDimitry Andric
3961d5ae102SDimitry Andric unsigned PhiOp1 = 0;
3971d5ae102SDimitry Andric // The Phi value from the loop body typically is defined in the loop, but
3981d5ae102SDimitry Andric // not always. So, we need to check if the value is defined in the loop.
3991d5ae102SDimitry Andric unsigned PhiOp2 = LoopVal;
4001d5ae102SDimitry Andric if (VRMap[LastStageNum].count(LoopVal))
4011d5ae102SDimitry Andric PhiOp2 = VRMap[LastStageNum][LoopVal];
4021d5ae102SDimitry Andric
4031d5ae102SDimitry Andric int StageScheduled = Schedule.getStage(&*BBI);
4041d5ae102SDimitry Andric int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
4051d5ae102SDimitry Andric unsigned NumStages = getStagesForReg(Def, CurStageNum);
4061d5ae102SDimitry Andric if (NumStages == 0) {
4071d5ae102SDimitry Andric // We don't need to generate a Phi anymore, but we need to rename any uses
4081d5ae102SDimitry Andric // of the Phi value.
4091d5ae102SDimitry Andric unsigned NewReg = VRMap[PrevStage][LoopVal];
4101d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
4111d5ae102SDimitry Andric InitVal, NewReg);
4121d5ae102SDimitry Andric if (VRMap[CurStageNum].count(LoopVal))
4131d5ae102SDimitry Andric VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
4141d5ae102SDimitry Andric }
4151d5ae102SDimitry Andric // Adjust the number of Phis needed depending on the number of prologs left,
4161d5ae102SDimitry Andric // and the distance from where the Phi is first scheduled. The number of
4171d5ae102SDimitry Andric // Phis cannot exceed the number of prolog stages. Each stage can
4181d5ae102SDimitry Andric // potentially define two values.
4191d5ae102SDimitry Andric unsigned MaxPhis = PrologStage + 2;
4201d5ae102SDimitry Andric if (!InKernel && (int)PrologStage <= LoopValStage)
4211d5ae102SDimitry Andric MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
4221d5ae102SDimitry Andric unsigned NumPhis = std::min(NumStages, MaxPhis);
4231d5ae102SDimitry Andric
4241d5ae102SDimitry Andric unsigned NewReg = 0;
4251d5ae102SDimitry Andric unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
4261d5ae102SDimitry Andric // In the epilog, we may need to look back one stage to get the correct
427cfca06d7SDimitry Andric // Phi name, because the epilog and prolog blocks execute the same stage.
4281d5ae102SDimitry Andric // The correct name is from the previous block only when the Phi has
4291d5ae102SDimitry Andric // been completely scheduled prior to the epilog, and Phi value is not
4301d5ae102SDimitry Andric // needed in multiple stages.
4311d5ae102SDimitry Andric int StageDiff = 0;
4321d5ae102SDimitry Andric if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
4331d5ae102SDimitry Andric NumPhis == 1)
4341d5ae102SDimitry Andric StageDiff = 1;
4351d5ae102SDimitry Andric // Adjust the computations below when the phi and the loop definition
4361d5ae102SDimitry Andric // are scheduled in different stages.
4371d5ae102SDimitry Andric if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
4381d5ae102SDimitry Andric StageDiff = StageScheduled - LoopValStage;
4391d5ae102SDimitry Andric for (unsigned np = 0; np < NumPhis; ++np) {
4401d5ae102SDimitry Andric // If the Phi hasn't been scheduled, then use the initial Phi operand
4411d5ae102SDimitry Andric // value. Otherwise, use the scheduled version of the instruction. This
4421d5ae102SDimitry Andric // is a little complicated when a Phi references another Phi.
4431d5ae102SDimitry Andric if (np > PrologStage || StageScheduled >= (int)LastStageNum)
4441d5ae102SDimitry Andric PhiOp1 = InitVal;
4451d5ae102SDimitry Andric // Check if the Phi has already been scheduled in a prolog stage.
4461d5ae102SDimitry Andric else if (PrologStage >= AccessStage + StageDiff + np &&
4471d5ae102SDimitry Andric VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
4481d5ae102SDimitry Andric PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
4491d5ae102SDimitry Andric // Check if the Phi has already been scheduled, but the loop instruction
4501d5ae102SDimitry Andric // is either another Phi, or doesn't occur in the loop.
4511d5ae102SDimitry Andric else if (PrologStage >= AccessStage + StageDiff + np) {
4521d5ae102SDimitry Andric // If the Phi references another Phi, we need to examine the other
4531d5ae102SDimitry Andric // Phi to get the correct value.
4541d5ae102SDimitry Andric PhiOp1 = LoopVal;
4551d5ae102SDimitry Andric MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
4561d5ae102SDimitry Andric int Indirects = 1;
4571d5ae102SDimitry Andric while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
4581d5ae102SDimitry Andric int PhiStage = Schedule.getStage(InstOp1);
4591d5ae102SDimitry Andric if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
4601d5ae102SDimitry Andric PhiOp1 = getInitPhiReg(*InstOp1, BB);
4611d5ae102SDimitry Andric else
4621d5ae102SDimitry Andric PhiOp1 = getLoopPhiReg(*InstOp1, BB);
4631d5ae102SDimitry Andric InstOp1 = MRI.getVRegDef(PhiOp1);
4641d5ae102SDimitry Andric int PhiOpStage = Schedule.getStage(InstOp1);
4651d5ae102SDimitry Andric int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
4661d5ae102SDimitry Andric if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
4671d5ae102SDimitry Andric VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
4681d5ae102SDimitry Andric PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
4691d5ae102SDimitry Andric break;
4701d5ae102SDimitry Andric }
4711d5ae102SDimitry Andric ++Indirects;
4721d5ae102SDimitry Andric }
4731d5ae102SDimitry Andric } else
4741d5ae102SDimitry Andric PhiOp1 = InitVal;
4751d5ae102SDimitry Andric // If this references a generated Phi in the kernel, get the Phi operand
4761d5ae102SDimitry Andric // from the incoming block.
4771d5ae102SDimitry Andric if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
4781d5ae102SDimitry Andric if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
4791d5ae102SDimitry Andric PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
4801d5ae102SDimitry Andric
4811d5ae102SDimitry Andric MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
4821d5ae102SDimitry Andric bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
4831d5ae102SDimitry Andric // In the epilog, a map lookup is needed to get the value from the kernel,
4841d5ae102SDimitry Andric // or previous epilog block. How is does this depends on if the
4851d5ae102SDimitry Andric // instruction is scheduled in the previous block.
4861d5ae102SDimitry Andric if (!InKernel) {
4871d5ae102SDimitry Andric int StageDiffAdj = 0;
4881d5ae102SDimitry Andric if (LoopValStage != -1 && StageScheduled > LoopValStage)
4891d5ae102SDimitry Andric StageDiffAdj = StageScheduled - LoopValStage;
4901d5ae102SDimitry Andric // Use the loop value defined in the kernel, unless the kernel
4911d5ae102SDimitry Andric // contains the last definition of the Phi.
4921d5ae102SDimitry Andric if (np == 0 && PrevStage == LastStageNum &&
4931d5ae102SDimitry Andric (StageScheduled != 0 || LoopValStage != 0) &&
4941d5ae102SDimitry Andric VRMap[PrevStage - StageDiffAdj].count(LoopVal))
4951d5ae102SDimitry Andric PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
4961d5ae102SDimitry Andric // Use the value defined by the Phi. We add one because we switch
4971d5ae102SDimitry Andric // from looking at the loop value to the Phi definition.
4981d5ae102SDimitry Andric else if (np > 0 && PrevStage == LastStageNum &&
4991d5ae102SDimitry Andric VRMap[PrevStage - np + 1].count(Def))
5001d5ae102SDimitry Andric PhiOp2 = VRMap[PrevStage - np + 1][Def];
5011d5ae102SDimitry Andric // Use the loop value defined in the kernel.
5021d5ae102SDimitry Andric else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
5031d5ae102SDimitry Andric VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
5041d5ae102SDimitry Andric PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
5051d5ae102SDimitry Andric // Use the value defined by the Phi, unless we're generating the first
5061d5ae102SDimitry Andric // epilog and the Phi refers to a Phi in a different stage.
5071d5ae102SDimitry Andric else if (VRMap[PrevStage - np].count(Def) &&
5081d5ae102SDimitry Andric (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
5091d5ae102SDimitry Andric (LoopValStage == StageScheduled)))
5101d5ae102SDimitry Andric PhiOp2 = VRMap[PrevStage - np][Def];
5111d5ae102SDimitry Andric }
5121d5ae102SDimitry Andric
5131d5ae102SDimitry Andric // Check if we can reuse an existing Phi. This occurs when a Phi
5141d5ae102SDimitry Andric // references another Phi, and the other Phi is scheduled in an
5151d5ae102SDimitry Andric // earlier stage. We can try to reuse an existing Phi up until the last
5161d5ae102SDimitry Andric // stage of the current Phi.
5171d5ae102SDimitry Andric if (LoopDefIsPhi) {
5181d5ae102SDimitry Andric if (static_cast<int>(PrologStage - np) >= StageScheduled) {
5191d5ae102SDimitry Andric int LVNumStages = getStagesForPhi(LoopVal);
5201d5ae102SDimitry Andric int StageDiff = (StageScheduled - LoopValStage);
5211d5ae102SDimitry Andric LVNumStages -= StageDiff;
5221d5ae102SDimitry Andric // Make sure the loop value Phi has been processed already.
5231d5ae102SDimitry Andric if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
5241d5ae102SDimitry Andric NewReg = PhiOp2;
5251d5ae102SDimitry Andric unsigned ReuseStage = CurStageNum;
5261d5ae102SDimitry Andric if (isLoopCarried(*PhiInst))
5271d5ae102SDimitry Andric ReuseStage -= LVNumStages;
5281d5ae102SDimitry Andric // Check if the Phi to reuse has been generated yet. If not, then
5291d5ae102SDimitry Andric // there is nothing to reuse.
5301d5ae102SDimitry Andric if (VRMap[ReuseStage - np].count(LoopVal)) {
5311d5ae102SDimitry Andric NewReg = VRMap[ReuseStage - np][LoopVal];
5321d5ae102SDimitry Andric
5331d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
5341d5ae102SDimitry Andric Def, NewReg);
5351d5ae102SDimitry Andric // Update the map with the new Phi name.
5361d5ae102SDimitry Andric VRMap[CurStageNum - np][Def] = NewReg;
5371d5ae102SDimitry Andric PhiOp2 = NewReg;
5381d5ae102SDimitry Andric if (VRMap[LastStageNum - np - 1].count(LoopVal))
5391d5ae102SDimitry Andric PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
5401d5ae102SDimitry Andric
5411d5ae102SDimitry Andric if (IsLast && np == NumPhis - 1)
5421d5ae102SDimitry Andric replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
5431d5ae102SDimitry Andric continue;
5441d5ae102SDimitry Andric }
5451d5ae102SDimitry Andric }
5461d5ae102SDimitry Andric }
5471d5ae102SDimitry Andric if (InKernel && StageDiff > 0 &&
5481d5ae102SDimitry Andric VRMap[CurStageNum - StageDiff - np].count(LoopVal))
5491d5ae102SDimitry Andric PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
5501d5ae102SDimitry Andric }
5511d5ae102SDimitry Andric
5521d5ae102SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(Def);
5531d5ae102SDimitry Andric NewReg = MRI.createVirtualRegister(RC);
5541d5ae102SDimitry Andric
5551d5ae102SDimitry Andric MachineInstrBuilder NewPhi =
5561d5ae102SDimitry Andric BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
5571d5ae102SDimitry Andric TII->get(TargetOpcode::PHI), NewReg);
5581d5ae102SDimitry Andric NewPhi.addReg(PhiOp1).addMBB(BB1);
5591d5ae102SDimitry Andric NewPhi.addReg(PhiOp2).addMBB(BB2);
5601d5ae102SDimitry Andric if (np == 0)
5611d5ae102SDimitry Andric InstrMap[NewPhi] = &*BBI;
5621d5ae102SDimitry Andric
5631d5ae102SDimitry Andric // We define the Phis after creating the new pipelined code, so
5641d5ae102SDimitry Andric // we need to rename the Phi values in scheduled instructions.
5651d5ae102SDimitry Andric
5661d5ae102SDimitry Andric unsigned PrevReg = 0;
5671d5ae102SDimitry Andric if (InKernel && VRMap[PrevStage - np].count(LoopVal))
5681d5ae102SDimitry Andric PrevReg = VRMap[PrevStage - np][LoopVal];
5691d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
5701d5ae102SDimitry Andric NewReg, PrevReg);
5711d5ae102SDimitry Andric // If the Phi has been scheduled, use the new name for rewriting.
5721d5ae102SDimitry Andric if (VRMap[CurStageNum - np].count(Def)) {
5731d5ae102SDimitry Andric unsigned R = VRMap[CurStageNum - np][Def];
5741d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
5751d5ae102SDimitry Andric NewReg);
5761d5ae102SDimitry Andric }
5771d5ae102SDimitry Andric
5781d5ae102SDimitry Andric // Check if we need to rename any uses that occurs after the loop. The
5791d5ae102SDimitry Andric // register to replace depends on whether the Phi is scheduled in the
5801d5ae102SDimitry Andric // epilog.
5811d5ae102SDimitry Andric if (IsLast && np == NumPhis - 1)
5821d5ae102SDimitry Andric replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
5831d5ae102SDimitry Andric
5841d5ae102SDimitry Andric // In the kernel, a dependent Phi uses the value from this Phi.
5851d5ae102SDimitry Andric if (InKernel)
5861d5ae102SDimitry Andric PhiOp2 = NewReg;
5871d5ae102SDimitry Andric
5881d5ae102SDimitry Andric // Update the map with the new Phi name.
5891d5ae102SDimitry Andric VRMap[CurStageNum - np][Def] = NewReg;
5901d5ae102SDimitry Andric }
5911d5ae102SDimitry Andric
5921d5ae102SDimitry Andric while (NumPhis++ < NumStages) {
5931d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
5941d5ae102SDimitry Andric NewReg, 0);
5951d5ae102SDimitry Andric }
5961d5ae102SDimitry Andric
5971d5ae102SDimitry Andric // Check if we need to rename a Phi that has been eliminated due to
5981d5ae102SDimitry Andric // scheduling.
5991d5ae102SDimitry Andric if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
6001d5ae102SDimitry Andric replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
6011d5ae102SDimitry Andric }
6021d5ae102SDimitry Andric }
6031d5ae102SDimitry Andric
6041d5ae102SDimitry Andric /// Generate Phis for the specified block in the generated pipelined code.
6051d5ae102SDimitry Andric /// These are new Phis needed because the definition is scheduled after the
6061d5ae102SDimitry Andric /// use in the pipelined sequence.
generatePhis(MachineBasicBlock * NewBB,MachineBasicBlock * BB1,MachineBasicBlock * BB2,MachineBasicBlock * KernelBB,ValueMapTy * VRMap,ValueMapTy * VRMapPhi,InstrMapTy & InstrMap,unsigned LastStageNum,unsigned CurStageNum,bool IsLast)6071d5ae102SDimitry Andric void ModuloScheduleExpander::generatePhis(
6081d5ae102SDimitry Andric MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
609e3b55780SDimitry Andric MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
610e3b55780SDimitry Andric InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
611e3b55780SDimitry Andric bool IsLast) {
6121d5ae102SDimitry Andric // Compute the stage number that contains the initial Phi value, and
6131d5ae102SDimitry Andric // the Phi from the previous stage.
6141d5ae102SDimitry Andric unsigned PrologStage = 0;
6151d5ae102SDimitry Andric unsigned PrevStage = 0;
6161d5ae102SDimitry Andric unsigned StageDiff = CurStageNum - LastStageNum;
6171d5ae102SDimitry Andric bool InKernel = (StageDiff == 0);
6181d5ae102SDimitry Andric if (InKernel) {
6191d5ae102SDimitry Andric PrologStage = LastStageNum - 1;
6201d5ae102SDimitry Andric PrevStage = CurStageNum;
6211d5ae102SDimitry Andric } else {
6221d5ae102SDimitry Andric PrologStage = LastStageNum - StageDiff;
6231d5ae102SDimitry Andric PrevStage = LastStageNum + StageDiff - 1;
6241d5ae102SDimitry Andric }
6251d5ae102SDimitry Andric
6261d5ae102SDimitry Andric for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
6271d5ae102SDimitry Andric BBE = BB->instr_end();
6281d5ae102SDimitry Andric BBI != BBE; ++BBI) {
6291d5ae102SDimitry Andric for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
6301d5ae102SDimitry Andric MachineOperand &MO = BBI->getOperand(i);
631e3b55780SDimitry Andric if (!MO.isReg() || !MO.isDef() || !MO.getReg().isVirtual())
6321d5ae102SDimitry Andric continue;
6331d5ae102SDimitry Andric
6341d5ae102SDimitry Andric int StageScheduled = Schedule.getStage(&*BBI);
6351d5ae102SDimitry Andric assert(StageScheduled != -1 && "Expecting scheduled instruction.");
6361d5ae102SDimitry Andric Register Def = MO.getReg();
6371d5ae102SDimitry Andric unsigned NumPhis = getStagesForReg(Def, CurStageNum);
6381d5ae102SDimitry Andric // An instruction scheduled in stage 0 and is used after the loop
6391d5ae102SDimitry Andric // requires a phi in the epilog for the last definition from either
6401d5ae102SDimitry Andric // the kernel or prolog.
6411d5ae102SDimitry Andric if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
6421d5ae102SDimitry Andric hasUseAfterLoop(Def, BB, MRI))
6431d5ae102SDimitry Andric NumPhis = 1;
6441d5ae102SDimitry Andric if (!InKernel && (unsigned)StageScheduled > PrologStage)
6451d5ae102SDimitry Andric continue;
6461d5ae102SDimitry Andric
647e3b55780SDimitry Andric unsigned PhiOp2;
648e3b55780SDimitry Andric if (InKernel) {
649e3b55780SDimitry Andric PhiOp2 = VRMap[PrevStage][Def];
6501d5ae102SDimitry Andric if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
6511d5ae102SDimitry Andric if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
6521d5ae102SDimitry Andric PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
653e3b55780SDimitry Andric }
6541d5ae102SDimitry Andric // The number of Phis can't exceed the number of prolog stages. The
6551d5ae102SDimitry Andric // prolog stage number is zero based.
6561d5ae102SDimitry Andric if (NumPhis > PrologStage + 1 - StageScheduled)
6571d5ae102SDimitry Andric NumPhis = PrologStage + 1 - StageScheduled;
6581d5ae102SDimitry Andric for (unsigned np = 0; np < NumPhis; ++np) {
659e3b55780SDimitry Andric // Example for
660e3b55780SDimitry Andric // Org:
661e3b55780SDimitry Andric // %Org = ... (Scheduled at Stage#0, NumPhi = 2)
662e3b55780SDimitry Andric //
663e3b55780SDimitry Andric // Prolog0 (Stage0):
664e3b55780SDimitry Andric // %Clone0 = ...
665e3b55780SDimitry Andric // Prolog1 (Stage1):
666e3b55780SDimitry Andric // %Clone1 = ...
667e3b55780SDimitry Andric // Kernel (Stage2):
668e3b55780SDimitry Andric // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
669e3b55780SDimitry Andric // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
670e3b55780SDimitry Andric // %Clone2 = ...
671e3b55780SDimitry Andric // Epilog0 (Stage3):
672e3b55780SDimitry Andric // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
673e3b55780SDimitry Andric // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
674e3b55780SDimitry Andric // Epilog1 (Stage4):
675e3b55780SDimitry Andric // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
676e3b55780SDimitry Andric //
677e3b55780SDimitry Andric // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
678e3b55780SDimitry Andric // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
679e3b55780SDimitry Andric // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
680e3b55780SDimitry Andric
6811d5ae102SDimitry Andric unsigned PhiOp1 = VRMap[PrologStage][Def];
6821d5ae102SDimitry Andric if (np <= PrologStage)
6831d5ae102SDimitry Andric PhiOp1 = VRMap[PrologStage - np][Def];
684e3b55780SDimitry Andric if (!InKernel) {
685e3b55780SDimitry Andric if (PrevStage == LastStageNum && np == 0)
686e3b55780SDimitry Andric PhiOp2 = VRMap[LastStageNum][Def];
687e3b55780SDimitry Andric else
688e3b55780SDimitry Andric PhiOp2 = VRMapPhi[PrevStage - np][Def];
6891d5ae102SDimitry Andric }
6901d5ae102SDimitry Andric
6911d5ae102SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(Def);
6921d5ae102SDimitry Andric Register NewReg = MRI.createVirtualRegister(RC);
6931d5ae102SDimitry Andric
6941d5ae102SDimitry Andric MachineInstrBuilder NewPhi =
6951d5ae102SDimitry Andric BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
6961d5ae102SDimitry Andric TII->get(TargetOpcode::PHI), NewReg);
6971d5ae102SDimitry Andric NewPhi.addReg(PhiOp1).addMBB(BB1);
6981d5ae102SDimitry Andric NewPhi.addReg(PhiOp2).addMBB(BB2);
6991d5ae102SDimitry Andric if (np == 0)
7001d5ae102SDimitry Andric InstrMap[NewPhi] = &*BBI;
7011d5ae102SDimitry Andric
7021d5ae102SDimitry Andric // Rewrite uses and update the map. The actions depend upon whether
7031d5ae102SDimitry Andric // we generating code for the kernel or epilog blocks.
7041d5ae102SDimitry Andric if (InKernel) {
7051d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
7061d5ae102SDimitry Andric NewReg);
7071d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
7081d5ae102SDimitry Andric NewReg);
7091d5ae102SDimitry Andric
7101d5ae102SDimitry Andric PhiOp2 = NewReg;
711e3b55780SDimitry Andric VRMapPhi[PrevStage - np - 1][Def] = NewReg;
7121d5ae102SDimitry Andric } else {
713e3b55780SDimitry Andric VRMapPhi[CurStageNum - np][Def] = NewReg;
7141d5ae102SDimitry Andric if (np == NumPhis - 1)
7151d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
7161d5ae102SDimitry Andric NewReg);
7171d5ae102SDimitry Andric }
7181d5ae102SDimitry Andric if (IsLast && np == NumPhis - 1)
7191d5ae102SDimitry Andric replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
7201d5ae102SDimitry Andric }
7211d5ae102SDimitry Andric }
7221d5ae102SDimitry Andric }
7231d5ae102SDimitry Andric }
7241d5ae102SDimitry Andric
7251d5ae102SDimitry Andric /// Remove instructions that generate values with no uses.
7261d5ae102SDimitry Andric /// Typically, these are induction variable operations that generate values
7271d5ae102SDimitry Andric /// used in the loop itself. A dead instruction has a definition with
7281d5ae102SDimitry Andric /// no uses, or uses that occur in the original loop only.
removeDeadInstructions(MachineBasicBlock * KernelBB,MBBVectorTy & EpilogBBs)7291d5ae102SDimitry Andric void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
7301d5ae102SDimitry Andric MBBVectorTy &EpilogBBs) {
7311d5ae102SDimitry Andric // For each epilog block, check that the value defined by each instruction
7321d5ae102SDimitry Andric // is used. If not, delete it.
733c0981da4SDimitry Andric for (MachineBasicBlock *MBB : llvm::reverse(EpilogBBs))
734c0981da4SDimitry Andric for (MachineBasicBlock::reverse_instr_iterator MI = MBB->instr_rbegin(),
735c0981da4SDimitry Andric ME = MBB->instr_rend();
7361d5ae102SDimitry Andric MI != ME;) {
7371d5ae102SDimitry Andric // From DeadMachineInstructionElem. Don't delete inline assembly.
7381d5ae102SDimitry Andric if (MI->isInlineAsm()) {
7391d5ae102SDimitry Andric ++MI;
7401d5ae102SDimitry Andric continue;
7411d5ae102SDimitry Andric }
7421d5ae102SDimitry Andric bool SawStore = false;
7431d5ae102SDimitry Andric // Check if it's safe to remove the instruction due to side effects.
7441d5ae102SDimitry Andric // We can, and want to, remove Phis here.
7451d5ae102SDimitry Andric if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
7461d5ae102SDimitry Andric ++MI;
7471d5ae102SDimitry Andric continue;
7481d5ae102SDimitry Andric }
7491d5ae102SDimitry Andric bool used = true;
7507fa27ce4SDimitry Andric for (const MachineOperand &MO : MI->all_defs()) {
751c0981da4SDimitry Andric Register reg = MO.getReg();
7521d5ae102SDimitry Andric // Assume physical registers are used, unless they are marked dead.
753e3b55780SDimitry Andric if (reg.isPhysical()) {
754c0981da4SDimitry Andric used = !MO.isDead();
7551d5ae102SDimitry Andric if (used)
7561d5ae102SDimitry Andric break;
7571d5ae102SDimitry Andric continue;
7581d5ae102SDimitry Andric }
7591d5ae102SDimitry Andric unsigned realUses = 0;
760c0981da4SDimitry Andric for (const MachineOperand &U : MRI.use_operands(reg)) {
7611d5ae102SDimitry Andric // Check if there are any uses that occur only in the original
7621d5ae102SDimitry Andric // loop. If so, that's not a real use.
763c0981da4SDimitry Andric if (U.getParent()->getParent() != BB) {
7641d5ae102SDimitry Andric realUses++;
7651d5ae102SDimitry Andric used = true;
7661d5ae102SDimitry Andric break;
7671d5ae102SDimitry Andric }
7681d5ae102SDimitry Andric }
7691d5ae102SDimitry Andric if (realUses > 0)
7701d5ae102SDimitry Andric break;
7711d5ae102SDimitry Andric used = false;
7721d5ae102SDimitry Andric }
7731d5ae102SDimitry Andric if (!used) {
7741d5ae102SDimitry Andric LIS.RemoveMachineInstrFromMaps(*MI);
7751d5ae102SDimitry Andric MI++->eraseFromParent();
7761d5ae102SDimitry Andric continue;
7771d5ae102SDimitry Andric }
7781d5ae102SDimitry Andric ++MI;
7791d5ae102SDimitry Andric }
7801d5ae102SDimitry Andric // In the kernel block, check if we can remove a Phi that generates a value
7811d5ae102SDimitry Andric // used in an instruction removed in the epilog block.
782c0981da4SDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(KernelBB->phis())) {
783c0981da4SDimitry Andric Register reg = MI.getOperand(0).getReg();
7841d5ae102SDimitry Andric if (MRI.use_begin(reg) == MRI.use_end()) {
785c0981da4SDimitry Andric LIS.RemoveMachineInstrFromMaps(MI);
786c0981da4SDimitry Andric MI.eraseFromParent();
7871d5ae102SDimitry Andric }
7881d5ae102SDimitry Andric }
7891d5ae102SDimitry Andric }
7901d5ae102SDimitry Andric
7911d5ae102SDimitry Andric /// For loop carried definitions, we split the lifetime of a virtual register
7921d5ae102SDimitry Andric /// that has uses past the definition in the next iteration. A copy with a new
7931d5ae102SDimitry Andric /// virtual register is inserted before the definition, which helps with
7941d5ae102SDimitry Andric /// generating a better register assignment.
7951d5ae102SDimitry Andric ///
7961d5ae102SDimitry Andric /// v1 = phi(a, v2) v1 = phi(a, v2)
7971d5ae102SDimitry Andric /// v2 = phi(b, v3) v2 = phi(b, v3)
7981d5ae102SDimitry Andric /// v3 = .. v4 = copy v1
7991d5ae102SDimitry Andric /// .. = V1 v3 = ..
8001d5ae102SDimitry Andric /// .. = v4
splitLifetimes(MachineBasicBlock * KernelBB,MBBVectorTy & EpilogBBs)8011d5ae102SDimitry Andric void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
8021d5ae102SDimitry Andric MBBVectorTy &EpilogBBs) {
8031d5ae102SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8041d5ae102SDimitry Andric for (auto &PHI : KernelBB->phis()) {
8051d5ae102SDimitry Andric Register Def = PHI.getOperand(0).getReg();
8061d5ae102SDimitry Andric // Check for any Phi definition that used as an operand of another Phi
8071d5ae102SDimitry Andric // in the same block.
8081d5ae102SDimitry Andric for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
8091d5ae102SDimitry Andric E = MRI.use_instr_end();
8101d5ae102SDimitry Andric I != E; ++I) {
8111d5ae102SDimitry Andric if (I->isPHI() && I->getParent() == KernelBB) {
8121d5ae102SDimitry Andric // Get the loop carried definition.
8131d5ae102SDimitry Andric unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
8141d5ae102SDimitry Andric if (!LCDef)
8151d5ae102SDimitry Andric continue;
8161d5ae102SDimitry Andric MachineInstr *MI = MRI.getVRegDef(LCDef);
8171d5ae102SDimitry Andric if (!MI || MI->getParent() != KernelBB || MI->isPHI())
8181d5ae102SDimitry Andric continue;
8191d5ae102SDimitry Andric // Search through the rest of the block looking for uses of the Phi
8201d5ae102SDimitry Andric // definition. If one occurs, then split the lifetime.
8211d5ae102SDimitry Andric unsigned SplitReg = 0;
8221d5ae102SDimitry Andric for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
8231d5ae102SDimitry Andric KernelBB->instr_end()))
824ac9a064cSDimitry Andric if (BBJ.readsRegister(Def, /*TRI=*/nullptr)) {
8251d5ae102SDimitry Andric // We split the lifetime when we find the first use.
8261d5ae102SDimitry Andric if (SplitReg == 0) {
8271d5ae102SDimitry Andric SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
8281d5ae102SDimitry Andric BuildMI(*KernelBB, MI, MI->getDebugLoc(),
8291d5ae102SDimitry Andric TII->get(TargetOpcode::COPY), SplitReg)
8301d5ae102SDimitry Andric .addReg(Def);
8311d5ae102SDimitry Andric }
8321d5ae102SDimitry Andric BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
8331d5ae102SDimitry Andric }
8341d5ae102SDimitry Andric if (!SplitReg)
8351d5ae102SDimitry Andric continue;
8361d5ae102SDimitry Andric // Search through each of the epilog blocks for any uses to be renamed.
8371d5ae102SDimitry Andric for (auto &Epilog : EpilogBBs)
8381d5ae102SDimitry Andric for (auto &I : *Epilog)
839ac9a064cSDimitry Andric if (I.readsRegister(Def, /*TRI=*/nullptr))
8401d5ae102SDimitry Andric I.substituteRegister(Def, SplitReg, 0, *TRI);
8411d5ae102SDimitry Andric break;
8421d5ae102SDimitry Andric }
8431d5ae102SDimitry Andric }
8441d5ae102SDimitry Andric }
8451d5ae102SDimitry Andric }
8461d5ae102SDimitry Andric
8471d5ae102SDimitry Andric /// Remove the incoming block from the Phis in a basic block.
removePhis(MachineBasicBlock * BB,MachineBasicBlock * Incoming)8481d5ae102SDimitry Andric static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
8491d5ae102SDimitry Andric for (MachineInstr &MI : *BB) {
8501d5ae102SDimitry Andric if (!MI.isPHI())
8511d5ae102SDimitry Andric break;
8521d5ae102SDimitry Andric for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
8531d5ae102SDimitry Andric if (MI.getOperand(i + 1).getMBB() == Incoming) {
854145449b1SDimitry Andric MI.removeOperand(i + 1);
855145449b1SDimitry Andric MI.removeOperand(i);
8561d5ae102SDimitry Andric break;
8571d5ae102SDimitry Andric }
8581d5ae102SDimitry Andric }
8591d5ae102SDimitry Andric }
8601d5ae102SDimitry Andric
8611d5ae102SDimitry Andric /// Create branches from each prolog basic block to the appropriate epilog
8621d5ae102SDimitry Andric /// block. These edges are needed if the loop ends before reaching the
8631d5ae102SDimitry Andric /// kernel.
addBranches(MachineBasicBlock & PreheaderBB,MBBVectorTy & PrologBBs,MachineBasicBlock * KernelBB,MBBVectorTy & EpilogBBs,ValueMapTy * VRMap)8641d5ae102SDimitry Andric void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
8651d5ae102SDimitry Andric MBBVectorTy &PrologBBs,
8661d5ae102SDimitry Andric MachineBasicBlock *KernelBB,
8671d5ae102SDimitry Andric MBBVectorTy &EpilogBBs,
8681d5ae102SDimitry Andric ValueMapTy *VRMap) {
8691d5ae102SDimitry Andric assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
8701d5ae102SDimitry Andric MachineBasicBlock *LastPro = KernelBB;
8711d5ae102SDimitry Andric MachineBasicBlock *LastEpi = KernelBB;
8721d5ae102SDimitry Andric
8731d5ae102SDimitry Andric // Start from the blocks connected to the kernel and work "out"
8741d5ae102SDimitry Andric // to the first prolog and the last epilog blocks.
8751d5ae102SDimitry Andric SmallVector<MachineInstr *, 4> PrevInsts;
8761d5ae102SDimitry Andric unsigned MaxIter = PrologBBs.size() - 1;
8771d5ae102SDimitry Andric for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
8781d5ae102SDimitry Andric // Add branches to the prolog that go to the corresponding
8791d5ae102SDimitry Andric // epilog, and the fall-thru prolog/kernel block.
8801d5ae102SDimitry Andric MachineBasicBlock *Prolog = PrologBBs[j];
8811d5ae102SDimitry Andric MachineBasicBlock *Epilog = EpilogBBs[i];
8821d5ae102SDimitry Andric
8831d5ae102SDimitry Andric SmallVector<MachineOperand, 4> Cond;
884e3b55780SDimitry Andric std::optional<bool> StaticallyGreater =
8851d5ae102SDimitry Andric LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
8861d5ae102SDimitry Andric unsigned numAdded = 0;
887145449b1SDimitry Andric if (!StaticallyGreater) {
8881d5ae102SDimitry Andric Prolog->addSuccessor(Epilog);
8891d5ae102SDimitry Andric numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
8901d5ae102SDimitry Andric } else if (*StaticallyGreater == false) {
8911d5ae102SDimitry Andric Prolog->addSuccessor(Epilog);
8921d5ae102SDimitry Andric Prolog->removeSuccessor(LastPro);
8931d5ae102SDimitry Andric LastEpi->removeSuccessor(Epilog);
8941d5ae102SDimitry Andric numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
8951d5ae102SDimitry Andric removePhis(Epilog, LastEpi);
8961d5ae102SDimitry Andric // Remove the blocks that are no longer referenced.
8971d5ae102SDimitry Andric if (LastPro != LastEpi) {
8981d5ae102SDimitry Andric LastEpi->clear();
8991d5ae102SDimitry Andric LastEpi->eraseFromParent();
9001d5ae102SDimitry Andric }
9011d5ae102SDimitry Andric if (LastPro == KernelBB) {
9021d5ae102SDimitry Andric LoopInfo->disposed();
9031d5ae102SDimitry Andric NewKernel = nullptr;
9041d5ae102SDimitry Andric }
9051d5ae102SDimitry Andric LastPro->clear();
9061d5ae102SDimitry Andric LastPro->eraseFromParent();
9071d5ae102SDimitry Andric } else {
9081d5ae102SDimitry Andric numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
9091d5ae102SDimitry Andric removePhis(Epilog, Prolog);
9101d5ae102SDimitry Andric }
9111d5ae102SDimitry Andric LastPro = Prolog;
9121d5ae102SDimitry Andric LastEpi = Epilog;
9131d5ae102SDimitry Andric for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
9141d5ae102SDimitry Andric E = Prolog->instr_rend();
9151d5ae102SDimitry Andric I != E && numAdded > 0; ++I, --numAdded)
9161d5ae102SDimitry Andric updateInstruction(&*I, false, j, 0, VRMap);
9171d5ae102SDimitry Andric }
9181d5ae102SDimitry Andric
9191d5ae102SDimitry Andric if (NewKernel) {
9201d5ae102SDimitry Andric LoopInfo->setPreheader(PrologBBs[MaxIter]);
9211d5ae102SDimitry Andric LoopInfo->adjustTripCount(-(MaxIter + 1));
9221d5ae102SDimitry Andric }
9231d5ae102SDimitry Andric }
9241d5ae102SDimitry Andric
9251d5ae102SDimitry Andric /// Return true if we can compute the amount the instruction changes
9261d5ae102SDimitry Andric /// during each iteration. Set Delta to the amount of the change.
computeDelta(MachineInstr & MI,unsigned & Delta)9271d5ae102SDimitry Andric bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
9281d5ae102SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
9291d5ae102SDimitry Andric const MachineOperand *BaseOp;
9301d5ae102SDimitry Andric int64_t Offset;
931cfca06d7SDimitry Andric bool OffsetIsScalable;
932cfca06d7SDimitry Andric if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, OffsetIsScalable, TRI))
933cfca06d7SDimitry Andric return false;
934cfca06d7SDimitry Andric
935cfca06d7SDimitry Andric // FIXME: This algorithm assumes instructions have fixed-size offsets.
936cfca06d7SDimitry Andric if (OffsetIsScalable)
9371d5ae102SDimitry Andric return false;
9381d5ae102SDimitry Andric
9391d5ae102SDimitry Andric if (!BaseOp->isReg())
9401d5ae102SDimitry Andric return false;
9411d5ae102SDimitry Andric
9421d5ae102SDimitry Andric Register BaseReg = BaseOp->getReg();
9431d5ae102SDimitry Andric
9441d5ae102SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo();
9451d5ae102SDimitry Andric // Check if there is a Phi. If so, get the definition in the loop.
9461d5ae102SDimitry Andric MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
9471d5ae102SDimitry Andric if (BaseDef && BaseDef->isPHI()) {
9481d5ae102SDimitry Andric BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
9491d5ae102SDimitry Andric BaseDef = MRI.getVRegDef(BaseReg);
9501d5ae102SDimitry Andric }
9511d5ae102SDimitry Andric if (!BaseDef)
9521d5ae102SDimitry Andric return false;
9531d5ae102SDimitry Andric
9541d5ae102SDimitry Andric int D = 0;
9551d5ae102SDimitry Andric if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
9561d5ae102SDimitry Andric return false;
9571d5ae102SDimitry Andric
9581d5ae102SDimitry Andric Delta = D;
9591d5ae102SDimitry Andric return true;
9601d5ae102SDimitry Andric }
9611d5ae102SDimitry Andric
9621d5ae102SDimitry Andric /// Update the memory operand with a new offset when the pipeliner
9631d5ae102SDimitry Andric /// generates a new copy of the instruction that refers to a
9641d5ae102SDimitry Andric /// different memory location.
updateMemOperands(MachineInstr & NewMI,MachineInstr & OldMI,unsigned Num)9651d5ae102SDimitry Andric void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
9661d5ae102SDimitry Andric MachineInstr &OldMI,
9671d5ae102SDimitry Andric unsigned Num) {
9681d5ae102SDimitry Andric if (Num == 0)
9691d5ae102SDimitry Andric return;
9701d5ae102SDimitry Andric // If the instruction has memory operands, then adjust the offset
9711d5ae102SDimitry Andric // when the instruction appears in different stages.
9721d5ae102SDimitry Andric if (NewMI.memoperands_empty())
9731d5ae102SDimitry Andric return;
9741d5ae102SDimitry Andric SmallVector<MachineMemOperand *, 2> NewMMOs;
9751d5ae102SDimitry Andric for (MachineMemOperand *MMO : NewMI.memoperands()) {
9761d5ae102SDimitry Andric // TODO: Figure out whether isAtomic is really necessary (see D57601).
9771d5ae102SDimitry Andric if (MMO->isVolatile() || MMO->isAtomic() ||
9781d5ae102SDimitry Andric (MMO->isInvariant() && MMO->isDereferenceable()) ||
9791d5ae102SDimitry Andric (!MMO->getValue())) {
9801d5ae102SDimitry Andric NewMMOs.push_back(MMO);
9811d5ae102SDimitry Andric continue;
9821d5ae102SDimitry Andric }
9831d5ae102SDimitry Andric unsigned Delta;
9841d5ae102SDimitry Andric if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
9851d5ae102SDimitry Andric int64_t AdjOffset = Delta * Num;
9861d5ae102SDimitry Andric NewMMOs.push_back(
9871d5ae102SDimitry Andric MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
9881d5ae102SDimitry Andric } else {
989ac9a064cSDimitry Andric NewMMOs.push_back(MF.getMachineMemOperand(
990ac9a064cSDimitry Andric MMO, 0, LocationSize::beforeOrAfterPointer()));
9911d5ae102SDimitry Andric }
9921d5ae102SDimitry Andric }
9931d5ae102SDimitry Andric NewMI.setMemRefs(MF, NewMMOs);
9941d5ae102SDimitry Andric }
9951d5ae102SDimitry Andric
9961d5ae102SDimitry Andric /// Clone the instruction for the new pipelined loop and update the
9971d5ae102SDimitry Andric /// memory operands, if needed.
cloneInstr(MachineInstr * OldMI,unsigned CurStageNum,unsigned InstStageNum)9981d5ae102SDimitry Andric MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
9991d5ae102SDimitry Andric unsigned CurStageNum,
10001d5ae102SDimitry Andric unsigned InstStageNum) {
10011d5ae102SDimitry Andric MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
10021d5ae102SDimitry Andric updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
10031d5ae102SDimitry Andric return NewMI;
10041d5ae102SDimitry Andric }
10051d5ae102SDimitry Andric
10061d5ae102SDimitry Andric /// Clone the instruction for the new pipelined loop. If needed, this
10071d5ae102SDimitry Andric /// function updates the instruction using the values saved in the
10081d5ae102SDimitry Andric /// InstrChanges structure.
cloneAndChangeInstr(MachineInstr * OldMI,unsigned CurStageNum,unsigned InstStageNum)10091d5ae102SDimitry Andric MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
10101d5ae102SDimitry Andric MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
10111d5ae102SDimitry Andric MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
10121d5ae102SDimitry Andric auto It = InstrChanges.find(OldMI);
10131d5ae102SDimitry Andric if (It != InstrChanges.end()) {
10141d5ae102SDimitry Andric std::pair<unsigned, int64_t> RegAndOffset = It->second;
10151d5ae102SDimitry Andric unsigned BasePos, OffsetPos;
10161d5ae102SDimitry Andric if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
10171d5ae102SDimitry Andric return nullptr;
10181d5ae102SDimitry Andric int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
10191d5ae102SDimitry Andric MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
10201d5ae102SDimitry Andric if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
10211d5ae102SDimitry Andric NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
10221d5ae102SDimitry Andric NewMI->getOperand(OffsetPos).setImm(NewOffset);
10231d5ae102SDimitry Andric }
10241d5ae102SDimitry Andric updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
10251d5ae102SDimitry Andric return NewMI;
10261d5ae102SDimitry Andric }
10271d5ae102SDimitry Andric
10281d5ae102SDimitry Andric /// Update the machine instruction with new virtual registers. This
1029145449b1SDimitry Andric /// function may change the definitions and/or uses.
updateInstruction(MachineInstr * NewMI,bool LastDef,unsigned CurStageNum,unsigned InstrStageNum,ValueMapTy * VRMap)10301d5ae102SDimitry Andric void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
10311d5ae102SDimitry Andric bool LastDef,
10321d5ae102SDimitry Andric unsigned CurStageNum,
10331d5ae102SDimitry Andric unsigned InstrStageNum,
10341d5ae102SDimitry Andric ValueMapTy *VRMap) {
1035f65dcba8SDimitry Andric for (MachineOperand &MO : NewMI->operands()) {
1036e3b55780SDimitry Andric if (!MO.isReg() || !MO.getReg().isVirtual())
10371d5ae102SDimitry Andric continue;
10381d5ae102SDimitry Andric Register reg = MO.getReg();
10391d5ae102SDimitry Andric if (MO.isDef()) {
10401d5ae102SDimitry Andric // Create a new virtual register for the definition.
10411d5ae102SDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(reg);
10421d5ae102SDimitry Andric Register NewReg = MRI.createVirtualRegister(RC);
10431d5ae102SDimitry Andric MO.setReg(NewReg);
10441d5ae102SDimitry Andric VRMap[CurStageNum][reg] = NewReg;
10451d5ae102SDimitry Andric if (LastDef)
10461d5ae102SDimitry Andric replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
10471d5ae102SDimitry Andric } else if (MO.isUse()) {
10481d5ae102SDimitry Andric MachineInstr *Def = MRI.getVRegDef(reg);
10491d5ae102SDimitry Andric // Compute the stage that contains the last definition for instruction.
10501d5ae102SDimitry Andric int DefStageNum = Schedule.getStage(Def);
10511d5ae102SDimitry Andric unsigned StageNum = CurStageNum;
10521d5ae102SDimitry Andric if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
10531d5ae102SDimitry Andric // Compute the difference in stages between the defintion and the use.
10541d5ae102SDimitry Andric unsigned StageDiff = (InstrStageNum - DefStageNum);
10551d5ae102SDimitry Andric // Make an adjustment to get the last definition.
10561d5ae102SDimitry Andric StageNum -= StageDiff;
10571d5ae102SDimitry Andric }
10581d5ae102SDimitry Andric if (VRMap[StageNum].count(reg))
10591d5ae102SDimitry Andric MO.setReg(VRMap[StageNum][reg]);
10601d5ae102SDimitry Andric }
10611d5ae102SDimitry Andric }
10621d5ae102SDimitry Andric }
10631d5ae102SDimitry Andric
10641d5ae102SDimitry Andric /// Return the instruction in the loop that defines the register.
10651d5ae102SDimitry Andric /// If the definition is a Phi, then follow the Phi operand to
10661d5ae102SDimitry Andric /// the instruction in the loop.
findDefInLoop(unsigned Reg)10671d5ae102SDimitry Andric MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
10681d5ae102SDimitry Andric SmallPtrSet<MachineInstr *, 8> Visited;
10691d5ae102SDimitry Andric MachineInstr *Def = MRI.getVRegDef(Reg);
10701d5ae102SDimitry Andric while (Def->isPHI()) {
10711d5ae102SDimitry Andric if (!Visited.insert(Def).second)
10721d5ae102SDimitry Andric break;
10731d5ae102SDimitry Andric for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
10741d5ae102SDimitry Andric if (Def->getOperand(i + 1).getMBB() == BB) {
10751d5ae102SDimitry Andric Def = MRI.getVRegDef(Def->getOperand(i).getReg());
10761d5ae102SDimitry Andric break;
10771d5ae102SDimitry Andric }
10781d5ae102SDimitry Andric }
10791d5ae102SDimitry Andric return Def;
10801d5ae102SDimitry Andric }
10811d5ae102SDimitry Andric
10821d5ae102SDimitry Andric /// Return the new name for the value from the previous stage.
getPrevMapVal(unsigned StageNum,unsigned PhiStage,unsigned LoopVal,unsigned LoopStage,ValueMapTy * VRMap,MachineBasicBlock * BB)10831d5ae102SDimitry Andric unsigned ModuloScheduleExpander::getPrevMapVal(
10841d5ae102SDimitry Andric unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
10851d5ae102SDimitry Andric ValueMapTy *VRMap, MachineBasicBlock *BB) {
10861d5ae102SDimitry Andric unsigned PrevVal = 0;
10871d5ae102SDimitry Andric if (StageNum > PhiStage) {
10881d5ae102SDimitry Andric MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
10891d5ae102SDimitry Andric if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
10901d5ae102SDimitry Andric // The name is defined in the previous stage.
10911d5ae102SDimitry Andric PrevVal = VRMap[StageNum - 1][LoopVal];
10921d5ae102SDimitry Andric else if (VRMap[StageNum].count(LoopVal))
10931d5ae102SDimitry Andric // The previous name is defined in the current stage when the instruction
10941d5ae102SDimitry Andric // order is swapped.
10951d5ae102SDimitry Andric PrevVal = VRMap[StageNum][LoopVal];
10961d5ae102SDimitry Andric else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
10971d5ae102SDimitry Andric // The loop value hasn't yet been scheduled.
10981d5ae102SDimitry Andric PrevVal = LoopVal;
10991d5ae102SDimitry Andric else if (StageNum == PhiStage + 1)
11001d5ae102SDimitry Andric // The loop value is another phi, which has not been scheduled.
11011d5ae102SDimitry Andric PrevVal = getInitPhiReg(*LoopInst, BB);
11021d5ae102SDimitry Andric else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
11031d5ae102SDimitry Andric // The loop value is another phi, which has been scheduled.
11041d5ae102SDimitry Andric PrevVal =
11051d5ae102SDimitry Andric getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
11061d5ae102SDimitry Andric LoopStage, VRMap, BB);
11071d5ae102SDimitry Andric }
11081d5ae102SDimitry Andric return PrevVal;
11091d5ae102SDimitry Andric }
11101d5ae102SDimitry Andric
11111d5ae102SDimitry Andric /// Rewrite the Phi values in the specified block to use the mappings
11121d5ae102SDimitry Andric /// from the initial operand. Once the Phi is scheduled, we switch
11131d5ae102SDimitry Andric /// to using the loop value instead of the Phi value, so those names
11141d5ae102SDimitry Andric /// do not need to be rewritten.
rewritePhiValues(MachineBasicBlock * NewBB,unsigned StageNum,ValueMapTy * VRMap,InstrMapTy & InstrMap)11151d5ae102SDimitry Andric void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
11161d5ae102SDimitry Andric unsigned StageNum,
11171d5ae102SDimitry Andric ValueMapTy *VRMap,
11181d5ae102SDimitry Andric InstrMapTy &InstrMap) {
11191d5ae102SDimitry Andric for (auto &PHI : BB->phis()) {
11201d5ae102SDimitry Andric unsigned InitVal = 0;
11211d5ae102SDimitry Andric unsigned LoopVal = 0;
11221d5ae102SDimitry Andric getPhiRegs(PHI, BB, InitVal, LoopVal);
11231d5ae102SDimitry Andric Register PhiDef = PHI.getOperand(0).getReg();
11241d5ae102SDimitry Andric
11251d5ae102SDimitry Andric unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
11261d5ae102SDimitry Andric unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
11271d5ae102SDimitry Andric unsigned NumPhis = getStagesForPhi(PhiDef);
11281d5ae102SDimitry Andric if (NumPhis > StageNum)
11291d5ae102SDimitry Andric NumPhis = StageNum;
11301d5ae102SDimitry Andric for (unsigned np = 0; np <= NumPhis; ++np) {
11311d5ae102SDimitry Andric unsigned NewVal =
11321d5ae102SDimitry Andric getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
11331d5ae102SDimitry Andric if (!NewVal)
11341d5ae102SDimitry Andric NewVal = InitVal;
11351d5ae102SDimitry Andric rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
11361d5ae102SDimitry Andric NewVal);
11371d5ae102SDimitry Andric }
11381d5ae102SDimitry Andric }
11391d5ae102SDimitry Andric }
11401d5ae102SDimitry Andric
11411d5ae102SDimitry Andric /// Rewrite a previously scheduled instruction to use the register value
11421d5ae102SDimitry Andric /// from the new instruction. Make sure the instruction occurs in the
11431d5ae102SDimitry Andric /// basic block, and we don't change the uses in the new instruction.
rewriteScheduledInstr(MachineBasicBlock * BB,InstrMapTy & InstrMap,unsigned CurStageNum,unsigned PhiNum,MachineInstr * Phi,unsigned OldReg,unsigned NewReg,unsigned PrevReg)11441d5ae102SDimitry Andric void ModuloScheduleExpander::rewriteScheduledInstr(
11451d5ae102SDimitry Andric MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
11461d5ae102SDimitry Andric unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
11471d5ae102SDimitry Andric unsigned PrevReg) {
11481d5ae102SDimitry Andric bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
11491d5ae102SDimitry Andric int StagePhi = Schedule.getStage(Phi) + PhiNum;
11501d5ae102SDimitry Andric // Rewrite uses that have been scheduled already to use the new
11511d5ae102SDimitry Andric // Phi register.
1152c0981da4SDimitry Andric for (MachineOperand &UseOp :
1153c0981da4SDimitry Andric llvm::make_early_inc_range(MRI.use_operands(OldReg))) {
11541d5ae102SDimitry Andric MachineInstr *UseMI = UseOp.getParent();
11551d5ae102SDimitry Andric if (UseMI->getParent() != BB)
11561d5ae102SDimitry Andric continue;
11571d5ae102SDimitry Andric if (UseMI->isPHI()) {
11581d5ae102SDimitry Andric if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
11591d5ae102SDimitry Andric continue;
11601d5ae102SDimitry Andric if (getLoopPhiReg(*UseMI, BB) != OldReg)
11611d5ae102SDimitry Andric continue;
11621d5ae102SDimitry Andric }
11631d5ae102SDimitry Andric InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
11641d5ae102SDimitry Andric assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
11651d5ae102SDimitry Andric MachineInstr *OrigMI = OrigInstr->second;
11661d5ae102SDimitry Andric int StageSched = Schedule.getStage(OrigMI);
11671d5ae102SDimitry Andric int CycleSched = Schedule.getCycle(OrigMI);
11681d5ae102SDimitry Andric unsigned ReplaceReg = 0;
11691d5ae102SDimitry Andric // This is the stage for the scheduled instruction.
11701d5ae102SDimitry Andric if (StagePhi == StageSched && Phi->isPHI()) {
11711d5ae102SDimitry Andric int CyclePhi = Schedule.getCycle(Phi);
11721d5ae102SDimitry Andric if (PrevReg && InProlog)
11731d5ae102SDimitry Andric ReplaceReg = PrevReg;
11741d5ae102SDimitry Andric else if (PrevReg && !isLoopCarried(*Phi) &&
11751d5ae102SDimitry Andric (CyclePhi <= CycleSched || OrigMI->isPHI()))
11761d5ae102SDimitry Andric ReplaceReg = PrevReg;
11771d5ae102SDimitry Andric else
11781d5ae102SDimitry Andric ReplaceReg = NewReg;
11791d5ae102SDimitry Andric }
11801d5ae102SDimitry Andric // The scheduled instruction occurs before the scheduled Phi, and the
11811d5ae102SDimitry Andric // Phi is not loop carried.
11821d5ae102SDimitry Andric if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
11831d5ae102SDimitry Andric ReplaceReg = NewReg;
11841d5ae102SDimitry Andric if (StagePhi > StageSched && Phi->isPHI())
11851d5ae102SDimitry Andric ReplaceReg = NewReg;
11861d5ae102SDimitry Andric if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
11871d5ae102SDimitry Andric ReplaceReg = NewReg;
11881d5ae102SDimitry Andric if (ReplaceReg) {
1189145449b1SDimitry Andric const TargetRegisterClass *NRC =
11901d5ae102SDimitry Andric MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
1191145449b1SDimitry Andric if (NRC)
11921d5ae102SDimitry Andric UseOp.setReg(ReplaceReg);
1193145449b1SDimitry Andric else {
1194145449b1SDimitry Andric Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
1195145449b1SDimitry Andric BuildMI(*BB, UseMI, UseMI->getDebugLoc(), TII->get(TargetOpcode::COPY),
1196145449b1SDimitry Andric SplitReg)
1197145449b1SDimitry Andric .addReg(ReplaceReg);
1198145449b1SDimitry Andric UseOp.setReg(SplitReg);
1199145449b1SDimitry Andric }
12001d5ae102SDimitry Andric }
12011d5ae102SDimitry Andric }
12021d5ae102SDimitry Andric }
12031d5ae102SDimitry Andric
isLoopCarried(MachineInstr & Phi)12041d5ae102SDimitry Andric bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
12051d5ae102SDimitry Andric if (!Phi.isPHI())
12061d5ae102SDimitry Andric return false;
1207706b4fc4SDimitry Andric int DefCycle = Schedule.getCycle(&Phi);
12081d5ae102SDimitry Andric int DefStage = Schedule.getStage(&Phi);
12091d5ae102SDimitry Andric
12101d5ae102SDimitry Andric unsigned InitVal = 0;
12111d5ae102SDimitry Andric unsigned LoopVal = 0;
12121d5ae102SDimitry Andric getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
12131d5ae102SDimitry Andric MachineInstr *Use = MRI.getVRegDef(LoopVal);
12141d5ae102SDimitry Andric if (!Use || Use->isPHI())
12151d5ae102SDimitry Andric return true;
1216706b4fc4SDimitry Andric int LoopCycle = Schedule.getCycle(Use);
12171d5ae102SDimitry Andric int LoopStage = Schedule.getStage(Use);
12181d5ae102SDimitry Andric return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
12191d5ae102SDimitry Andric }
12201d5ae102SDimitry Andric
12211d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
12221d5ae102SDimitry Andric // PeelingModuloScheduleExpander implementation
12231d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
12241d5ae102SDimitry Andric // This is a reimplementation of ModuloScheduleExpander that works by creating
12251d5ae102SDimitry Andric // a fully correct steady-state kernel and peeling off the prolog and epilogs.
12261d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
12271d5ae102SDimitry Andric
12281d5ae102SDimitry Andric namespace {
12291d5ae102SDimitry Andric // Remove any dead phis in MBB. Dead phis either have only one block as input
12301d5ae102SDimitry Andric // (in which case they are the identity) or have no uses.
EliminateDeadPhis(MachineBasicBlock * MBB,MachineRegisterInfo & MRI,LiveIntervals * LIS,bool KeepSingleSrcPhi=false)12311d5ae102SDimitry Andric void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
1232706b4fc4SDimitry Andric LiveIntervals *LIS, bool KeepSingleSrcPhi = false) {
12331d5ae102SDimitry Andric bool Changed = true;
12341d5ae102SDimitry Andric while (Changed) {
12351d5ae102SDimitry Andric Changed = false;
1236c0981da4SDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(MBB->phis())) {
12371d5ae102SDimitry Andric assert(MI.isPHI());
12381d5ae102SDimitry Andric if (MRI.use_empty(MI.getOperand(0).getReg())) {
12391d5ae102SDimitry Andric if (LIS)
12401d5ae102SDimitry Andric LIS->RemoveMachineInstrFromMaps(MI);
12411d5ae102SDimitry Andric MI.eraseFromParent();
12421d5ae102SDimitry Andric Changed = true;
1243706b4fc4SDimitry Andric } else if (!KeepSingleSrcPhi && MI.getNumExplicitOperands() == 3) {
1244145449b1SDimitry Andric const TargetRegisterClass *ConstrainRegClass =
12451d5ae102SDimitry Andric MRI.constrainRegClass(MI.getOperand(1).getReg(),
12461d5ae102SDimitry Andric MRI.getRegClass(MI.getOperand(0).getReg()));
1247145449b1SDimitry Andric assert(ConstrainRegClass &&
1248145449b1SDimitry Andric "Expected a valid constrained register class!");
1249145449b1SDimitry Andric (void)ConstrainRegClass;
12501d5ae102SDimitry Andric MRI.replaceRegWith(MI.getOperand(0).getReg(),
12511d5ae102SDimitry Andric MI.getOperand(1).getReg());
12521d5ae102SDimitry Andric if (LIS)
12531d5ae102SDimitry Andric LIS->RemoveMachineInstrFromMaps(MI);
12541d5ae102SDimitry Andric MI.eraseFromParent();
12551d5ae102SDimitry Andric Changed = true;
12561d5ae102SDimitry Andric }
12571d5ae102SDimitry Andric }
12581d5ae102SDimitry Andric }
12591d5ae102SDimitry Andric }
12601d5ae102SDimitry Andric
12611d5ae102SDimitry Andric /// Rewrites the kernel block in-place to adhere to the given schedule.
12621d5ae102SDimitry Andric /// KernelRewriter holds all of the state required to perform the rewriting.
12631d5ae102SDimitry Andric class KernelRewriter {
12641d5ae102SDimitry Andric ModuloSchedule &S;
12651d5ae102SDimitry Andric MachineBasicBlock *BB;
12661d5ae102SDimitry Andric MachineBasicBlock *PreheaderBB, *ExitBB;
12671d5ae102SDimitry Andric MachineRegisterInfo &MRI;
12681d5ae102SDimitry Andric const TargetInstrInfo *TII;
12691d5ae102SDimitry Andric LiveIntervals *LIS;
12701d5ae102SDimitry Andric
12711d5ae102SDimitry Andric // Map from register class to canonical undef register for that class.
12721d5ae102SDimitry Andric DenseMap<const TargetRegisterClass *, Register> Undefs;
12731d5ae102SDimitry Andric // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
12741d5ae102SDimitry Andric // this map is only used when InitReg is non-undef.
12751d5ae102SDimitry Andric DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
12761d5ae102SDimitry Andric // Map from LoopReg to phi register where the InitReg is undef.
12771d5ae102SDimitry Andric DenseMap<Register, Register> UndefPhis;
12781d5ae102SDimitry Andric
12791d5ae102SDimitry Andric // Reg is used by MI. Return the new register MI should use to adhere to the
12801d5ae102SDimitry Andric // schedule. Insert phis as necessary.
12811d5ae102SDimitry Andric Register remapUse(Register Reg, MachineInstr &MI);
12821d5ae102SDimitry Andric // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
12831d5ae102SDimitry Andric // If InitReg is not given it is chosen arbitrarily. It will either be undef
12841d5ae102SDimitry Andric // or will be chosen so as to share another phi.
1285e3b55780SDimitry Andric Register phi(Register LoopReg, std::optional<Register> InitReg = {},
12861d5ae102SDimitry Andric const TargetRegisterClass *RC = nullptr);
12871d5ae102SDimitry Andric // Create an undef register of the given register class.
12881d5ae102SDimitry Andric Register undef(const TargetRegisterClass *RC);
12891d5ae102SDimitry Andric
12901d5ae102SDimitry Andric public:
1291344a3780SDimitry Andric KernelRewriter(MachineLoop &L, ModuloSchedule &S, MachineBasicBlock *LoopBB,
12921d5ae102SDimitry Andric LiveIntervals *LIS = nullptr);
12931d5ae102SDimitry Andric void rewrite();
12941d5ae102SDimitry Andric };
12951d5ae102SDimitry Andric } // namespace
12961d5ae102SDimitry Andric
KernelRewriter(MachineLoop & L,ModuloSchedule & S,MachineBasicBlock * LoopBB,LiveIntervals * LIS)12971d5ae102SDimitry Andric KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
1298344a3780SDimitry Andric MachineBasicBlock *LoopBB, LiveIntervals *LIS)
1299344a3780SDimitry Andric : S(S), BB(LoopBB), PreheaderBB(L.getLoopPreheader()),
13001d5ae102SDimitry Andric ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
13011d5ae102SDimitry Andric TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
13021d5ae102SDimitry Andric PreheaderBB = *BB->pred_begin();
13031d5ae102SDimitry Andric if (PreheaderBB == BB)
13041d5ae102SDimitry Andric PreheaderBB = *std::next(BB->pred_begin());
13051d5ae102SDimitry Andric }
13061d5ae102SDimitry Andric
rewrite()13071d5ae102SDimitry Andric void KernelRewriter::rewrite() {
13081d5ae102SDimitry Andric // Rearrange the loop to be in schedule order. Note that the schedule may
13091d5ae102SDimitry Andric // contain instructions that are not owned by the loop block (InstrChanges and
13101d5ae102SDimitry Andric // friends), so we gracefully handle unowned instructions and delete any
13111d5ae102SDimitry Andric // instructions that weren't in the schedule.
13121d5ae102SDimitry Andric auto InsertPt = BB->getFirstTerminator();
13131d5ae102SDimitry Andric MachineInstr *FirstMI = nullptr;
13141d5ae102SDimitry Andric for (MachineInstr *MI : S.getInstructions()) {
13151d5ae102SDimitry Andric if (MI->isPHI())
13161d5ae102SDimitry Andric continue;
13171d5ae102SDimitry Andric if (MI->getParent())
13181d5ae102SDimitry Andric MI->removeFromParent();
13191d5ae102SDimitry Andric BB->insert(InsertPt, MI);
13201d5ae102SDimitry Andric if (!FirstMI)
13211d5ae102SDimitry Andric FirstMI = MI;
13221d5ae102SDimitry Andric }
13231d5ae102SDimitry Andric assert(FirstMI && "Failed to find first MI in schedule");
13241d5ae102SDimitry Andric
13251d5ae102SDimitry Andric // At this point all of the scheduled instructions are between FirstMI
13261d5ae102SDimitry Andric // and the end of the block. Kill from the first non-phi to FirstMI.
13271d5ae102SDimitry Andric for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
13281d5ae102SDimitry Andric if (LIS)
13291d5ae102SDimitry Andric LIS->RemoveMachineInstrFromMaps(*I);
13301d5ae102SDimitry Andric (I++)->eraseFromParent();
13311d5ae102SDimitry Andric }
13321d5ae102SDimitry Andric
13331d5ae102SDimitry Andric // Now remap every instruction in the loop.
13341d5ae102SDimitry Andric for (MachineInstr &MI : *BB) {
13351d5ae102SDimitry Andric if (MI.isPHI() || MI.isTerminator())
13361d5ae102SDimitry Andric continue;
13371d5ae102SDimitry Andric for (MachineOperand &MO : MI.uses()) {
13381d5ae102SDimitry Andric if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
13391d5ae102SDimitry Andric continue;
13401d5ae102SDimitry Andric Register Reg = remapUse(MO.getReg(), MI);
13411d5ae102SDimitry Andric MO.setReg(Reg);
13421d5ae102SDimitry Andric }
13431d5ae102SDimitry Andric }
13441d5ae102SDimitry Andric EliminateDeadPhis(BB, MRI, LIS);
13451d5ae102SDimitry Andric
13461d5ae102SDimitry Andric // Ensure a phi exists for all instructions that are either referenced by
13471d5ae102SDimitry Andric // an illegal phi or by an instruction outside the loop. This allows us to
13481d5ae102SDimitry Andric // treat remaps of these values the same as "normal" values that come from
13491d5ae102SDimitry Andric // loop-carried phis.
13501d5ae102SDimitry Andric for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
13511d5ae102SDimitry Andric if (MI->isPHI()) {
13521d5ae102SDimitry Andric Register R = MI->getOperand(0).getReg();
13531d5ae102SDimitry Andric phi(R);
13541d5ae102SDimitry Andric continue;
13551d5ae102SDimitry Andric }
13561d5ae102SDimitry Andric
13571d5ae102SDimitry Andric for (MachineOperand &Def : MI->defs()) {
13581d5ae102SDimitry Andric for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
13591d5ae102SDimitry Andric if (MI.getParent() != BB) {
13601d5ae102SDimitry Andric phi(Def.getReg());
13611d5ae102SDimitry Andric break;
13621d5ae102SDimitry Andric }
13631d5ae102SDimitry Andric }
13641d5ae102SDimitry Andric }
13651d5ae102SDimitry Andric }
13661d5ae102SDimitry Andric }
13671d5ae102SDimitry Andric
remapUse(Register Reg,MachineInstr & MI)13681d5ae102SDimitry Andric Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
13691d5ae102SDimitry Andric MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
13701d5ae102SDimitry Andric if (!Producer)
13711d5ae102SDimitry Andric return Reg;
13721d5ae102SDimitry Andric
13731d5ae102SDimitry Andric int ConsumerStage = S.getStage(&MI);
13741d5ae102SDimitry Andric if (!Producer->isPHI()) {
13751d5ae102SDimitry Andric // Non-phi producers are simple to remap. Insert as many phis as the
13761d5ae102SDimitry Andric // difference between the consumer and producer stages.
13771d5ae102SDimitry Andric if (Producer->getParent() != BB)
13781d5ae102SDimitry Andric // Producer was not inside the loop. Use the register as-is.
13791d5ae102SDimitry Andric return Reg;
13801d5ae102SDimitry Andric int ProducerStage = S.getStage(Producer);
13811d5ae102SDimitry Andric assert(ConsumerStage != -1 &&
13821d5ae102SDimitry Andric "In-loop consumer should always be scheduled!");
13831d5ae102SDimitry Andric assert(ConsumerStage >= ProducerStage);
13841d5ae102SDimitry Andric unsigned StageDiff = ConsumerStage - ProducerStage;
13851d5ae102SDimitry Andric
13861d5ae102SDimitry Andric for (unsigned I = 0; I < StageDiff; ++I)
13871d5ae102SDimitry Andric Reg = phi(Reg);
13881d5ae102SDimitry Andric return Reg;
13891d5ae102SDimitry Andric }
13901d5ae102SDimitry Andric
13911d5ae102SDimitry Andric // First, dive through the phi chain to find the defaults for the generated
13921d5ae102SDimitry Andric // phis.
1393e3b55780SDimitry Andric SmallVector<std::optional<Register>, 4> Defaults;
13941d5ae102SDimitry Andric Register LoopReg = Reg;
13951d5ae102SDimitry Andric auto LoopProducer = Producer;
13961d5ae102SDimitry Andric while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
13971d5ae102SDimitry Andric LoopReg = getLoopPhiReg(*LoopProducer, BB);
13981d5ae102SDimitry Andric Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
13991d5ae102SDimitry Andric LoopProducer = MRI.getUniqueVRegDef(LoopReg);
14001d5ae102SDimitry Andric assert(LoopProducer);
14011d5ae102SDimitry Andric }
14021d5ae102SDimitry Andric int LoopProducerStage = S.getStage(LoopProducer);
14031d5ae102SDimitry Andric
1404e3b55780SDimitry Andric std::optional<Register> IllegalPhiDefault;
14051d5ae102SDimitry Andric
14061d5ae102SDimitry Andric if (LoopProducerStage == -1) {
14071d5ae102SDimitry Andric // Do nothing.
14081d5ae102SDimitry Andric } else if (LoopProducerStage > ConsumerStage) {
14091d5ae102SDimitry Andric // This schedule is only representable if ProducerStage == ConsumerStage+1.
14101d5ae102SDimitry Andric // In addition, Consumer's cycle must be scheduled after Producer in the
14111d5ae102SDimitry Andric // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
14121d5ae102SDimitry Andric // functions.
14131d5ae102SDimitry Andric #ifndef NDEBUG // Silence unused variables in non-asserts mode.
14141d5ae102SDimitry Andric int LoopProducerCycle = S.getCycle(LoopProducer);
14151d5ae102SDimitry Andric int ConsumerCycle = S.getCycle(&MI);
14161d5ae102SDimitry Andric #endif
14171d5ae102SDimitry Andric assert(LoopProducerCycle <= ConsumerCycle);
14181d5ae102SDimitry Andric assert(LoopProducerStage == ConsumerStage + 1);
14191d5ae102SDimitry Andric // Peel off the first phi from Defaults and insert a phi between producer
14201d5ae102SDimitry Andric // and consumer. This phi will not be at the front of the block so we
14211d5ae102SDimitry Andric // consider it illegal. It will only exist during the rewrite process; it
14221d5ae102SDimitry Andric // needs to exist while we peel off prologs because these could take the
14231d5ae102SDimitry Andric // default value. After that we can replace all uses with the loop producer
14241d5ae102SDimitry Andric // value.
14251d5ae102SDimitry Andric IllegalPhiDefault = Defaults.front();
14261d5ae102SDimitry Andric Defaults.erase(Defaults.begin());
14271d5ae102SDimitry Andric } else {
14281d5ae102SDimitry Andric assert(ConsumerStage >= LoopProducerStage);
14291d5ae102SDimitry Andric int StageDiff = ConsumerStage - LoopProducerStage;
14301d5ae102SDimitry Andric if (StageDiff > 0) {
14311d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
14321d5ae102SDimitry Andric << " to " << (Defaults.size() + StageDiff) << "\n");
14331d5ae102SDimitry Andric // If we need more phis than we have defaults for, pad out with undefs for
14341d5ae102SDimitry Andric // the earliest phis, which are at the end of the defaults chain (the
14351d5ae102SDimitry Andric // chain is in reverse order).
1436e3b55780SDimitry Andric Defaults.resize(Defaults.size() + StageDiff,
1437e3b55780SDimitry Andric Defaults.empty() ? std::optional<Register>()
14381d5ae102SDimitry Andric : Defaults.back());
14391d5ae102SDimitry Andric }
14401d5ae102SDimitry Andric }
14411d5ae102SDimitry Andric
14421d5ae102SDimitry Andric // Now we know the number of stages to jump back, insert the phi chain.
14431d5ae102SDimitry Andric auto DefaultI = Defaults.rbegin();
14441d5ae102SDimitry Andric while (DefaultI != Defaults.rend())
14451d5ae102SDimitry Andric LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
14461d5ae102SDimitry Andric
1447145449b1SDimitry Andric if (IllegalPhiDefault) {
14481d5ae102SDimitry Andric // The consumer optionally consumes LoopProducer in the same iteration
14491d5ae102SDimitry Andric // (because the producer is scheduled at an earlier cycle than the consumer)
14501d5ae102SDimitry Andric // or the initial value. To facilitate this we create an illegal block here
14511d5ae102SDimitry Andric // by embedding a phi in the middle of the block. We will fix this up
14521d5ae102SDimitry Andric // immediately prior to pruning.
14531d5ae102SDimitry Andric auto RC = MRI.getRegClass(Reg);
14541d5ae102SDimitry Andric Register R = MRI.createVirtualRegister(RC);
1455cfca06d7SDimitry Andric MachineInstr *IllegalPhi =
14561d5ae102SDimitry Andric BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
1457145449b1SDimitry Andric .addReg(*IllegalPhiDefault)
14581d5ae102SDimitry Andric .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
14591d5ae102SDimitry Andric .addReg(LoopReg)
14601d5ae102SDimitry Andric .addMBB(BB); // Block choice is arbitrary and has no effect.
1461cfca06d7SDimitry Andric // Illegal phi should belong to the producer stage so that it can be
1462cfca06d7SDimitry Andric // filtered correctly during peeling.
1463cfca06d7SDimitry Andric S.setStage(IllegalPhi, LoopProducerStage);
14641d5ae102SDimitry Andric return R;
14651d5ae102SDimitry Andric }
14661d5ae102SDimitry Andric
14671d5ae102SDimitry Andric return LoopReg;
14681d5ae102SDimitry Andric }
14691d5ae102SDimitry Andric
phi(Register LoopReg,std::optional<Register> InitReg,const TargetRegisterClass * RC)1470e3b55780SDimitry Andric Register KernelRewriter::phi(Register LoopReg, std::optional<Register> InitReg,
14711d5ae102SDimitry Andric const TargetRegisterClass *RC) {
14721d5ae102SDimitry Andric // If the init register is not undef, try and find an existing phi.
1473145449b1SDimitry Andric if (InitReg) {
1474e3b55780SDimitry Andric auto I = Phis.find({LoopReg, *InitReg});
14751d5ae102SDimitry Andric if (I != Phis.end())
14761d5ae102SDimitry Andric return I->second;
14771d5ae102SDimitry Andric } else {
14781d5ae102SDimitry Andric for (auto &KV : Phis) {
14791d5ae102SDimitry Andric if (KV.first.first == LoopReg)
14801d5ae102SDimitry Andric return KV.second;
14811d5ae102SDimitry Andric }
14821d5ae102SDimitry Andric }
14831d5ae102SDimitry Andric
14841d5ae102SDimitry Andric // InitReg is either undef or no existing phi takes InitReg as input. Try and
14851d5ae102SDimitry Andric // find a phi that takes undef as input.
14861d5ae102SDimitry Andric auto I = UndefPhis.find(LoopReg);
14871d5ae102SDimitry Andric if (I != UndefPhis.end()) {
14881d5ae102SDimitry Andric Register R = I->second;
1489145449b1SDimitry Andric if (!InitReg)
14901d5ae102SDimitry Andric // Found a phi taking undef as input, and this input is undef so return
14911d5ae102SDimitry Andric // without any more changes.
14921d5ae102SDimitry Andric return R;
14931d5ae102SDimitry Andric // Found a phi taking undef as input, so rewrite it to take InitReg.
14941d5ae102SDimitry Andric MachineInstr *MI = MRI.getVRegDef(R);
1495e3b55780SDimitry Andric MI->getOperand(1).setReg(*InitReg);
1496e3b55780SDimitry Andric Phis.insert({{LoopReg, *InitReg}, R});
1497145449b1SDimitry Andric const TargetRegisterClass *ConstrainRegClass =
1498e3b55780SDimitry Andric MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1499145449b1SDimitry Andric assert(ConstrainRegClass && "Expected a valid constrained register class!");
1500145449b1SDimitry Andric (void)ConstrainRegClass;
15011d5ae102SDimitry Andric UndefPhis.erase(I);
15021d5ae102SDimitry Andric return R;
15031d5ae102SDimitry Andric }
15041d5ae102SDimitry Andric
15051d5ae102SDimitry Andric // Failed to find any existing phi to reuse, so create a new one.
15061d5ae102SDimitry Andric if (!RC)
15071d5ae102SDimitry Andric RC = MRI.getRegClass(LoopReg);
15081d5ae102SDimitry Andric Register R = MRI.createVirtualRegister(RC);
1509145449b1SDimitry Andric if (InitReg) {
1510145449b1SDimitry Andric const TargetRegisterClass *ConstrainRegClass =
15111d5ae102SDimitry Andric MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
1512145449b1SDimitry Andric assert(ConstrainRegClass && "Expected a valid constrained register class!");
1513145449b1SDimitry Andric (void)ConstrainRegClass;
1514145449b1SDimitry Andric }
15151d5ae102SDimitry Andric BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
1516145449b1SDimitry Andric .addReg(InitReg ? *InitReg : undef(RC))
15171d5ae102SDimitry Andric .addMBB(PreheaderBB)
15181d5ae102SDimitry Andric .addReg(LoopReg)
15191d5ae102SDimitry Andric .addMBB(BB);
1520145449b1SDimitry Andric if (!InitReg)
15211d5ae102SDimitry Andric UndefPhis[LoopReg] = R;
15221d5ae102SDimitry Andric else
15231d5ae102SDimitry Andric Phis[{LoopReg, *InitReg}] = R;
15241d5ae102SDimitry Andric return R;
15251d5ae102SDimitry Andric }
15261d5ae102SDimitry Andric
undef(const TargetRegisterClass * RC)15271d5ae102SDimitry Andric Register KernelRewriter::undef(const TargetRegisterClass *RC) {
15281d5ae102SDimitry Andric Register &R = Undefs[RC];
15291d5ae102SDimitry Andric if (R == 0) {
15301d5ae102SDimitry Andric // Create an IMPLICIT_DEF that defines this register if we need it.
15311d5ae102SDimitry Andric // All uses of this should be removed by the time we have finished unrolling
15321d5ae102SDimitry Andric // prologs and epilogs.
15331d5ae102SDimitry Andric R = MRI.createVirtualRegister(RC);
15341d5ae102SDimitry Andric auto *InsertBB = &PreheaderBB->getParent()->front();
15351d5ae102SDimitry Andric BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
15361d5ae102SDimitry Andric TII->get(TargetOpcode::IMPLICIT_DEF), R);
15371d5ae102SDimitry Andric }
15381d5ae102SDimitry Andric return R;
15391d5ae102SDimitry Andric }
15401d5ae102SDimitry Andric
15411d5ae102SDimitry Andric namespace {
15421d5ae102SDimitry Andric /// Describes an operand in the kernel of a pipelined loop. Characteristics of
15431d5ae102SDimitry Andric /// the operand are discovered, such as how many in-loop PHIs it has to jump
15441d5ae102SDimitry Andric /// through and defaults for these phis.
15451d5ae102SDimitry Andric class KernelOperandInfo {
15461d5ae102SDimitry Andric MachineBasicBlock *BB;
15471d5ae102SDimitry Andric MachineRegisterInfo &MRI;
15481d5ae102SDimitry Andric SmallVector<Register, 4> PhiDefaults;
15491d5ae102SDimitry Andric MachineOperand *Source;
15501d5ae102SDimitry Andric MachineOperand *Target;
15511d5ae102SDimitry Andric
15521d5ae102SDimitry Andric public:
KernelOperandInfo(MachineOperand * MO,MachineRegisterInfo & MRI,const SmallPtrSetImpl<MachineInstr * > & IllegalPhis)15531d5ae102SDimitry Andric KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
15541d5ae102SDimitry Andric const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
15551d5ae102SDimitry Andric : MRI(MRI) {
15561d5ae102SDimitry Andric Source = MO;
15571d5ae102SDimitry Andric BB = MO->getParent()->getParent();
15581d5ae102SDimitry Andric while (isRegInLoop(MO)) {
15591d5ae102SDimitry Andric MachineInstr *MI = MRI.getVRegDef(MO->getReg());
15601d5ae102SDimitry Andric if (MI->isFullCopy()) {
15611d5ae102SDimitry Andric MO = &MI->getOperand(1);
15621d5ae102SDimitry Andric continue;
15631d5ae102SDimitry Andric }
15641d5ae102SDimitry Andric if (!MI->isPHI())
15651d5ae102SDimitry Andric break;
15661d5ae102SDimitry Andric // If this is an illegal phi, don't count it in distance.
15671d5ae102SDimitry Andric if (IllegalPhis.count(MI)) {
15681d5ae102SDimitry Andric MO = &MI->getOperand(3);
15691d5ae102SDimitry Andric continue;
15701d5ae102SDimitry Andric }
15711d5ae102SDimitry Andric
15721d5ae102SDimitry Andric Register Default = getInitPhiReg(*MI, BB);
15731d5ae102SDimitry Andric MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
15741d5ae102SDimitry Andric : &MI->getOperand(3);
15751d5ae102SDimitry Andric PhiDefaults.push_back(Default);
15761d5ae102SDimitry Andric }
15771d5ae102SDimitry Andric Target = MO;
15781d5ae102SDimitry Andric }
15791d5ae102SDimitry Andric
operator ==(const KernelOperandInfo & Other) const15801d5ae102SDimitry Andric bool operator==(const KernelOperandInfo &Other) const {
15811d5ae102SDimitry Andric return PhiDefaults.size() == Other.PhiDefaults.size();
15821d5ae102SDimitry Andric }
15831d5ae102SDimitry Andric
print(raw_ostream & OS) const15841d5ae102SDimitry Andric void print(raw_ostream &OS) const {
15851d5ae102SDimitry Andric OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
15861d5ae102SDimitry Andric << *Source->getParent();
15871d5ae102SDimitry Andric }
15881d5ae102SDimitry Andric
15891d5ae102SDimitry Andric private:
isRegInLoop(MachineOperand * MO)15901d5ae102SDimitry Andric bool isRegInLoop(MachineOperand *MO) {
15911d5ae102SDimitry Andric return MO->isReg() && MO->getReg().isVirtual() &&
15921d5ae102SDimitry Andric MRI.getVRegDef(MO->getReg())->getParent() == BB;
15931d5ae102SDimitry Andric }
15941d5ae102SDimitry Andric };
15951d5ae102SDimitry Andric } // namespace
15961d5ae102SDimitry Andric
15971d5ae102SDimitry Andric MachineBasicBlock *
peelKernel(LoopPeelDirection LPD)15981d5ae102SDimitry Andric PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
15991d5ae102SDimitry Andric MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
16001d5ae102SDimitry Andric if (LPD == LPD_Front)
16011d5ae102SDimitry Andric PeeledFront.push_back(NewBB);
16021d5ae102SDimitry Andric else
16031d5ae102SDimitry Andric PeeledBack.push_front(NewBB);
16041d5ae102SDimitry Andric for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
16051d5ae102SDimitry Andric ++I, ++NI) {
16061d5ae102SDimitry Andric CanonicalMIs[&*I] = &*I;
16071d5ae102SDimitry Andric CanonicalMIs[&*NI] = &*I;
16081d5ae102SDimitry Andric BlockMIs[{NewBB, &*I}] = &*NI;
16091d5ae102SDimitry Andric BlockMIs[{BB, &*I}] = &*I;
16101d5ae102SDimitry Andric }
16111d5ae102SDimitry Andric return NewBB;
16121d5ae102SDimitry Andric }
16131d5ae102SDimitry Andric
filterInstructions(MachineBasicBlock * MB,int MinStage)1614706b4fc4SDimitry Andric void PeelingModuloScheduleExpander::filterInstructions(MachineBasicBlock *MB,
1615706b4fc4SDimitry Andric int MinStage) {
1616706b4fc4SDimitry Andric for (auto I = MB->getFirstInstrTerminator()->getReverseIterator();
1617706b4fc4SDimitry Andric I != std::next(MB->getFirstNonPHI()->getReverseIterator());) {
1618706b4fc4SDimitry Andric MachineInstr *MI = &*I++;
1619706b4fc4SDimitry Andric int Stage = getStage(MI);
1620706b4fc4SDimitry Andric if (Stage == -1 || Stage >= MinStage)
1621706b4fc4SDimitry Andric continue;
1622706b4fc4SDimitry Andric
1623706b4fc4SDimitry Andric for (MachineOperand &DefMO : MI->defs()) {
1624706b4fc4SDimitry Andric SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
1625706b4fc4SDimitry Andric for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
1626706b4fc4SDimitry Andric // Only PHIs can use values from this block by construction.
1627706b4fc4SDimitry Andric // Match with the equivalent PHI in B.
1628706b4fc4SDimitry Andric assert(UseMI.isPHI());
1629706b4fc4SDimitry Andric Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
1630706b4fc4SDimitry Andric MI->getParent());
1631706b4fc4SDimitry Andric Subs.emplace_back(&UseMI, Reg);
1632706b4fc4SDimitry Andric }
1633706b4fc4SDimitry Andric for (auto &Sub : Subs)
1634706b4fc4SDimitry Andric Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
1635706b4fc4SDimitry Andric *MRI.getTargetRegisterInfo());
1636706b4fc4SDimitry Andric }
1637706b4fc4SDimitry Andric if (LIS)
1638706b4fc4SDimitry Andric LIS->RemoveMachineInstrFromMaps(*MI);
1639706b4fc4SDimitry Andric MI->eraseFromParent();
1640706b4fc4SDimitry Andric }
1641706b4fc4SDimitry Andric }
1642706b4fc4SDimitry Andric
moveStageBetweenBlocks(MachineBasicBlock * DestBB,MachineBasicBlock * SourceBB,unsigned Stage)1643706b4fc4SDimitry Andric void PeelingModuloScheduleExpander::moveStageBetweenBlocks(
1644706b4fc4SDimitry Andric MachineBasicBlock *DestBB, MachineBasicBlock *SourceBB, unsigned Stage) {
1645706b4fc4SDimitry Andric auto InsertPt = DestBB->getFirstNonPHI();
1646706b4fc4SDimitry Andric DenseMap<Register, Register> Remaps;
1647c0981da4SDimitry Andric for (MachineInstr &MI : llvm::make_early_inc_range(
1648c0981da4SDimitry Andric llvm::make_range(SourceBB->getFirstNonPHI(), SourceBB->end()))) {
1649c0981da4SDimitry Andric if (MI.isPHI()) {
1650706b4fc4SDimitry Andric // This is an illegal PHI. If we move any instructions using an illegal
1651cfca06d7SDimitry Andric // PHI, we need to create a legal Phi.
1652c0981da4SDimitry Andric if (getStage(&MI) != Stage) {
1653cfca06d7SDimitry Andric // The legal Phi is not necessary if the illegal phi's stage
1654cfca06d7SDimitry Andric // is being moved.
1655c0981da4SDimitry Andric Register PhiR = MI.getOperand(0).getReg();
1656706b4fc4SDimitry Andric auto RC = MRI.getRegClass(PhiR);
1657706b4fc4SDimitry Andric Register NR = MRI.createVirtualRegister(RC);
1658cfca06d7SDimitry Andric MachineInstr *NI = BuildMI(*DestBB, DestBB->getFirstNonPHI(),
1659cfca06d7SDimitry Andric DebugLoc(), TII->get(TargetOpcode::PHI), NR)
1660706b4fc4SDimitry Andric .addReg(PhiR)
1661706b4fc4SDimitry Andric .addMBB(SourceBB);
1662c0981da4SDimitry Andric BlockMIs[{DestBB, CanonicalMIs[&MI]}] = NI;
1663c0981da4SDimitry Andric CanonicalMIs[NI] = CanonicalMIs[&MI];
1664706b4fc4SDimitry Andric Remaps[PhiR] = NR;
1665cfca06d7SDimitry Andric }
1666706b4fc4SDimitry Andric }
1667c0981da4SDimitry Andric if (getStage(&MI) != Stage)
1668706b4fc4SDimitry Andric continue;
1669c0981da4SDimitry Andric MI.removeFromParent();
1670c0981da4SDimitry Andric DestBB->insert(InsertPt, &MI);
1671c0981da4SDimitry Andric auto *KernelMI = CanonicalMIs[&MI];
1672c0981da4SDimitry Andric BlockMIs[{DestBB, KernelMI}] = &MI;
1673706b4fc4SDimitry Andric BlockMIs.erase({SourceBB, KernelMI});
1674706b4fc4SDimitry Andric }
1675706b4fc4SDimitry Andric SmallVector<MachineInstr *, 4> PhiToDelete;
1676706b4fc4SDimitry Andric for (MachineInstr &MI : DestBB->phis()) {
1677706b4fc4SDimitry Andric assert(MI.getNumOperands() == 3);
1678706b4fc4SDimitry Andric MachineInstr *Def = MRI.getVRegDef(MI.getOperand(1).getReg());
1679706b4fc4SDimitry Andric // If the instruction referenced by the phi is moved inside the block
1680706b4fc4SDimitry Andric // we don't need the phi anymore.
1681706b4fc4SDimitry Andric if (getStage(Def) == Stage) {
1682706b4fc4SDimitry Andric Register PhiReg = MI.getOperand(0).getReg();
1683ac9a064cSDimitry Andric assert(Def->findRegisterDefOperandIdx(MI.getOperand(1).getReg(),
1684ac9a064cSDimitry Andric /*TRI=*/nullptr) != -1);
1685cfca06d7SDimitry Andric MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
1686706b4fc4SDimitry Andric MI.getOperand(0).setReg(PhiReg);
1687706b4fc4SDimitry Andric PhiToDelete.push_back(&MI);
1688706b4fc4SDimitry Andric }
1689706b4fc4SDimitry Andric }
1690706b4fc4SDimitry Andric for (auto *P : PhiToDelete)
1691706b4fc4SDimitry Andric P->eraseFromParent();
1692706b4fc4SDimitry Andric InsertPt = DestBB->getFirstNonPHI();
1693706b4fc4SDimitry Andric // Helper to clone Phi instructions into the destination block. We clone Phi
1694706b4fc4SDimitry Andric // greedily to avoid combinatorial explosion of Phi instructions.
1695706b4fc4SDimitry Andric auto clonePhi = [&](MachineInstr *Phi) {
1696706b4fc4SDimitry Andric MachineInstr *NewMI = MF.CloneMachineInstr(Phi);
1697706b4fc4SDimitry Andric DestBB->insert(InsertPt, NewMI);
1698706b4fc4SDimitry Andric Register OrigR = Phi->getOperand(0).getReg();
1699706b4fc4SDimitry Andric Register R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
1700706b4fc4SDimitry Andric NewMI->getOperand(0).setReg(R);
1701706b4fc4SDimitry Andric NewMI->getOperand(1).setReg(OrigR);
1702706b4fc4SDimitry Andric NewMI->getOperand(2).setMBB(*DestBB->pred_begin());
1703706b4fc4SDimitry Andric Remaps[OrigR] = R;
1704706b4fc4SDimitry Andric CanonicalMIs[NewMI] = CanonicalMIs[Phi];
1705706b4fc4SDimitry Andric BlockMIs[{DestBB, CanonicalMIs[Phi]}] = NewMI;
1706706b4fc4SDimitry Andric PhiNodeLoopIteration[NewMI] = PhiNodeLoopIteration[Phi];
1707706b4fc4SDimitry Andric return R;
1708706b4fc4SDimitry Andric };
1709706b4fc4SDimitry Andric for (auto I = DestBB->getFirstNonPHI(); I != DestBB->end(); ++I) {
1710706b4fc4SDimitry Andric for (MachineOperand &MO : I->uses()) {
1711706b4fc4SDimitry Andric if (!MO.isReg())
1712706b4fc4SDimitry Andric continue;
1713706b4fc4SDimitry Andric if (Remaps.count(MO.getReg()))
1714706b4fc4SDimitry Andric MO.setReg(Remaps[MO.getReg()]);
1715706b4fc4SDimitry Andric else {
1716706b4fc4SDimitry Andric // If we are using a phi from the source block we need to add a new phi
1717706b4fc4SDimitry Andric // pointing to the old one.
1718706b4fc4SDimitry Andric MachineInstr *Use = MRI.getUniqueVRegDef(MO.getReg());
1719706b4fc4SDimitry Andric if (Use && Use->isPHI() && Use->getParent() == SourceBB) {
1720706b4fc4SDimitry Andric Register R = clonePhi(Use);
1721706b4fc4SDimitry Andric MO.setReg(R);
1722706b4fc4SDimitry Andric }
1723706b4fc4SDimitry Andric }
1724706b4fc4SDimitry Andric }
1725706b4fc4SDimitry Andric }
1726706b4fc4SDimitry Andric }
1727706b4fc4SDimitry Andric
1728706b4fc4SDimitry Andric Register
getPhiCanonicalReg(MachineInstr * CanonicalPhi,MachineInstr * Phi)1729706b4fc4SDimitry Andric PeelingModuloScheduleExpander::getPhiCanonicalReg(MachineInstr *CanonicalPhi,
1730706b4fc4SDimitry Andric MachineInstr *Phi) {
1731706b4fc4SDimitry Andric unsigned distance = PhiNodeLoopIteration[Phi];
1732706b4fc4SDimitry Andric MachineInstr *CanonicalUse = CanonicalPhi;
1733cfca06d7SDimitry Andric Register CanonicalUseReg = CanonicalUse->getOperand(0).getReg();
1734706b4fc4SDimitry Andric for (unsigned I = 0; I < distance; ++I) {
1735706b4fc4SDimitry Andric assert(CanonicalUse->isPHI());
1736706b4fc4SDimitry Andric assert(CanonicalUse->getNumOperands() == 5);
1737706b4fc4SDimitry Andric unsigned LoopRegIdx = 3, InitRegIdx = 1;
1738706b4fc4SDimitry Andric if (CanonicalUse->getOperand(2).getMBB() == CanonicalUse->getParent())
1739706b4fc4SDimitry Andric std::swap(LoopRegIdx, InitRegIdx);
1740cfca06d7SDimitry Andric CanonicalUseReg = CanonicalUse->getOperand(LoopRegIdx).getReg();
1741cfca06d7SDimitry Andric CanonicalUse = MRI.getVRegDef(CanonicalUseReg);
1742706b4fc4SDimitry Andric }
1743cfca06d7SDimitry Andric return CanonicalUseReg;
1744706b4fc4SDimitry Andric }
1745706b4fc4SDimitry Andric
peelPrologAndEpilogs()17461d5ae102SDimitry Andric void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
17471d5ae102SDimitry Andric BitVector LS(Schedule.getNumStages(), true);
17481d5ae102SDimitry Andric BitVector AS(Schedule.getNumStages(), true);
17491d5ae102SDimitry Andric LiveStages[BB] = LS;
17501d5ae102SDimitry Andric AvailableStages[BB] = AS;
17511d5ae102SDimitry Andric
17521d5ae102SDimitry Andric // Peel out the prologs.
17531d5ae102SDimitry Andric LS.reset();
17541d5ae102SDimitry Andric for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
17556f8fc217SDimitry Andric LS[I] = true;
17561d5ae102SDimitry Andric Prologs.push_back(peelKernel(LPD_Front));
17571d5ae102SDimitry Andric LiveStages[Prologs.back()] = LS;
17581d5ae102SDimitry Andric AvailableStages[Prologs.back()] = LS;
17591d5ae102SDimitry Andric }
17601d5ae102SDimitry Andric
17611d5ae102SDimitry Andric // Create a block that will end up as the new loop exiting block (dominated by
17621d5ae102SDimitry Andric // all prologs and epilogs). It will only contain PHIs, in the same order as
17631d5ae102SDimitry Andric // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
17641d5ae102SDimitry Andric // that the exiting block is a (sub) clone of BB. This in turn gives us the
17651d5ae102SDimitry Andric // property that any value deffed in BB but used outside of BB is used by a
17661d5ae102SDimitry Andric // PHI in the exiting block.
17671d5ae102SDimitry Andric MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
1768706b4fc4SDimitry Andric EliminateDeadPhis(ExitingBB, MRI, LIS, /*KeepSingleSrcPhi=*/true);
17691d5ae102SDimitry Andric // Push out the epilogs, again in reverse order.
17701d5ae102SDimitry Andric // We can't assume anything about the minumum loop trip count at this point,
1771706b4fc4SDimitry Andric // so emit a fairly complex epilog.
1772706b4fc4SDimitry Andric
1773706b4fc4SDimitry Andric // We first peel number of stages minus one epilogue. Then we remove dead
1774706b4fc4SDimitry Andric // stages and reorder instructions based on their stage. If we have 3 stages
1775706b4fc4SDimitry Andric // we generate first:
1776706b4fc4SDimitry Andric // E0[3, 2, 1]
1777706b4fc4SDimitry Andric // E1[3', 2']
1778706b4fc4SDimitry Andric // E2[3'']
1779706b4fc4SDimitry Andric // And then we move instructions based on their stages to have:
1780706b4fc4SDimitry Andric // E0[3]
1781706b4fc4SDimitry Andric // E1[2, 3']
1782706b4fc4SDimitry Andric // E2[1, 2', 3'']
1783706b4fc4SDimitry Andric // The transformation is legal because we only move instructions past
1784706b4fc4SDimitry Andric // instructions of a previous loop iteration.
17851d5ae102SDimitry Andric for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
1786706b4fc4SDimitry Andric Epilogs.push_back(peelKernel(LPD_Back));
1787706b4fc4SDimitry Andric MachineBasicBlock *B = Epilogs.back();
1788706b4fc4SDimitry Andric filterInstructions(B, Schedule.getNumStages() - I);
1789706b4fc4SDimitry Andric // Keep track at which iteration each phi belongs to. We need it to know
1790706b4fc4SDimitry Andric // what version of the variable to use during prologue/epilogue stitching.
1791706b4fc4SDimitry Andric EliminateDeadPhis(B, MRI, LIS, /*KeepSingleSrcPhi=*/true);
1792c0981da4SDimitry Andric for (MachineInstr &Phi : B->phis())
1793c0981da4SDimitry Andric PhiNodeLoopIteration[&Phi] = Schedule.getNumStages() - I;
17941d5ae102SDimitry Andric }
1795706b4fc4SDimitry Andric for (size_t I = 0; I < Epilogs.size(); I++) {
1796706b4fc4SDimitry Andric LS.reset();
1797706b4fc4SDimitry Andric for (size_t J = I; J < Epilogs.size(); J++) {
1798706b4fc4SDimitry Andric int Iteration = J;
1799706b4fc4SDimitry Andric unsigned Stage = Schedule.getNumStages() - 1 + I - J;
1800706b4fc4SDimitry Andric // Move stage one block at a time so that Phi nodes are updated correctly.
1801706b4fc4SDimitry Andric for (size_t K = Iteration; K > I; K--)
1802706b4fc4SDimitry Andric moveStageBetweenBlocks(Epilogs[K - 1], Epilogs[K], Stage);
18036f8fc217SDimitry Andric LS[Stage] = true;
1804706b4fc4SDimitry Andric }
1805706b4fc4SDimitry Andric LiveStages[Epilogs[I]] = LS;
1806706b4fc4SDimitry Andric AvailableStages[Epilogs[I]] = AS;
18071d5ae102SDimitry Andric }
18081d5ae102SDimitry Andric
18091d5ae102SDimitry Andric // Now we've defined all the prolog and epilog blocks as a fallthrough
18101d5ae102SDimitry Andric // sequence, add the edges that will be followed if the loop trip count is
18111d5ae102SDimitry Andric // lower than the number of stages (connecting prologs directly with epilogs).
18121d5ae102SDimitry Andric auto PI = Prologs.begin();
18131d5ae102SDimitry Andric auto EI = Epilogs.begin();
18141d5ae102SDimitry Andric assert(Prologs.size() == Epilogs.size());
18151d5ae102SDimitry Andric for (; PI != Prologs.end(); ++PI, ++EI) {
18161d5ae102SDimitry Andric MachineBasicBlock *Pred = *(*EI)->pred_begin();
18171d5ae102SDimitry Andric (*PI)->addSuccessor(*EI);
18181d5ae102SDimitry Andric for (MachineInstr &MI : (*EI)->phis()) {
18191d5ae102SDimitry Andric Register Reg = MI.getOperand(1).getReg();
18201d5ae102SDimitry Andric MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
1821706b4fc4SDimitry Andric if (Use && Use->getParent() == Pred) {
1822706b4fc4SDimitry Andric MachineInstr *CanonicalUse = CanonicalMIs[Use];
1823706b4fc4SDimitry Andric if (CanonicalUse->isPHI()) {
1824706b4fc4SDimitry Andric // If the use comes from a phi we need to skip as many phi as the
1825706b4fc4SDimitry Andric // distance between the epilogue and the kernel. Trace through the phi
1826706b4fc4SDimitry Andric // chain to find the right value.
1827706b4fc4SDimitry Andric Reg = getPhiCanonicalReg(CanonicalUse, Use);
1828706b4fc4SDimitry Andric }
18291d5ae102SDimitry Andric Reg = getEquivalentRegisterIn(Reg, *PI);
1830706b4fc4SDimitry Andric }
18311d5ae102SDimitry Andric MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
18321d5ae102SDimitry Andric MI.addOperand(MachineOperand::CreateMBB(*PI));
18331d5ae102SDimitry Andric }
18341d5ae102SDimitry Andric }
18351d5ae102SDimitry Andric
18361d5ae102SDimitry Andric // Create a list of all blocks in order.
18371d5ae102SDimitry Andric SmallVector<MachineBasicBlock *, 8> Blocks;
18381d5ae102SDimitry Andric llvm::copy(PeeledFront, std::back_inserter(Blocks));
18391d5ae102SDimitry Andric Blocks.push_back(BB);
18401d5ae102SDimitry Andric llvm::copy(PeeledBack, std::back_inserter(Blocks));
18411d5ae102SDimitry Andric
18421d5ae102SDimitry Andric // Iterate in reverse order over all instructions, remapping as we go.
18431d5ae102SDimitry Andric for (MachineBasicBlock *B : reverse(Blocks)) {
1844145449b1SDimitry Andric for (auto I = B->instr_rbegin();
18451d5ae102SDimitry Andric I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
1846145449b1SDimitry Andric MachineBasicBlock::reverse_instr_iterator MI = I++;
1847145449b1SDimitry Andric rewriteUsesOf(&*MI);
18481d5ae102SDimitry Andric }
18491d5ae102SDimitry Andric }
1850706b4fc4SDimitry Andric for (auto *MI : IllegalPhisToDelete) {
1851706b4fc4SDimitry Andric if (LIS)
1852706b4fc4SDimitry Andric LIS->RemoveMachineInstrFromMaps(*MI);
1853706b4fc4SDimitry Andric MI->eraseFromParent();
1854706b4fc4SDimitry Andric }
1855706b4fc4SDimitry Andric IllegalPhisToDelete.clear();
1856706b4fc4SDimitry Andric
18571d5ae102SDimitry Andric // Now all remapping has been done, we're free to optimize the generated code.
18581d5ae102SDimitry Andric for (MachineBasicBlock *B : reverse(Blocks))
18591d5ae102SDimitry Andric EliminateDeadPhis(B, MRI, LIS);
18601d5ae102SDimitry Andric EliminateDeadPhis(ExitingBB, MRI, LIS);
18611d5ae102SDimitry Andric }
18621d5ae102SDimitry Andric
CreateLCSSAExitingBlock()18631d5ae102SDimitry Andric MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
18641d5ae102SDimitry Andric MachineFunction &MF = *BB->getParent();
18651d5ae102SDimitry Andric MachineBasicBlock *Exit = *BB->succ_begin();
18661d5ae102SDimitry Andric if (Exit == BB)
18671d5ae102SDimitry Andric Exit = *std::next(BB->succ_begin());
18681d5ae102SDimitry Andric
18691d5ae102SDimitry Andric MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
18701d5ae102SDimitry Andric MF.insert(std::next(BB->getIterator()), NewBB);
18711d5ae102SDimitry Andric
18721d5ae102SDimitry Andric // Clone all phis in BB into NewBB and rewrite.
18731d5ae102SDimitry Andric for (MachineInstr &MI : BB->phis()) {
18741d5ae102SDimitry Andric auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
18751d5ae102SDimitry Andric Register OldR = MI.getOperand(3).getReg();
18761d5ae102SDimitry Andric Register R = MRI.createVirtualRegister(RC);
18771d5ae102SDimitry Andric SmallVector<MachineInstr *, 4> Uses;
18781d5ae102SDimitry Andric for (MachineInstr &Use : MRI.use_instructions(OldR))
18791d5ae102SDimitry Andric if (Use.getParent() != BB)
18801d5ae102SDimitry Andric Uses.push_back(&Use);
18811d5ae102SDimitry Andric for (MachineInstr *Use : Uses)
18821d5ae102SDimitry Andric Use->substituteRegister(OldR, R, /*SubIdx=*/0,
18831d5ae102SDimitry Andric *MRI.getTargetRegisterInfo());
18841d5ae102SDimitry Andric MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
18851d5ae102SDimitry Andric .addReg(OldR)
18861d5ae102SDimitry Andric .addMBB(BB);
18871d5ae102SDimitry Andric BlockMIs[{NewBB, &MI}] = NI;
18881d5ae102SDimitry Andric CanonicalMIs[NI] = &MI;
18891d5ae102SDimitry Andric }
18901d5ae102SDimitry Andric BB->replaceSuccessor(Exit, NewBB);
18911d5ae102SDimitry Andric Exit->replacePhiUsesWith(BB, NewBB);
18921d5ae102SDimitry Andric NewBB->addSuccessor(Exit);
18931d5ae102SDimitry Andric
18941d5ae102SDimitry Andric MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
18951d5ae102SDimitry Andric SmallVector<MachineOperand, 4> Cond;
18961d5ae102SDimitry Andric bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
18971d5ae102SDimitry Andric (void)CanAnalyzeBr;
18981d5ae102SDimitry Andric assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
18991d5ae102SDimitry Andric TII->removeBranch(*BB);
19001d5ae102SDimitry Andric TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
19011d5ae102SDimitry Andric Cond, DebugLoc());
19021d5ae102SDimitry Andric TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
19031d5ae102SDimitry Andric return NewBB;
19041d5ae102SDimitry Andric }
19051d5ae102SDimitry Andric
19061d5ae102SDimitry Andric Register
getEquivalentRegisterIn(Register Reg,MachineBasicBlock * BB)19071d5ae102SDimitry Andric PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
19081d5ae102SDimitry Andric MachineBasicBlock *BB) {
19091d5ae102SDimitry Andric MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
1910ac9a064cSDimitry Andric unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
19111d5ae102SDimitry Andric return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
19121d5ae102SDimitry Andric }
19131d5ae102SDimitry Andric
rewriteUsesOf(MachineInstr * MI)19141d5ae102SDimitry Andric void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
19151d5ae102SDimitry Andric if (MI->isPHI()) {
19161d5ae102SDimitry Andric // This is an illegal PHI. The loop-carried (desired) value is operand 3,
19171d5ae102SDimitry Andric // and it is produced by this block.
19181d5ae102SDimitry Andric Register PhiR = MI->getOperand(0).getReg();
19191d5ae102SDimitry Andric Register R = MI->getOperand(3).getReg();
19201d5ae102SDimitry Andric int RMIStage = getStage(MRI.getUniqueVRegDef(R));
19211d5ae102SDimitry Andric if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
19221d5ae102SDimitry Andric R = MI->getOperand(1).getReg();
19231d5ae102SDimitry Andric MRI.setRegClass(R, MRI.getRegClass(PhiR));
19241d5ae102SDimitry Andric MRI.replaceRegWith(PhiR, R);
1925706b4fc4SDimitry Andric // Postpone deleting the Phi as it may be referenced by BlockMIs and used
1926706b4fc4SDimitry Andric // later to figure out how to remap registers.
1927706b4fc4SDimitry Andric MI->getOperand(0).setReg(PhiR);
1928706b4fc4SDimitry Andric IllegalPhisToDelete.push_back(MI);
19291d5ae102SDimitry Andric return;
19301d5ae102SDimitry Andric }
19311d5ae102SDimitry Andric
19321d5ae102SDimitry Andric int Stage = getStage(MI);
19331d5ae102SDimitry Andric if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
19341d5ae102SDimitry Andric LiveStages[MI->getParent()].test(Stage))
19351d5ae102SDimitry Andric // Instruction is live, no rewriting to do.
19361d5ae102SDimitry Andric return;
19371d5ae102SDimitry Andric
19381d5ae102SDimitry Andric for (MachineOperand &DefMO : MI->defs()) {
19391d5ae102SDimitry Andric SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
19401d5ae102SDimitry Andric for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
19411d5ae102SDimitry Andric // Only PHIs can use values from this block by construction.
19421d5ae102SDimitry Andric // Match with the equivalent PHI in B.
19431d5ae102SDimitry Andric assert(UseMI.isPHI());
19441d5ae102SDimitry Andric Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
19451d5ae102SDimitry Andric MI->getParent());
19461d5ae102SDimitry Andric Subs.emplace_back(&UseMI, Reg);
19471d5ae102SDimitry Andric }
19481d5ae102SDimitry Andric for (auto &Sub : Subs)
19491d5ae102SDimitry Andric Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
19501d5ae102SDimitry Andric *MRI.getTargetRegisterInfo());
19511d5ae102SDimitry Andric }
19521d5ae102SDimitry Andric if (LIS)
19531d5ae102SDimitry Andric LIS->RemoveMachineInstrFromMaps(*MI);
19541d5ae102SDimitry Andric MI->eraseFromParent();
19551d5ae102SDimitry Andric }
19561d5ae102SDimitry Andric
fixupBranches()19571d5ae102SDimitry Andric void PeelingModuloScheduleExpander::fixupBranches() {
19581d5ae102SDimitry Andric // Work outwards from the kernel.
19591d5ae102SDimitry Andric bool KernelDisposed = false;
19601d5ae102SDimitry Andric int TC = Schedule.getNumStages() - 1;
19611d5ae102SDimitry Andric for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
19621d5ae102SDimitry Andric ++PI, ++EI, --TC) {
19631d5ae102SDimitry Andric MachineBasicBlock *Prolog = *PI;
19641d5ae102SDimitry Andric MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
19651d5ae102SDimitry Andric MachineBasicBlock *Epilog = *EI;
19661d5ae102SDimitry Andric SmallVector<MachineOperand, 4> Cond;
19671d5ae102SDimitry Andric TII->removeBranch(*Prolog);
1968e3b55780SDimitry Andric std::optional<bool> StaticallyGreater =
1969cfca06d7SDimitry Andric LoopInfo->createTripCountGreaterCondition(TC, *Prolog, Cond);
1970145449b1SDimitry Andric if (!StaticallyGreater) {
19711d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
19721d5ae102SDimitry Andric // Dynamically branch based on Cond.
19731d5ae102SDimitry Andric TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
19741d5ae102SDimitry Andric } else if (*StaticallyGreater == false) {
19751d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
19761d5ae102SDimitry Andric // Prolog never falls through; branch to epilog and orphan interior
19771d5ae102SDimitry Andric // blocks. Leave it to unreachable-block-elim to clean up.
19781d5ae102SDimitry Andric Prolog->removeSuccessor(Fallthrough);
19791d5ae102SDimitry Andric for (MachineInstr &P : Fallthrough->phis()) {
1980145449b1SDimitry Andric P.removeOperand(2);
1981145449b1SDimitry Andric P.removeOperand(1);
19821d5ae102SDimitry Andric }
19831d5ae102SDimitry Andric TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
19841d5ae102SDimitry Andric KernelDisposed = true;
19851d5ae102SDimitry Andric } else {
19861d5ae102SDimitry Andric LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
19871d5ae102SDimitry Andric // Prolog always falls through; remove incoming values in epilog.
19881d5ae102SDimitry Andric Prolog->removeSuccessor(Epilog);
19891d5ae102SDimitry Andric for (MachineInstr &P : Epilog->phis()) {
1990145449b1SDimitry Andric P.removeOperand(4);
1991145449b1SDimitry Andric P.removeOperand(3);
19921d5ae102SDimitry Andric }
19931d5ae102SDimitry Andric }
19941d5ae102SDimitry Andric }
19951d5ae102SDimitry Andric
19961d5ae102SDimitry Andric if (!KernelDisposed) {
1997cfca06d7SDimitry Andric LoopInfo->adjustTripCount(-(Schedule.getNumStages() - 1));
1998cfca06d7SDimitry Andric LoopInfo->setPreheader(Prologs.back());
19991d5ae102SDimitry Andric } else {
2000cfca06d7SDimitry Andric LoopInfo->disposed();
20011d5ae102SDimitry Andric }
20021d5ae102SDimitry Andric }
20031d5ae102SDimitry Andric
rewriteKernel()20041d5ae102SDimitry Andric void PeelingModuloScheduleExpander::rewriteKernel() {
2005344a3780SDimitry Andric KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
20061d5ae102SDimitry Andric KR.rewrite();
20071d5ae102SDimitry Andric }
20081d5ae102SDimitry Andric
expand()20091d5ae102SDimitry Andric void PeelingModuloScheduleExpander::expand() {
20101d5ae102SDimitry Andric BB = Schedule.getLoop()->getTopBlock();
20111d5ae102SDimitry Andric Preheader = Schedule.getLoop()->getLoopPreheader();
20121d5ae102SDimitry Andric LLVM_DEBUG(Schedule.dump());
2013cfca06d7SDimitry Andric LoopInfo = TII->analyzeLoopForPipelining(BB);
2014cfca06d7SDimitry Andric assert(LoopInfo);
20151d5ae102SDimitry Andric
20161d5ae102SDimitry Andric rewriteKernel();
20171d5ae102SDimitry Andric peelPrologAndEpilogs();
20181d5ae102SDimitry Andric fixupBranches();
20191d5ae102SDimitry Andric }
20201d5ae102SDimitry Andric
validateAgainstModuloScheduleExpander()20211d5ae102SDimitry Andric void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
20221d5ae102SDimitry Andric BB = Schedule.getLoop()->getTopBlock();
20231d5ae102SDimitry Andric Preheader = Schedule.getLoop()->getLoopPreheader();
20241d5ae102SDimitry Andric
20251d5ae102SDimitry Andric // Dump the schedule before we invalidate and remap all its instructions.
20261d5ae102SDimitry Andric // Stash it in a string so we can print it if we found an error.
20271d5ae102SDimitry Andric std::string ScheduleDump;
20281d5ae102SDimitry Andric raw_string_ostream OS(ScheduleDump);
20291d5ae102SDimitry Andric Schedule.print(OS);
20301d5ae102SDimitry Andric OS.flush();
20311d5ae102SDimitry Andric
20321d5ae102SDimitry Andric // First, run the normal ModuleScheduleExpander. We don't support any
20331d5ae102SDimitry Andric // InstrChanges.
20341d5ae102SDimitry Andric assert(LIS && "Requires LiveIntervals!");
20351d5ae102SDimitry Andric ModuloScheduleExpander MSE(MF, Schedule, *LIS,
20361d5ae102SDimitry Andric ModuloScheduleExpander::InstrChangesTy());
20371d5ae102SDimitry Andric MSE.expand();
20381d5ae102SDimitry Andric MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
20391d5ae102SDimitry Andric if (!ExpandedKernel) {
20401d5ae102SDimitry Andric // The expander optimized away the kernel. We can't do any useful checking.
20411d5ae102SDimitry Andric MSE.cleanup();
20421d5ae102SDimitry Andric return;
20431d5ae102SDimitry Andric }
20441d5ae102SDimitry Andric // Before running the KernelRewriter, re-add BB into the CFG.
20451d5ae102SDimitry Andric Preheader->addSuccessor(BB);
20461d5ae102SDimitry Andric
20471d5ae102SDimitry Andric // Now run the new expansion algorithm.
2048344a3780SDimitry Andric KernelRewriter KR(*Schedule.getLoop(), Schedule, BB);
20491d5ae102SDimitry Andric KR.rewrite();
20501d5ae102SDimitry Andric peelPrologAndEpilogs();
20511d5ae102SDimitry Andric
20521d5ae102SDimitry Andric // Collect all illegal phis that the new algorithm created. We'll give these
20531d5ae102SDimitry Andric // to KernelOperandInfo.
20541d5ae102SDimitry Andric SmallPtrSet<MachineInstr *, 4> IllegalPhis;
20551d5ae102SDimitry Andric for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
20561d5ae102SDimitry Andric if (NI->isPHI())
20571d5ae102SDimitry Andric IllegalPhis.insert(&*NI);
20581d5ae102SDimitry Andric }
20591d5ae102SDimitry Andric
20601d5ae102SDimitry Andric // Co-iterate across both kernels. We expect them to be identical apart from
20611d5ae102SDimitry Andric // phis and full COPYs (we look through both).
20621d5ae102SDimitry Andric SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
20631d5ae102SDimitry Andric auto OI = ExpandedKernel->begin();
20641d5ae102SDimitry Andric auto NI = BB->begin();
20651d5ae102SDimitry Andric for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
20661d5ae102SDimitry Andric while (OI->isPHI() || OI->isFullCopy())
20671d5ae102SDimitry Andric ++OI;
20681d5ae102SDimitry Andric while (NI->isPHI() || NI->isFullCopy())
20691d5ae102SDimitry Andric ++NI;
20701d5ae102SDimitry Andric assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
20711d5ae102SDimitry Andric // Analyze every operand separately.
20721d5ae102SDimitry Andric for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
20731d5ae102SDimitry Andric OOpI != OI->operands_end(); ++OOpI, ++NOpI)
20741d5ae102SDimitry Andric KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
20751d5ae102SDimitry Andric KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
20761d5ae102SDimitry Andric }
20771d5ae102SDimitry Andric
20781d5ae102SDimitry Andric bool Failed = false;
20791d5ae102SDimitry Andric for (auto &OldAndNew : KOIs) {
20801d5ae102SDimitry Andric if (OldAndNew.first == OldAndNew.second)
20811d5ae102SDimitry Andric continue;
20821d5ae102SDimitry Andric Failed = true;
20831d5ae102SDimitry Andric errs() << "Modulo kernel validation error: [\n";
20841d5ae102SDimitry Andric errs() << " [golden] ";
20851d5ae102SDimitry Andric OldAndNew.first.print(errs());
20861d5ae102SDimitry Andric errs() << " ";
20871d5ae102SDimitry Andric OldAndNew.second.print(errs());
20881d5ae102SDimitry Andric errs() << "]\n";
20891d5ae102SDimitry Andric }
20901d5ae102SDimitry Andric
20911d5ae102SDimitry Andric if (Failed) {
20921d5ae102SDimitry Andric errs() << "Golden reference kernel:\n";
20931d5ae102SDimitry Andric ExpandedKernel->print(errs());
20941d5ae102SDimitry Andric errs() << "New kernel:\n";
20951d5ae102SDimitry Andric BB->print(errs());
20961d5ae102SDimitry Andric errs() << ScheduleDump;
20971d5ae102SDimitry Andric report_fatal_error(
20981d5ae102SDimitry Andric "Modulo kernel validation (-pipeliner-experimental-cg) failed");
20991d5ae102SDimitry Andric }
21001d5ae102SDimitry Andric
21011d5ae102SDimitry Andric // Cleanup by removing BB from the CFG again as the original
21021d5ae102SDimitry Andric // ModuloScheduleExpander intended.
21031d5ae102SDimitry Andric Preheader->removeSuccessor(BB);
21041d5ae102SDimitry Andric MSE.cleanup();
21051d5ae102SDimitry Andric }
21061d5ae102SDimitry Andric
cloneInstr(MachineInstr * OldMI)2107ac9a064cSDimitry Andric MachineInstr *ModuloScheduleExpanderMVE::cloneInstr(MachineInstr *OldMI) {
2108ac9a064cSDimitry Andric MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
2109ac9a064cSDimitry Andric
2110ac9a064cSDimitry Andric // TODO: Offset information needs to be corrected.
2111ac9a064cSDimitry Andric NewMI->dropMemRefs(MF);
2112ac9a064cSDimitry Andric
2113ac9a064cSDimitry Andric return NewMI;
2114ac9a064cSDimitry Andric }
2115ac9a064cSDimitry Andric
2116ac9a064cSDimitry Andric /// Create a dedicated exit for Loop. Exit is the original exit for Loop.
2117ac9a064cSDimitry Andric /// If it is already dedicated exit, return it. Otherwise, insert a new
2118ac9a064cSDimitry Andric /// block between them and return the new block.
createDedicatedExit(MachineBasicBlock * Loop,MachineBasicBlock * Exit)2119ac9a064cSDimitry Andric static MachineBasicBlock *createDedicatedExit(MachineBasicBlock *Loop,
2120ac9a064cSDimitry Andric MachineBasicBlock *Exit) {
2121ac9a064cSDimitry Andric if (Exit->pred_size() == 1)
2122ac9a064cSDimitry Andric return Exit;
2123ac9a064cSDimitry Andric
2124ac9a064cSDimitry Andric MachineFunction *MF = Loop->getParent();
2125ac9a064cSDimitry Andric const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
2126ac9a064cSDimitry Andric
2127ac9a064cSDimitry Andric MachineBasicBlock *NewExit =
2128ac9a064cSDimitry Andric MF->CreateMachineBasicBlock(Loop->getBasicBlock());
2129ac9a064cSDimitry Andric MF->insert(Loop->getIterator(), NewExit);
2130ac9a064cSDimitry Andric
2131ac9a064cSDimitry Andric MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
2132ac9a064cSDimitry Andric SmallVector<MachineOperand, 4> Cond;
2133ac9a064cSDimitry Andric TII->analyzeBranch(*Loop, TBB, FBB, Cond);
2134ac9a064cSDimitry Andric if (TBB == Loop)
2135ac9a064cSDimitry Andric FBB = NewExit;
2136ac9a064cSDimitry Andric else if (FBB == Loop)
2137ac9a064cSDimitry Andric TBB = NewExit;
2138ac9a064cSDimitry Andric else
2139ac9a064cSDimitry Andric llvm_unreachable("unexpected loop structure");
2140ac9a064cSDimitry Andric TII->removeBranch(*Loop);
2141ac9a064cSDimitry Andric TII->insertBranch(*Loop, TBB, FBB, Cond, DebugLoc());
2142ac9a064cSDimitry Andric Loop->replaceSuccessor(Exit, NewExit);
2143ac9a064cSDimitry Andric TII->insertUnconditionalBranch(*NewExit, Exit, DebugLoc());
2144ac9a064cSDimitry Andric NewExit->addSuccessor(Exit);
2145ac9a064cSDimitry Andric
2146ac9a064cSDimitry Andric Exit->replacePhiUsesWith(Loop, NewExit);
2147ac9a064cSDimitry Andric
2148ac9a064cSDimitry Andric return NewExit;
2149ac9a064cSDimitry Andric }
2150ac9a064cSDimitry Andric
2151ac9a064cSDimitry Andric /// Insert branch code into the end of MBB. It branches to GreaterThan if the
2152ac9a064cSDimitry Andric /// remaining trip count for instructions in LastStage0Insts is greater than
2153ac9a064cSDimitry Andric /// RequiredTC, and to Otherwise otherwise.
insertCondBranch(MachineBasicBlock & MBB,int RequiredTC,InstrMapTy & LastStage0Insts,MachineBasicBlock & GreaterThan,MachineBasicBlock & Otherwise)2154ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::insertCondBranch(MachineBasicBlock &MBB,
2155ac9a064cSDimitry Andric int RequiredTC,
2156ac9a064cSDimitry Andric InstrMapTy &LastStage0Insts,
2157ac9a064cSDimitry Andric MachineBasicBlock &GreaterThan,
2158ac9a064cSDimitry Andric MachineBasicBlock &Otherwise) {
2159ac9a064cSDimitry Andric SmallVector<MachineOperand, 4> Cond;
2160ac9a064cSDimitry Andric LoopInfo->createRemainingIterationsGreaterCondition(RequiredTC, MBB, Cond,
2161ac9a064cSDimitry Andric LastStage0Insts);
2162ac9a064cSDimitry Andric
2163ac9a064cSDimitry Andric if (SwapBranchTargetsMVE) {
2164ac9a064cSDimitry Andric // Set SwapBranchTargetsMVE to true if a target prefers to replace TBB and
2165ac9a064cSDimitry Andric // FBB for optimal performance.
2166ac9a064cSDimitry Andric if (TII->reverseBranchCondition(Cond))
2167ac9a064cSDimitry Andric llvm_unreachable("can not reverse branch condition");
2168ac9a064cSDimitry Andric TII->insertBranch(MBB, &Otherwise, &GreaterThan, Cond, DebugLoc());
2169ac9a064cSDimitry Andric } else {
2170ac9a064cSDimitry Andric TII->insertBranch(MBB, &GreaterThan, &Otherwise, Cond, DebugLoc());
2171ac9a064cSDimitry Andric }
2172ac9a064cSDimitry Andric }
2173ac9a064cSDimitry Andric
2174ac9a064cSDimitry Andric /// Generate a pipelined loop that is unrolled by using MVE algorithm and any
2175ac9a064cSDimitry Andric /// other necessary blocks. The control flow is modified to execute the
2176ac9a064cSDimitry Andric /// pipelined loop if the trip count satisfies the condition, otherwise the
2177ac9a064cSDimitry Andric /// original loop. The original loop is also used to execute the remainder
2178ac9a064cSDimitry Andric /// iterations which occur due to unrolling.
generatePipelinedLoop()2179ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generatePipelinedLoop() {
2180ac9a064cSDimitry Andric // The control flow for pipelining with MVE:
2181ac9a064cSDimitry Andric //
2182ac9a064cSDimitry Andric // OrigPreheader:
2183ac9a064cSDimitry Andric // // The block that is originally the loop preheader
2184ac9a064cSDimitry Andric // goto Check
2185ac9a064cSDimitry Andric //
2186ac9a064cSDimitry Andric // Check:
2187ac9a064cSDimitry Andric // // Check whether the trip count satisfies the requirements to pipeline.
2188ac9a064cSDimitry Andric // if (LoopCounter > NumStages + NumUnroll - 2)
2189ac9a064cSDimitry Andric // // The minimum number of iterations to pipeline =
2190ac9a064cSDimitry Andric // // iterations executed in prolog/epilog (NumStages-1) +
2191ac9a064cSDimitry Andric // // iterations executed in one kernel run (NumUnroll)
2192ac9a064cSDimitry Andric // goto Prolog
2193ac9a064cSDimitry Andric // // fallback to the original loop
2194ac9a064cSDimitry Andric // goto NewPreheader
2195ac9a064cSDimitry Andric //
2196ac9a064cSDimitry Andric // Prolog:
2197ac9a064cSDimitry Andric // // All prolog stages. There are no direct branches to the epilogue.
2198ac9a064cSDimitry Andric // goto NewKernel
2199ac9a064cSDimitry Andric //
2200ac9a064cSDimitry Andric // NewKernel:
2201ac9a064cSDimitry Andric // // NumUnroll copies of the kernel
2202ac9a064cSDimitry Andric // if (LoopCounter > MVE-1)
2203ac9a064cSDimitry Andric // goto NewKernel
2204ac9a064cSDimitry Andric // goto Epilog
2205ac9a064cSDimitry Andric //
2206ac9a064cSDimitry Andric // Epilog:
2207ac9a064cSDimitry Andric // // All epilog stages.
2208ac9a064cSDimitry Andric // if (LoopCounter > 0)
2209ac9a064cSDimitry Andric // // The remainder is executed in the original loop
2210ac9a064cSDimitry Andric // goto NewPreheader
2211ac9a064cSDimitry Andric // goto NewExit
2212ac9a064cSDimitry Andric //
2213ac9a064cSDimitry Andric // NewPreheader:
2214ac9a064cSDimitry Andric // // Newly created preheader for the original loop.
2215ac9a064cSDimitry Andric // // The initial values of the phis in the loop are merged from two paths.
2216ac9a064cSDimitry Andric // NewInitVal = Phi OrigInitVal, Check, PipelineLastVal, Epilog
2217ac9a064cSDimitry Andric // goto OrigKernel
2218ac9a064cSDimitry Andric //
2219ac9a064cSDimitry Andric // OrigKernel:
2220ac9a064cSDimitry Andric // // The original loop block.
2221ac9a064cSDimitry Andric // if (LoopCounter != 0)
2222ac9a064cSDimitry Andric // goto OrigKernel
2223ac9a064cSDimitry Andric // goto NewExit
2224ac9a064cSDimitry Andric //
2225ac9a064cSDimitry Andric // NewExit:
2226ac9a064cSDimitry Andric // // Newly created dedicated exit for the original loop.
2227ac9a064cSDimitry Andric // // Merge values which are referenced after the loop
2228ac9a064cSDimitry Andric // Merged = Phi OrigVal, OrigKernel, PipelineVal, Epilog
2229ac9a064cSDimitry Andric // goto OrigExit
2230ac9a064cSDimitry Andric //
2231ac9a064cSDimitry Andric // OrigExit:
2232ac9a064cSDimitry Andric // // The block that is originally the loop exit.
2233ac9a064cSDimitry Andric // // If it is already deicated exit, NewExit is not created.
2234ac9a064cSDimitry Andric
2235ac9a064cSDimitry Andric // An example of where each stage is executed:
2236ac9a064cSDimitry Andric // Assume #Stages 3, #MVE 4, #Iterations 12
2237ac9a064cSDimitry Andric // Iter 0 1 2 3 4 5 6 7 8 9 10-11
2238ac9a064cSDimitry Andric // -------------------------------------------------
2239ac9a064cSDimitry Andric // Stage 0 Prolog#0
2240ac9a064cSDimitry Andric // Stage 1 0 Prolog#1
2241ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#0 Iter#0
2242ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#1 Iter#0
2243ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#2 Iter#0
2244ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#3 Iter#0
2245ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#0 Iter#1
2246ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#1 Iter#1
2247ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#2 Iter#1
2248ac9a064cSDimitry Andric // Stage 2 1 0 Kernel Unroll#3 Iter#1
2249ac9a064cSDimitry Andric // Stage 2 1 Epilog#0
2250ac9a064cSDimitry Andric // Stage 2 Epilog#1
2251ac9a064cSDimitry Andric // Stage 0-2 OrigKernel
2252ac9a064cSDimitry Andric
2253ac9a064cSDimitry Andric LoopInfo = TII->analyzeLoopForPipelining(OrigKernel);
2254ac9a064cSDimitry Andric assert(LoopInfo && "Must be able to analyze loop!");
2255ac9a064cSDimitry Andric
2256ac9a064cSDimitry Andric calcNumUnroll();
2257ac9a064cSDimitry Andric
2258ac9a064cSDimitry Andric Check = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2259ac9a064cSDimitry Andric Prolog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2260ac9a064cSDimitry Andric NewKernel = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2261ac9a064cSDimitry Andric Epilog = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2262ac9a064cSDimitry Andric NewPreheader = MF.CreateMachineBasicBlock(OrigKernel->getBasicBlock());
2263ac9a064cSDimitry Andric
2264ac9a064cSDimitry Andric MF.insert(OrigKernel->getIterator(), Check);
2265ac9a064cSDimitry Andric MF.insert(OrigKernel->getIterator(), Prolog);
2266ac9a064cSDimitry Andric MF.insert(OrigKernel->getIterator(), NewKernel);
2267ac9a064cSDimitry Andric MF.insert(OrigKernel->getIterator(), Epilog);
2268ac9a064cSDimitry Andric MF.insert(OrigKernel->getIterator(), NewPreheader);
2269ac9a064cSDimitry Andric
2270ac9a064cSDimitry Andric NewExit = createDedicatedExit(OrigKernel, OrigExit);
2271ac9a064cSDimitry Andric
2272ac9a064cSDimitry Andric NewPreheader->transferSuccessorsAndUpdatePHIs(OrigPreheader);
2273ac9a064cSDimitry Andric TII->insertUnconditionalBranch(*NewPreheader, OrigKernel, DebugLoc());
2274ac9a064cSDimitry Andric
2275ac9a064cSDimitry Andric OrigPreheader->addSuccessor(Check);
2276ac9a064cSDimitry Andric TII->removeBranch(*OrigPreheader);
2277ac9a064cSDimitry Andric TII->insertUnconditionalBranch(*OrigPreheader, Check, DebugLoc());
2278ac9a064cSDimitry Andric
2279ac9a064cSDimitry Andric Check->addSuccessor(Prolog);
2280ac9a064cSDimitry Andric Check->addSuccessor(NewPreheader);
2281ac9a064cSDimitry Andric
2282ac9a064cSDimitry Andric Prolog->addSuccessor(NewKernel);
2283ac9a064cSDimitry Andric
2284ac9a064cSDimitry Andric NewKernel->addSuccessor(NewKernel);
2285ac9a064cSDimitry Andric NewKernel->addSuccessor(Epilog);
2286ac9a064cSDimitry Andric
2287ac9a064cSDimitry Andric Epilog->addSuccessor(NewPreheader);
2288ac9a064cSDimitry Andric Epilog->addSuccessor(NewExit);
2289ac9a064cSDimitry Andric
2290ac9a064cSDimitry Andric InstrMapTy LastStage0Insts;
2291ac9a064cSDimitry Andric insertCondBranch(*Check, Schedule.getNumStages() + NumUnroll - 2,
2292ac9a064cSDimitry Andric LastStage0Insts, *Prolog, *NewPreheader);
2293ac9a064cSDimitry Andric
2294ac9a064cSDimitry Andric // VRMaps map (prolog/kernel/epilog phase#, original register#) to new
2295ac9a064cSDimitry Andric // register#
2296ac9a064cSDimitry Andric SmallVector<ValueMapTy> PrologVRMap, KernelVRMap, EpilogVRMap;
2297ac9a064cSDimitry Andric generateProlog(PrologVRMap);
2298ac9a064cSDimitry Andric generateKernel(PrologVRMap, KernelVRMap, LastStage0Insts);
2299ac9a064cSDimitry Andric generateEpilog(KernelVRMap, EpilogVRMap, LastStage0Insts);
2300ac9a064cSDimitry Andric }
2301ac9a064cSDimitry Andric
2302ac9a064cSDimitry Andric /// Replace MI's use operands according to the maps.
updateInstrUse(MachineInstr * MI,int StageNum,int PhaseNum,SmallVectorImpl<ValueMapTy> & CurVRMap,SmallVectorImpl<ValueMapTy> * PrevVRMap)2303ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::updateInstrUse(
2304ac9a064cSDimitry Andric MachineInstr *MI, int StageNum, int PhaseNum,
2305ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &CurVRMap,
2306ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> *PrevVRMap) {
2307ac9a064cSDimitry Andric // If MI is in the prolog/kernel/epilog block, CurVRMap is
2308ac9a064cSDimitry Andric // PrologVRMap/KernelVRMap/EpilogVRMap respectively.
2309ac9a064cSDimitry Andric // PrevVRMap is nullptr/PhiVRMap/KernelVRMap respectively.
2310ac9a064cSDimitry Andric // Refer to the appropriate map according to the stage difference between
2311ac9a064cSDimitry Andric // MI and the definition of an operand.
2312ac9a064cSDimitry Andric
2313ac9a064cSDimitry Andric for (MachineOperand &UseMO : MI->uses()) {
2314ac9a064cSDimitry Andric if (!UseMO.isReg() || !UseMO.getReg().isVirtual())
2315ac9a064cSDimitry Andric continue;
2316ac9a064cSDimitry Andric int DiffStage = 0;
2317ac9a064cSDimitry Andric Register OrigReg = UseMO.getReg();
2318ac9a064cSDimitry Andric MachineInstr *DefInst = MRI.getVRegDef(OrigReg);
2319ac9a064cSDimitry Andric if (!DefInst || DefInst->getParent() != OrigKernel)
2320ac9a064cSDimitry Andric continue;
2321ac9a064cSDimitry Andric unsigned InitReg = 0;
2322ac9a064cSDimitry Andric unsigned DefReg = OrigReg;
2323ac9a064cSDimitry Andric if (DefInst->isPHI()) {
2324ac9a064cSDimitry Andric ++DiffStage;
2325ac9a064cSDimitry Andric unsigned LoopReg;
2326ac9a064cSDimitry Andric getPhiRegs(*DefInst, OrigKernel, InitReg, LoopReg);
2327ac9a064cSDimitry Andric // LoopReg is guaranteed to be defined within the loop by canApply()
2328ac9a064cSDimitry Andric DefReg = LoopReg;
2329ac9a064cSDimitry Andric DefInst = MRI.getVRegDef(LoopReg);
2330ac9a064cSDimitry Andric }
2331ac9a064cSDimitry Andric unsigned DefStageNum = Schedule.getStage(DefInst);
2332ac9a064cSDimitry Andric DiffStage += StageNum - DefStageNum;
2333ac9a064cSDimitry Andric Register NewReg;
2334ac9a064cSDimitry Andric if (PhaseNum >= DiffStage && CurVRMap[PhaseNum - DiffStage].count(DefReg))
2335ac9a064cSDimitry Andric // NewReg is defined in a previous phase of the same block
2336ac9a064cSDimitry Andric NewReg = CurVRMap[PhaseNum - DiffStage][DefReg];
2337ac9a064cSDimitry Andric else if (!PrevVRMap)
2338ac9a064cSDimitry Andric // Since this is the first iteration, refer the initial register of the
2339ac9a064cSDimitry Andric // loop
2340ac9a064cSDimitry Andric NewReg = InitReg;
2341ac9a064cSDimitry Andric else
2342ac9a064cSDimitry Andric // Cases where DiffStage is larger than PhaseNum.
2343ac9a064cSDimitry Andric // If MI is in the kernel block, the value is defined by the previous
2344ac9a064cSDimitry Andric // iteration and PhiVRMap is referenced. If MI is in the epilog block, the
2345ac9a064cSDimitry Andric // value is defined in the kernel block and KernelVRMap is referenced.
2346ac9a064cSDimitry Andric NewReg = (*PrevVRMap)[PrevVRMap->size() - (DiffStage - PhaseNum)][DefReg];
2347ac9a064cSDimitry Andric
2348ac9a064cSDimitry Andric const TargetRegisterClass *NRC =
2349ac9a064cSDimitry Andric MRI.constrainRegClass(NewReg, MRI.getRegClass(OrigReg));
2350ac9a064cSDimitry Andric if (NRC)
2351ac9a064cSDimitry Andric UseMO.setReg(NewReg);
2352ac9a064cSDimitry Andric else {
2353ac9a064cSDimitry Andric Register SplitReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2354ac9a064cSDimitry Andric BuildMI(*OrigKernel, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY),
2355ac9a064cSDimitry Andric SplitReg)
2356ac9a064cSDimitry Andric .addReg(NewReg);
2357ac9a064cSDimitry Andric UseMO.setReg(SplitReg);
2358ac9a064cSDimitry Andric }
2359ac9a064cSDimitry Andric }
2360ac9a064cSDimitry Andric }
2361ac9a064cSDimitry Andric
2362ac9a064cSDimitry Andric /// Return a phi if Reg is referenced by the phi.
2363ac9a064cSDimitry Andric /// canApply() guarantees that at most only one such phi exists.
getLoopPhiUser(Register Reg,MachineBasicBlock * Loop)2364ac9a064cSDimitry Andric static MachineInstr *getLoopPhiUser(Register Reg, MachineBasicBlock *Loop) {
2365ac9a064cSDimitry Andric for (MachineInstr &Phi : Loop->phis()) {
2366ac9a064cSDimitry Andric unsigned InitVal, LoopVal;
2367ac9a064cSDimitry Andric getPhiRegs(Phi, Loop, InitVal, LoopVal);
2368ac9a064cSDimitry Andric if (LoopVal == Reg)
2369ac9a064cSDimitry Andric return Φ
2370ac9a064cSDimitry Andric }
2371ac9a064cSDimitry Andric return nullptr;
2372ac9a064cSDimitry Andric }
2373ac9a064cSDimitry Andric
2374ac9a064cSDimitry Andric /// Generate phis for registers defined by OrigMI.
generatePhi(MachineInstr * OrigMI,int UnrollNum,SmallVectorImpl<ValueMapTy> & PrologVRMap,SmallVectorImpl<ValueMapTy> & KernelVRMap,SmallVectorImpl<ValueMapTy> & PhiVRMap)2375ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generatePhi(
2376ac9a064cSDimitry Andric MachineInstr *OrigMI, int UnrollNum,
2377ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &PrologVRMap,
2378ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &KernelVRMap,
2379ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &PhiVRMap) {
2380ac9a064cSDimitry Andric int StageNum = Schedule.getStage(OrigMI);
2381ac9a064cSDimitry Andric bool UsePrologReg;
2382ac9a064cSDimitry Andric if (Schedule.getNumStages() - NumUnroll + UnrollNum - 1 >= StageNum)
2383ac9a064cSDimitry Andric UsePrologReg = true;
2384ac9a064cSDimitry Andric else if (Schedule.getNumStages() - NumUnroll + UnrollNum == StageNum)
2385ac9a064cSDimitry Andric UsePrologReg = false;
2386ac9a064cSDimitry Andric else
2387ac9a064cSDimitry Andric return;
2388ac9a064cSDimitry Andric
2389ac9a064cSDimitry Andric // Examples that show which stages are merged by phi.
2390ac9a064cSDimitry Andric // Meaning of the symbol following the stage number:
2391ac9a064cSDimitry Andric // a/b: Stages with the same letter are merged (UsePrologReg == true)
2392ac9a064cSDimitry Andric // +: Merged with the initial value (UsePrologReg == false)
2393ac9a064cSDimitry Andric // *: No phis required
2394ac9a064cSDimitry Andric //
2395ac9a064cSDimitry Andric // #Stages 3, #MVE 4
2396ac9a064cSDimitry Andric // Iter 0 1 2 3 4 5 6 7 8
2397ac9a064cSDimitry Andric // -----------------------------------------
2398ac9a064cSDimitry Andric // Stage 0a Prolog#0
2399ac9a064cSDimitry Andric // Stage 1a 0b Prolog#1
2400ac9a064cSDimitry Andric // Stage 2* 1* 0* Kernel Unroll#0
2401ac9a064cSDimitry Andric // Stage 2* 1* 0+ Kernel Unroll#1
2402ac9a064cSDimitry Andric // Stage 2* 1+ 0a Kernel Unroll#2
2403ac9a064cSDimitry Andric // Stage 2+ 1a 0b Kernel Unroll#3
2404ac9a064cSDimitry Andric //
2405ac9a064cSDimitry Andric // #Stages 3, #MVE 2
2406ac9a064cSDimitry Andric // Iter 0 1 2 3 4 5 6 7 8
2407ac9a064cSDimitry Andric // -----------------------------------------
2408ac9a064cSDimitry Andric // Stage 0a Prolog#0
2409ac9a064cSDimitry Andric // Stage 1a 0b Prolog#1
2410ac9a064cSDimitry Andric // Stage 2* 1+ 0a Kernel Unroll#0
2411ac9a064cSDimitry Andric // Stage 2+ 1a 0b Kernel Unroll#1
2412ac9a064cSDimitry Andric //
2413ac9a064cSDimitry Andric // #Stages 3, #MVE 1
2414ac9a064cSDimitry Andric // Iter 0 1 2 3 4 5 6 7 8
2415ac9a064cSDimitry Andric // -----------------------------------------
2416ac9a064cSDimitry Andric // Stage 0* Prolog#0
2417ac9a064cSDimitry Andric // Stage 1a 0b Prolog#1
2418ac9a064cSDimitry Andric // Stage 2+ 1a 0b Kernel Unroll#0
2419ac9a064cSDimitry Andric
2420ac9a064cSDimitry Andric for (MachineOperand &DefMO : OrigMI->defs()) {
2421ac9a064cSDimitry Andric if (!DefMO.isReg() || DefMO.isDead())
2422ac9a064cSDimitry Andric continue;
2423ac9a064cSDimitry Andric Register OrigReg = DefMO.getReg();
2424ac9a064cSDimitry Andric auto NewReg = KernelVRMap[UnrollNum].find(OrigReg);
2425ac9a064cSDimitry Andric if (NewReg == KernelVRMap[UnrollNum].end())
2426ac9a064cSDimitry Andric continue;
2427ac9a064cSDimitry Andric Register CorrespondReg;
2428ac9a064cSDimitry Andric if (UsePrologReg) {
2429ac9a064cSDimitry Andric int PrologNum = Schedule.getNumStages() - NumUnroll + UnrollNum - 1;
2430ac9a064cSDimitry Andric CorrespondReg = PrologVRMap[PrologNum][OrigReg];
2431ac9a064cSDimitry Andric } else {
2432ac9a064cSDimitry Andric MachineInstr *Phi = getLoopPhiUser(OrigReg, OrigKernel);
2433ac9a064cSDimitry Andric if (!Phi)
2434ac9a064cSDimitry Andric continue;
2435ac9a064cSDimitry Andric CorrespondReg = getInitPhiReg(*Phi, OrigKernel);
2436ac9a064cSDimitry Andric }
2437ac9a064cSDimitry Andric
2438ac9a064cSDimitry Andric assert(CorrespondReg.isValid());
2439ac9a064cSDimitry Andric Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2440ac9a064cSDimitry Andric BuildMI(*NewKernel, NewKernel->getFirstNonPHI(), DebugLoc(),
2441ac9a064cSDimitry Andric TII->get(TargetOpcode::PHI), PhiReg)
2442ac9a064cSDimitry Andric .addReg(NewReg->second)
2443ac9a064cSDimitry Andric .addMBB(NewKernel)
2444ac9a064cSDimitry Andric .addReg(CorrespondReg)
2445ac9a064cSDimitry Andric .addMBB(Prolog);
2446ac9a064cSDimitry Andric PhiVRMap[UnrollNum][OrigReg] = PhiReg;
2447ac9a064cSDimitry Andric }
2448ac9a064cSDimitry Andric }
2449ac9a064cSDimitry Andric
replacePhiSrc(MachineInstr & Phi,Register OrigReg,Register NewReg,MachineBasicBlock * NewMBB)2450ac9a064cSDimitry Andric static void replacePhiSrc(MachineInstr &Phi, Register OrigReg, Register NewReg,
2451ac9a064cSDimitry Andric MachineBasicBlock *NewMBB) {
2452ac9a064cSDimitry Andric for (unsigned Idx = 1; Idx < Phi.getNumOperands(); Idx += 2) {
2453ac9a064cSDimitry Andric if (Phi.getOperand(Idx).getReg() == OrigReg) {
2454ac9a064cSDimitry Andric Phi.getOperand(Idx).setReg(NewReg);
2455ac9a064cSDimitry Andric Phi.getOperand(Idx + 1).setMBB(NewMBB);
2456ac9a064cSDimitry Andric return;
2457ac9a064cSDimitry Andric }
2458ac9a064cSDimitry Andric }
2459ac9a064cSDimitry Andric }
2460ac9a064cSDimitry Andric
2461ac9a064cSDimitry Andric /// Generate phis that merge values from multiple routes
mergeRegUsesAfterPipeline(Register OrigReg,Register NewReg)2462ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::mergeRegUsesAfterPipeline(Register OrigReg,
2463ac9a064cSDimitry Andric Register NewReg) {
2464ac9a064cSDimitry Andric SmallVector<MachineOperand *> UsesAfterLoop;
2465ac9a064cSDimitry Andric SmallVector<MachineInstr *> LoopPhis;
2466ac9a064cSDimitry Andric for (MachineRegisterInfo::use_iterator I = MRI.use_begin(OrigReg),
2467ac9a064cSDimitry Andric E = MRI.use_end();
2468ac9a064cSDimitry Andric I != E; ++I) {
2469ac9a064cSDimitry Andric MachineOperand &O = *I;
2470ac9a064cSDimitry Andric if (O.getParent()->getParent() != OrigKernel &&
2471ac9a064cSDimitry Andric O.getParent()->getParent() != Prolog &&
2472ac9a064cSDimitry Andric O.getParent()->getParent() != NewKernel &&
2473ac9a064cSDimitry Andric O.getParent()->getParent() != Epilog)
2474ac9a064cSDimitry Andric UsesAfterLoop.push_back(&O);
2475ac9a064cSDimitry Andric if (O.getParent()->getParent() == OrigKernel && O.getParent()->isPHI())
2476ac9a064cSDimitry Andric LoopPhis.push_back(O.getParent());
2477ac9a064cSDimitry Andric }
2478ac9a064cSDimitry Andric
2479ac9a064cSDimitry Andric // Merge the route that only execute the pipelined loop (when there are no
2480ac9a064cSDimitry Andric // remaining iterations) with the route that execute the original loop.
2481ac9a064cSDimitry Andric if (!UsesAfterLoop.empty()) {
2482ac9a064cSDimitry Andric Register PhiReg = MRI.createVirtualRegister(MRI.getRegClass(OrigReg));
2483ac9a064cSDimitry Andric BuildMI(*NewExit, NewExit->getFirstNonPHI(), DebugLoc(),
2484ac9a064cSDimitry Andric TII->get(TargetOpcode::PHI), PhiReg)
2485ac9a064cSDimitry Andric .addReg(OrigReg)
2486ac9a064cSDimitry Andric .addMBB(OrigKernel)
2487ac9a064cSDimitry Andric .addReg(NewReg)
2488ac9a064cSDimitry Andric .addMBB(Epilog);
2489ac9a064cSDimitry Andric
2490ac9a064cSDimitry Andric for (MachineOperand *MO : UsesAfterLoop)
2491ac9a064cSDimitry Andric MO->setReg(PhiReg);
2492ac9a064cSDimitry Andric
2493ac9a064cSDimitry Andric if (!LIS.hasInterval(PhiReg))
2494ac9a064cSDimitry Andric LIS.createEmptyInterval(PhiReg);
2495ac9a064cSDimitry Andric }
2496ac9a064cSDimitry Andric
2497ac9a064cSDimitry Andric // Merge routes from the pipelined loop and the bypassed route before the
2498ac9a064cSDimitry Andric // original loop
2499ac9a064cSDimitry Andric if (!LoopPhis.empty()) {
2500ac9a064cSDimitry Andric for (MachineInstr *Phi : LoopPhis) {
2501ac9a064cSDimitry Andric unsigned InitReg, LoopReg;
2502ac9a064cSDimitry Andric getPhiRegs(*Phi, OrigKernel, InitReg, LoopReg);
2503ac9a064cSDimitry Andric Register NewInit = MRI.createVirtualRegister(MRI.getRegClass(InitReg));
2504ac9a064cSDimitry Andric BuildMI(*NewPreheader, NewPreheader->getFirstNonPHI(), Phi->getDebugLoc(),
2505ac9a064cSDimitry Andric TII->get(TargetOpcode::PHI), NewInit)
2506ac9a064cSDimitry Andric .addReg(InitReg)
2507ac9a064cSDimitry Andric .addMBB(Check)
2508ac9a064cSDimitry Andric .addReg(NewReg)
2509ac9a064cSDimitry Andric .addMBB(Epilog);
2510ac9a064cSDimitry Andric replacePhiSrc(*Phi, InitReg, NewInit, NewPreheader);
2511ac9a064cSDimitry Andric }
2512ac9a064cSDimitry Andric }
2513ac9a064cSDimitry Andric }
2514ac9a064cSDimitry Andric
generateProlog(SmallVectorImpl<ValueMapTy> & PrologVRMap)2515ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generateProlog(
2516ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &PrologVRMap) {
2517ac9a064cSDimitry Andric PrologVRMap.clear();
2518ac9a064cSDimitry Andric PrologVRMap.resize(Schedule.getNumStages() - 1);
2519ac9a064cSDimitry Andric DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2520ac9a064cSDimitry Andric for (int PrologNum = 0; PrologNum < Schedule.getNumStages() - 1;
2521ac9a064cSDimitry Andric ++PrologNum) {
2522ac9a064cSDimitry Andric for (MachineInstr *MI : Schedule.getInstructions()) {
2523ac9a064cSDimitry Andric if (MI->isPHI())
2524ac9a064cSDimitry Andric continue;
2525ac9a064cSDimitry Andric int StageNum = Schedule.getStage(MI);
2526ac9a064cSDimitry Andric if (StageNum > PrologNum)
2527ac9a064cSDimitry Andric continue;
2528ac9a064cSDimitry Andric MachineInstr *NewMI = cloneInstr(MI);
2529ac9a064cSDimitry Andric updateInstrDef(NewMI, PrologVRMap[PrologNum], false);
2530ac9a064cSDimitry Andric NewMIMap[NewMI] = {PrologNum, StageNum};
2531ac9a064cSDimitry Andric Prolog->push_back(NewMI);
2532ac9a064cSDimitry Andric }
2533ac9a064cSDimitry Andric }
2534ac9a064cSDimitry Andric
2535ac9a064cSDimitry Andric for (auto I : NewMIMap) {
2536ac9a064cSDimitry Andric MachineInstr *MI = I.first;
2537ac9a064cSDimitry Andric int PrologNum = I.second.first;
2538ac9a064cSDimitry Andric int StageNum = I.second.second;
2539ac9a064cSDimitry Andric updateInstrUse(MI, StageNum, PrologNum, PrologVRMap, nullptr);
2540ac9a064cSDimitry Andric }
2541ac9a064cSDimitry Andric
2542ac9a064cSDimitry Andric LLVM_DEBUG({
2543ac9a064cSDimitry Andric dbgs() << "prolog:\n";
2544ac9a064cSDimitry Andric Prolog->dump();
2545ac9a064cSDimitry Andric });
2546ac9a064cSDimitry Andric }
2547ac9a064cSDimitry Andric
generateKernel(SmallVectorImpl<ValueMapTy> & PrologVRMap,SmallVectorImpl<ValueMapTy> & KernelVRMap,InstrMapTy & LastStage0Insts)2548ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generateKernel(
2549ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &PrologVRMap,
2550ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &KernelVRMap, InstrMapTy &LastStage0Insts) {
2551ac9a064cSDimitry Andric KernelVRMap.clear();
2552ac9a064cSDimitry Andric KernelVRMap.resize(NumUnroll);
2553ac9a064cSDimitry Andric SmallVector<ValueMapTy> PhiVRMap;
2554ac9a064cSDimitry Andric PhiVRMap.resize(NumUnroll);
2555ac9a064cSDimitry Andric DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2556ac9a064cSDimitry Andric DenseMap<MachineInstr *, MachineInstr *> MIMapLastStage0;
2557ac9a064cSDimitry Andric for (int UnrollNum = 0; UnrollNum < NumUnroll; ++UnrollNum) {
2558ac9a064cSDimitry Andric for (MachineInstr *MI : Schedule.getInstructions()) {
2559ac9a064cSDimitry Andric if (MI->isPHI())
2560ac9a064cSDimitry Andric continue;
2561ac9a064cSDimitry Andric int StageNum = Schedule.getStage(MI);
2562ac9a064cSDimitry Andric MachineInstr *NewMI = cloneInstr(MI);
2563ac9a064cSDimitry Andric if (UnrollNum == NumUnroll - 1)
2564ac9a064cSDimitry Andric LastStage0Insts[MI] = NewMI;
2565ac9a064cSDimitry Andric updateInstrDef(NewMI, KernelVRMap[UnrollNum],
2566ac9a064cSDimitry Andric (UnrollNum == NumUnroll - 1 && StageNum == 0));
2567ac9a064cSDimitry Andric generatePhi(MI, UnrollNum, PrologVRMap, KernelVRMap, PhiVRMap);
2568ac9a064cSDimitry Andric NewMIMap[NewMI] = {UnrollNum, StageNum};
2569ac9a064cSDimitry Andric NewKernel->push_back(NewMI);
2570ac9a064cSDimitry Andric }
2571ac9a064cSDimitry Andric }
2572ac9a064cSDimitry Andric
2573ac9a064cSDimitry Andric for (auto I : NewMIMap) {
2574ac9a064cSDimitry Andric MachineInstr *MI = I.first;
2575ac9a064cSDimitry Andric int UnrollNum = I.second.first;
2576ac9a064cSDimitry Andric int StageNum = I.second.second;
2577ac9a064cSDimitry Andric updateInstrUse(MI, StageNum, UnrollNum, KernelVRMap, &PhiVRMap);
2578ac9a064cSDimitry Andric }
2579ac9a064cSDimitry Andric
2580ac9a064cSDimitry Andric // If remaining trip count is greater than NumUnroll-1, loop continues
2581ac9a064cSDimitry Andric insertCondBranch(*NewKernel, NumUnroll - 1, LastStage0Insts, *NewKernel,
2582ac9a064cSDimitry Andric *Epilog);
2583ac9a064cSDimitry Andric
2584ac9a064cSDimitry Andric LLVM_DEBUG({
2585ac9a064cSDimitry Andric dbgs() << "kernel:\n";
2586ac9a064cSDimitry Andric NewKernel->dump();
2587ac9a064cSDimitry Andric });
2588ac9a064cSDimitry Andric }
2589ac9a064cSDimitry Andric
generateEpilog(SmallVectorImpl<ValueMapTy> & KernelVRMap,SmallVectorImpl<ValueMapTy> & EpilogVRMap,InstrMapTy & LastStage0Insts)2590ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::generateEpilog(
2591ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &KernelVRMap,
2592ac9a064cSDimitry Andric SmallVectorImpl<ValueMapTy> &EpilogVRMap, InstrMapTy &LastStage0Insts) {
2593ac9a064cSDimitry Andric EpilogVRMap.clear();
2594ac9a064cSDimitry Andric EpilogVRMap.resize(Schedule.getNumStages() - 1);
2595ac9a064cSDimitry Andric DenseMap<MachineInstr *, std::pair<int, int>> NewMIMap;
2596ac9a064cSDimitry Andric for (int EpilogNum = 0; EpilogNum < Schedule.getNumStages() - 1;
2597ac9a064cSDimitry Andric ++EpilogNum) {
2598ac9a064cSDimitry Andric for (MachineInstr *MI : Schedule.getInstructions()) {
2599ac9a064cSDimitry Andric if (MI->isPHI())
2600ac9a064cSDimitry Andric continue;
2601ac9a064cSDimitry Andric int StageNum = Schedule.getStage(MI);
2602ac9a064cSDimitry Andric if (StageNum <= EpilogNum)
2603ac9a064cSDimitry Andric continue;
2604ac9a064cSDimitry Andric MachineInstr *NewMI = cloneInstr(MI);
2605ac9a064cSDimitry Andric updateInstrDef(NewMI, EpilogVRMap[EpilogNum], StageNum - 1 == EpilogNum);
2606ac9a064cSDimitry Andric NewMIMap[NewMI] = {EpilogNum, StageNum};
2607ac9a064cSDimitry Andric Epilog->push_back(NewMI);
2608ac9a064cSDimitry Andric }
2609ac9a064cSDimitry Andric }
2610ac9a064cSDimitry Andric
2611ac9a064cSDimitry Andric for (auto I : NewMIMap) {
2612ac9a064cSDimitry Andric MachineInstr *MI = I.first;
2613ac9a064cSDimitry Andric int EpilogNum = I.second.first;
2614ac9a064cSDimitry Andric int StageNum = I.second.second;
2615ac9a064cSDimitry Andric updateInstrUse(MI, StageNum, EpilogNum, EpilogVRMap, &KernelVRMap);
2616ac9a064cSDimitry Andric }
2617ac9a064cSDimitry Andric
2618ac9a064cSDimitry Andric // If there are remaining iterations, they are executed in the original loop.
2619ac9a064cSDimitry Andric // Instructions related to loop control, such as loop counter comparison,
2620ac9a064cSDimitry Andric // are indicated by shouldIgnoreForPipelining() and are assumed to be placed
2621ac9a064cSDimitry Andric // in stage 0. Thus, the map is for the last one in the kernel.
2622ac9a064cSDimitry Andric insertCondBranch(*Epilog, 0, LastStage0Insts, *NewPreheader, *NewExit);
2623ac9a064cSDimitry Andric
2624ac9a064cSDimitry Andric LLVM_DEBUG({
2625ac9a064cSDimitry Andric dbgs() << "epilog:\n";
2626ac9a064cSDimitry Andric Epilog->dump();
2627ac9a064cSDimitry Andric });
2628ac9a064cSDimitry Andric }
2629ac9a064cSDimitry Andric
2630ac9a064cSDimitry Andric /// Calculate the number of unroll required and set it to NumUnroll
calcNumUnroll()2631ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::calcNumUnroll() {
2632ac9a064cSDimitry Andric DenseMap<MachineInstr *, unsigned> Inst2Idx;
2633ac9a064cSDimitry Andric NumUnroll = 1;
2634ac9a064cSDimitry Andric for (unsigned I = 0; I < Schedule.getInstructions().size(); ++I)
2635ac9a064cSDimitry Andric Inst2Idx[Schedule.getInstructions()[I]] = I;
2636ac9a064cSDimitry Andric
2637ac9a064cSDimitry Andric for (MachineInstr *MI : Schedule.getInstructions()) {
2638ac9a064cSDimitry Andric if (MI->isPHI())
2639ac9a064cSDimitry Andric continue;
2640ac9a064cSDimitry Andric int StageNum = Schedule.getStage(MI);
2641ac9a064cSDimitry Andric for (const MachineOperand &MO : MI->uses()) {
2642ac9a064cSDimitry Andric if (!MO.isReg() || !MO.getReg().isVirtual())
2643ac9a064cSDimitry Andric continue;
2644ac9a064cSDimitry Andric MachineInstr *DefMI = MRI.getVRegDef(MO.getReg());
2645ac9a064cSDimitry Andric if (DefMI->getParent() != OrigKernel)
2646ac9a064cSDimitry Andric continue;
2647ac9a064cSDimitry Andric
2648ac9a064cSDimitry Andric int NumUnrollLocal = 1;
2649ac9a064cSDimitry Andric if (DefMI->isPHI()) {
2650ac9a064cSDimitry Andric ++NumUnrollLocal;
2651ac9a064cSDimitry Andric // canApply() guarantees that DefMI is not phi and is an instruction in
2652ac9a064cSDimitry Andric // the loop
2653ac9a064cSDimitry Andric DefMI = MRI.getVRegDef(getLoopPhiReg(*DefMI, OrigKernel));
2654ac9a064cSDimitry Andric }
2655ac9a064cSDimitry Andric NumUnrollLocal += StageNum - Schedule.getStage(DefMI);
2656ac9a064cSDimitry Andric if (Inst2Idx[MI] <= Inst2Idx[DefMI])
2657ac9a064cSDimitry Andric --NumUnrollLocal;
2658ac9a064cSDimitry Andric NumUnroll = std::max(NumUnroll, NumUnrollLocal);
2659ac9a064cSDimitry Andric }
2660ac9a064cSDimitry Andric }
2661ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "NumUnroll: " << NumUnroll << "\n");
2662ac9a064cSDimitry Andric }
2663ac9a064cSDimitry Andric
2664ac9a064cSDimitry Andric /// Create new virtual registers for definitions of NewMI and update NewMI.
2665ac9a064cSDimitry Andric /// If the definitions are referenced after the pipelined loop, phis are
2666ac9a064cSDimitry Andric /// created to merge with other routes.
updateInstrDef(MachineInstr * NewMI,ValueMapTy & VRMap,bool LastDef)2667ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::updateInstrDef(MachineInstr *NewMI,
2668ac9a064cSDimitry Andric ValueMapTy &VRMap,
2669ac9a064cSDimitry Andric bool LastDef) {
2670ac9a064cSDimitry Andric for (MachineOperand &MO : NewMI->operands()) {
2671ac9a064cSDimitry Andric if (!MO.isReg() || !MO.getReg().isVirtual() || !MO.isDef())
2672ac9a064cSDimitry Andric continue;
2673ac9a064cSDimitry Andric Register Reg = MO.getReg();
2674ac9a064cSDimitry Andric const TargetRegisterClass *RC = MRI.getRegClass(Reg);
2675ac9a064cSDimitry Andric Register NewReg = MRI.createVirtualRegister(RC);
2676ac9a064cSDimitry Andric MO.setReg(NewReg);
2677ac9a064cSDimitry Andric VRMap[Reg] = NewReg;
2678ac9a064cSDimitry Andric if (LastDef)
2679ac9a064cSDimitry Andric mergeRegUsesAfterPipeline(Reg, NewReg);
2680ac9a064cSDimitry Andric }
2681ac9a064cSDimitry Andric }
2682ac9a064cSDimitry Andric
expand()2683ac9a064cSDimitry Andric void ModuloScheduleExpanderMVE::expand() {
2684ac9a064cSDimitry Andric OrigKernel = Schedule.getLoop()->getTopBlock();
2685ac9a064cSDimitry Andric OrigPreheader = Schedule.getLoop()->getLoopPreheader();
2686ac9a064cSDimitry Andric OrigExit = Schedule.getLoop()->getExitBlock();
2687ac9a064cSDimitry Andric
2688ac9a064cSDimitry Andric LLVM_DEBUG(Schedule.dump());
2689ac9a064cSDimitry Andric
2690ac9a064cSDimitry Andric generatePipelinedLoop();
2691ac9a064cSDimitry Andric }
2692ac9a064cSDimitry Andric
2693ac9a064cSDimitry Andric /// Check if ModuloScheduleExpanderMVE can be applied to L
canApply(MachineLoop & L)2694ac9a064cSDimitry Andric bool ModuloScheduleExpanderMVE::canApply(MachineLoop &L) {
2695ac9a064cSDimitry Andric if (!L.getExitBlock()) {
2696ac9a064cSDimitry Andric LLVM_DEBUG(
2697ac9a064cSDimitry Andric dbgs() << "Can not apply MVE expander: No single exit block.\n";);
2698ac9a064cSDimitry Andric return false;
2699ac9a064cSDimitry Andric }
2700ac9a064cSDimitry Andric
2701ac9a064cSDimitry Andric MachineBasicBlock *BB = L.getTopBlock();
2702ac9a064cSDimitry Andric MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
2703ac9a064cSDimitry Andric
2704ac9a064cSDimitry Andric // Put some constraints on the operands of the phis to simplify the
2705ac9a064cSDimitry Andric // transformation
2706ac9a064cSDimitry Andric DenseSet<unsigned> UsedByPhi;
2707ac9a064cSDimitry Andric for (MachineInstr &MI : BB->phis()) {
2708ac9a064cSDimitry Andric // Registers defined by phis must be used only inside the loop and be never
2709ac9a064cSDimitry Andric // used by phis.
2710ac9a064cSDimitry Andric for (MachineOperand &MO : MI.defs())
2711ac9a064cSDimitry Andric if (MO.isReg())
2712ac9a064cSDimitry Andric for (MachineInstr &Ref : MRI.use_instructions(MO.getReg()))
2713ac9a064cSDimitry Andric if (Ref.getParent() != BB || Ref.isPHI()) {
2714ac9a064cSDimitry Andric LLVM_DEBUG(dbgs()
2715ac9a064cSDimitry Andric << "Can not apply MVE expander: A phi result is "
2716ac9a064cSDimitry Andric "referenced outside of the loop or by phi.\n";);
2717ac9a064cSDimitry Andric return false;
2718ac9a064cSDimitry Andric }
2719ac9a064cSDimitry Andric
2720ac9a064cSDimitry Andric // A source register from the loop block must be defined inside the loop.
2721ac9a064cSDimitry Andric // A register defined inside the loop must be referenced by only one phi at
2722ac9a064cSDimitry Andric // most.
2723ac9a064cSDimitry Andric unsigned InitVal, LoopVal;
2724ac9a064cSDimitry Andric getPhiRegs(MI, MI.getParent(), InitVal, LoopVal);
2725ac9a064cSDimitry Andric if (!Register(LoopVal).isVirtual() ||
2726ac9a064cSDimitry Andric MRI.getVRegDef(LoopVal)->getParent() != BB) {
2727ac9a064cSDimitry Andric LLVM_DEBUG(
2728ac9a064cSDimitry Andric dbgs() << "Can not apply MVE expander: A phi source value coming "
2729ac9a064cSDimitry Andric "from the loop is not defined in the loop.\n";);
2730ac9a064cSDimitry Andric return false;
2731ac9a064cSDimitry Andric }
2732ac9a064cSDimitry Andric if (UsedByPhi.count(LoopVal)) {
2733ac9a064cSDimitry Andric LLVM_DEBUG(dbgs() << "Can not apply MVE expander: A value defined in the "
2734ac9a064cSDimitry Andric "loop is referenced by two or more phis.\n";);
2735ac9a064cSDimitry Andric return false;
2736ac9a064cSDimitry Andric }
2737ac9a064cSDimitry Andric UsedByPhi.insert(LoopVal);
2738ac9a064cSDimitry Andric }
2739ac9a064cSDimitry Andric
2740ac9a064cSDimitry Andric return true;
2741ac9a064cSDimitry Andric }
2742ac9a064cSDimitry Andric
27431d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
27441d5ae102SDimitry Andric // ModuloScheduleTestPass implementation
27451d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
27461d5ae102SDimitry Andric // This pass constructs a ModuloSchedule from its module and runs
27471d5ae102SDimitry Andric // ModuloScheduleExpander.
27481d5ae102SDimitry Andric //
27491d5ae102SDimitry Andric // The module is expected to contain a single-block analyzable loop.
27501d5ae102SDimitry Andric // The total order of instructions is taken from the loop as-is.
27511d5ae102SDimitry Andric // Instructions are expected to be annotated with a PostInstrSymbol.
27521d5ae102SDimitry Andric // This PostInstrSymbol must have the following format:
27531d5ae102SDimitry Andric // "Stage=%d Cycle=%d".
27541d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
27551d5ae102SDimitry Andric
27561d5ae102SDimitry Andric namespace {
27571d5ae102SDimitry Andric class ModuloScheduleTest : public MachineFunctionPass {
27581d5ae102SDimitry Andric public:
27591d5ae102SDimitry Andric static char ID;
27601d5ae102SDimitry Andric
ModuloScheduleTest()27611d5ae102SDimitry Andric ModuloScheduleTest() : MachineFunctionPass(ID) {
27621d5ae102SDimitry Andric initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
27631d5ae102SDimitry Andric }
27641d5ae102SDimitry Andric
27651d5ae102SDimitry Andric bool runOnMachineFunction(MachineFunction &MF) override;
27661d5ae102SDimitry Andric void runOnLoop(MachineFunction &MF, MachineLoop &L);
27671d5ae102SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const27681d5ae102SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
2769ac9a064cSDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>();
2770ac9a064cSDimitry Andric AU.addRequired<LiveIntervalsWrapperPass>();
27711d5ae102SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU);
27721d5ae102SDimitry Andric }
27731d5ae102SDimitry Andric };
27741d5ae102SDimitry Andric } // namespace
27751d5ae102SDimitry Andric
27761d5ae102SDimitry Andric char ModuloScheduleTest::ID = 0;
27771d5ae102SDimitry Andric
27781d5ae102SDimitry Andric INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
27791d5ae102SDimitry Andric "Modulo Schedule test pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)2780ac9a064cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
2781ac9a064cSDimitry Andric INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
27821d5ae102SDimitry Andric INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
27831d5ae102SDimitry Andric "Modulo Schedule test pass", false, false)
27841d5ae102SDimitry Andric
27851d5ae102SDimitry Andric bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
2786ac9a064cSDimitry Andric MachineLoopInfo &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();
27871d5ae102SDimitry Andric for (auto *L : MLI) {
27881d5ae102SDimitry Andric if (L->getTopBlock() != L->getBottomBlock())
27891d5ae102SDimitry Andric continue;
27901d5ae102SDimitry Andric runOnLoop(MF, *L);
27911d5ae102SDimitry Andric return false;
27921d5ae102SDimitry Andric }
27931d5ae102SDimitry Andric return false;
27941d5ae102SDimitry Andric }
27951d5ae102SDimitry Andric
parseSymbolString(StringRef S,int & Cycle,int & Stage)27961d5ae102SDimitry Andric static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
27971d5ae102SDimitry Andric std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
27981d5ae102SDimitry Andric std::pair<StringRef, StringRef> StageTokenAndValue =
27991d5ae102SDimitry Andric getToken(StageAndCycle.first, "-");
28001d5ae102SDimitry Andric std::pair<StringRef, StringRef> CycleTokenAndValue =
28011d5ae102SDimitry Andric getToken(StageAndCycle.second, "-");
28021d5ae102SDimitry Andric if (StageTokenAndValue.first != "Stage" ||
28031d5ae102SDimitry Andric CycleTokenAndValue.first != "_Cycle") {
28041d5ae102SDimitry Andric llvm_unreachable(
28051d5ae102SDimitry Andric "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
28061d5ae102SDimitry Andric return;
28071d5ae102SDimitry Andric }
28081d5ae102SDimitry Andric
28091d5ae102SDimitry Andric StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
28101d5ae102SDimitry Andric CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
28111d5ae102SDimitry Andric
28121d5ae102SDimitry Andric dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
28131d5ae102SDimitry Andric }
28141d5ae102SDimitry Andric
runOnLoop(MachineFunction & MF,MachineLoop & L)28151d5ae102SDimitry Andric void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
2816ac9a064cSDimitry Andric LiveIntervals &LIS = getAnalysis<LiveIntervalsWrapperPass>().getLIS();
28171d5ae102SDimitry Andric MachineBasicBlock *BB = L.getTopBlock();
28181d5ae102SDimitry Andric dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
28191d5ae102SDimitry Andric
28201d5ae102SDimitry Andric DenseMap<MachineInstr *, int> Cycle, Stage;
28211d5ae102SDimitry Andric std::vector<MachineInstr *> Instrs;
28221d5ae102SDimitry Andric for (MachineInstr &MI : *BB) {
28231d5ae102SDimitry Andric if (MI.isTerminator())
28241d5ae102SDimitry Andric continue;
28251d5ae102SDimitry Andric Instrs.push_back(&MI);
28261d5ae102SDimitry Andric if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
28271d5ae102SDimitry Andric dbgs() << "Parsing post-instr symbol for " << MI;
28281d5ae102SDimitry Andric parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
28291d5ae102SDimitry Andric }
28301d5ae102SDimitry Andric }
28311d5ae102SDimitry Andric
28321d5ae102SDimitry Andric ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
28331d5ae102SDimitry Andric std::move(Stage));
28341d5ae102SDimitry Andric ModuloScheduleExpander MSE(
28351d5ae102SDimitry Andric MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
28361d5ae102SDimitry Andric MSE.expand();
28371d5ae102SDimitry Andric MSE.cleanup();
28381d5ae102SDimitry Andric }
28391d5ae102SDimitry Andric
28401d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
28411d5ae102SDimitry Andric // ModuloScheduleTestAnnotater implementation
28421d5ae102SDimitry Andric //===----------------------------------------------------------------------===//
28431d5ae102SDimitry Andric
annotate()28441d5ae102SDimitry Andric void ModuloScheduleTestAnnotater::annotate() {
28451d5ae102SDimitry Andric for (MachineInstr *MI : S.getInstructions()) {
28461d5ae102SDimitry Andric SmallVector<char, 16> SV;
28471d5ae102SDimitry Andric raw_svector_ostream OS(SV);
28481d5ae102SDimitry Andric OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
28491d5ae102SDimitry Andric MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
28501d5ae102SDimitry Andric MI->setPostInstrSymbol(MF, Sym);
28511d5ae102SDimitry Andric }
28521d5ae102SDimitry Andric }
2853